ohli24 api update 2023.01.13(01.)

This commit is contained in:
2023-01-13 20:14:11 +09:00
parent 2af1e6d738
commit 11afb7bf38

View File

@@ -80,6 +80,10 @@ class LogicOhli24(LogicModuleBase):
}
current_headers = None
current_data = None
referer = None
origin_url = None
episode_url = None
cookies = None
session = requests.Session()
headers = {
@@ -87,7 +91,8 @@ class LogicOhli24(LogicModuleBase):
"Chrome/71.0.3578.98 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
"Referer": "",
# "Referer": "",
# "Cookie": "PHPSESSID=hhhnrora8o9omv1tljq4efv216; 2a0d2363701f23f8a75028924a3af643=NDkuMTYzLjExMS4xMDk=; e1192aefb64683cc97abb83c71057733=aW5n",
}
useragent = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
@@ -100,6 +105,183 @@ class LogicOhli24(LogicModuleBase):
self.queue = None
default_route_socketio(P, self)
@staticmethod
async def get_html_playwright(
    url: str,
    headless: bool = False,
    referer: str = "",
    engine: str = "chrome",
    stealth: bool = False,
):
    """Load ``url`` in a Playwright-driven browser and return the page HTML.

    The request is sent with ``LogicOhli24.headers`` as extra HTTP headers.
    The ``Referer`` header is resolved as: the ``referer`` argument when it
    is non-empty, otherwise ``LogicOhli24.episode_url``, otherwise an empty
    string. The resolved value is also written back to
    ``LogicOhli24.headers["Referer"]``, and the final page URL is stored in
    ``LogicOhli24.origin_url``.

    Args:
        url: Address of the page to load.
        headless: Passed to the browser launcher as ``headless``.
        referer: Referer to send; empty or ``None`` means "fall back to
            ``LogicOhli24.episode_url``".
        engine: ``"chrome"`` launches Chromium with the ``chrome`` channel,
            ``"webkit"`` launches WebKit, any other value launches Firefox.
        stealth: When true, ``playwright_stealth.stealth_async`` is applied
            to the page before navigation.

    Returns:
        The HTML returned by ``page.content()``, or ``None`` when any step
        fails (the exception is logged, not raised).
    """
    try:
        # Imported lazily so a missing playwright installation is logged
        # and reported as ``None`` instead of breaking module import.
        from playwright.async_api import async_playwright
        import time

        browser_args = [
            "--window-size=1300,570",
            "--window-position=000,000",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-web-security",
            "--disable-features=site-per-process",
            "--disable-setuid-sandbox",
            "--disable-accelerated-2d-canvas",
            "--no-first-run",
            "--no-zygote",
            "--disable-gpu",
            "--use-gl=egl",
            "--disable-blink-features=AutomationControlled",
            "--enable-features=NetworkService,NetworkServiceInProcess",
            "--disable-background-timer-throttling",
            "--disable-backgrounding-occluded-windows",
            "--disable-breakpad",
            "--disable-client-side-phishing-detection",
            "--disable-component-extensions-with-background-pages",
            "--disable-default-apps",
            "--disable-extensions",
            "--disable-features=Translate",
            "--disable-hang-monitor",
            "--disable-ipc-flooding-protection",
            "--disable-popup-blocking",
            "--disable-prompt-on-repost",
            "--disable-renderer-backgrounding",
            "--disable-sync",
            "--force-color-profile=srgb",
            "--metrics-recording-only",
            "--password-store=basic",
            "--use-mock-keychain",
            "--hide-scrollbars",
            "--mute-audio",
        ]

        started_at = time.monotonic()

        # An explicit, non-empty referer wins; otherwise use the stored
        # episode URL. Never leave ``None`` in the header map, because
        # header values must be strings.
        effective_referer = referer or LogicOhli24.episode_url or ""
        LogicOhli24.headers["Referer"] = effective_referer
        logger.debug("Referer:: %s", effective_referer)

        async with async_playwright() as p:
            # Bound before launching so the cleanup below can tell whether
            # a browser was actually started.
            browser = None
            try:
                if engine == "chrome":
                    browser = await p.chromium.launch(
                        channel="chrome", args=browser_args, headless=headless
                    )
                elif engine == "webkit":
                    browser = await p.webkit.launch(
                        headless=headless,
                        args=browser_args,
                    )
                else:
                    browser = await p.firefox.launch(
                        headless=headless,
                        args=browser_args,
                    )

                context = await browser.new_context(
                    extra_http_headers=LogicOhli24.headers,
                    ignore_https_errors=True,
                )
                page = await context.new_page()

                if stealth:
                    # Only needed (and only imported) when stealth is on.
                    from playwright_stealth import stealth_async

                    await stealth_async(page)

                await page.goto(
                    url,
                    wait_until="networkidle",
                    referer=effective_referer,
                )
                # Fixed grace period after network idle, kept from the
                # original implementation.
                await asyncio.sleep(6)

                logger.debug("page.url:: %s", page.url)
                LogicOhli24.origin_url = page.url

                content = await page.content()
                logger.debug("run at %s sec", time.monotonic() - started_at)
                return content
            finally:
                if browser is not None:
                    await browser.close()
    except Exception as e:
        logger.error("Exception:%s", e)
        logger.error(traceback.format_exc())
    return None
@staticmethod
def db_init():
pass
@@ -615,6 +797,16 @@ class LogicOhli24(LogicModuleBase):
logger.info("url:::> %s", url)
data = {}
response_data = LogicOhli24.get_html(url, timeout=10)
# response_data = asyncio.run(
# LogicOhli24.get_html_playwright(
# url,
# headless=False,
# # referer=referer_url,
# engine="chrome",
# # stealth=True,
# )
# )
# print(response_data)
tree = html.fromstring(response_data)
tmp_items = tree.xpath('//div[@class="list-row"]')
data["anime_count"] = len(tmp_items)
@@ -743,15 +935,40 @@ class LogicOhli24(LogicModuleBase):
return True
@staticmethod
def get_html(url, referer=None, stream=False, timeout=5):
def get_html(
url, headers=None, referer=None, stream=False, timeout=5, stealth=False
):
data = ""
try:
print("cloudflare protection bypass ==================P")
if headers is not None:
LogicOhli24.headers = headers
logger.debug(f"headers: {LogicOhli24.headers}")
response_data = asyncio.run(
LogicOhli24.get_html_playwright(
url,
headless=False,
# referer=referer_url,
engine="chrome",
# stealth=stealth,
)
)
logger.debug(len(response_data))
return response_data
if LogicOhli24.session is None:
LogicOhli24.session = requests.session()
# logger.debug('get_html :%s', url)
headers["Referer"] = "" if referer is None else referer
logger.info(headers)
logger.debug(LogicOhli24.headers)
page_content = LogicOhli24.session.get(
url, headers=headers, timeout=timeout
)
@@ -848,7 +1065,8 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
logger.debug("make_episode_info()::url==> %s", url)
logger.info(f"self.info:::> {self.info}")
text = requests.get(url, headers=headers).text
# text = requests.get(url, headers=headers).text
text = LogicOhli24.get_html(url, headers=headers)
# logger.debug(text)
soup1 = BeautifulSoup(text, "lxml")
pattern = re.compile(r"url : \"\.\.(.*)\"")
@@ -864,10 +1082,11 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
iframe_src = iframe_url
# logger.debug(f"iframe_src:::> {iframe_src}")
logger.debug(f"iframe_src:::> {iframe_src}")
resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
# logger.info("resp1::>> %s", resp1)
# resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
resp1 = LogicOhli24.get_html(iframe_src, headers=headers, timeout=600)
logger.info("resp1::>> %s", resp1)
soup3 = BeautifulSoup(resp1, "lxml")
# packed_pattern = re.compile(r'\\{*(eval.+)*\\}', re.MULTILINE | re.DOTALL)
s_pattern = re.compile(r"(eval.+)", re.MULTILINE | re.DOTALL)
@@ -918,6 +1137,7 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
"Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 "
"Whale/3.12.129.46 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
"Cookie": "PHPSESSID=hhhnrora8o9omv1tljq4efv216; 2a0d2363701f23f8a75028924a3af643=NDkuMTYzLjExMS4xMDk=; e1192aefb64683cc97abb83c71057733=aW5n",
}
payload = {