From 11afb7bf38ff1c742b372b92d8e1069f6b713e7c Mon Sep 17 00:00:00 2001
From: projectdx
Date: Fri, 13 Jan 2023 20:14:11 +0900
Subject: [PATCH] ohli24 api update 2023.01.13(01.)

---
Review note: get_html_playwright was revised after this patch was generated
(hunk ranges and the diffstat below are updated to match). The commit id on
the first line and the post-image blob id on the index line still describe
the pre-review version. Indentation of context lines was restored by hand.

 logic_ohli24.py | 182 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 6 deletions(-)

diff --git a/logic_ohli24.py b/logic_ohli24.py
index edd7ad5..aa46b35 100644
--- a/logic_ohli24.py
+++ b/logic_ohli24.py
@@ -80,6 +80,10 @@ class LogicOhli24(LogicModuleBase):
     }
     current_headers = None
     current_data = None
+    referer = None
+    origin_url = None
+    episode_url = None
+    cookies = None
 
     session = requests.Session()
     headers = {
@@ -87,7 +91,8 @@ class LogicOhli24(LogicModuleBase):
         "Chrome/71.0.3578.98 Safari/537.36",
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
         "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
-        "Referer": "",
+        # "Referer": "",
+        # "Cookie": "PHPSESSID=hhhnrora8o9omv1tljq4efv216; 2a0d2363701f23f8a75028924a3af643=NDkuMTYzLjExMS4xMDk=; e1192aefb64683cc97abb83c71057733=aW5n",
     }
     useragent = {
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
@@ -100,6 +105,133 @@ class LogicOhli24(LogicModuleBase):
         self.queue = None
         default_route_socketio(P, self)
 
+    @staticmethod
+    async def get_html_playwright(
+        url: str,
+        headless: bool = False,
+        referer: str = "",
+        engine: str = "chrome",
+        stealth: bool = False,
+    ):
+        """Render ``url`` in a Playwright browser and return the page HTML.
+
+        The class-level ``LogicOhli24.headers`` dict is updated with the
+        resolved ``Referer`` and sent as extra HTTP headers; the final URL
+        after navigation is stored in ``LogicOhli24.origin_url``.
+
+        :param url: page to load.
+        :param headless: launch the browser without a visible window.
+        :param referer: ``Referer`` to send; ``None`` falls back to
+            ``LogicOhli24.episode_url``.
+        :param engine: ``"chrome"`` or ``"webkit"``; anything else is Firefox.
+        :param stealth: apply ``playwright_stealth`` to the page first.
+        :return: rendered HTML, or ``None`` when any step fails (the
+            exception is logged, not raised).
+        """
+        import asyncio
+        import time
+
+        # Chromium command-line switches; handed to every engine unchanged.
+        browser_args = [
+            "--window-size=1300,570",
+            "--window-position=000,000",
+            "--disable-dev-shm-usage",
+            "--no-sandbox",
+            "--disable-web-security",
+            "--disable-features=site-per-process",
+            "--disable-setuid-sandbox",
+            "--disable-accelerated-2d-canvas",
+            "--no-first-run",
+            "--no-zygote",
+            "--disable-gpu",
+            "--use-gl=egl",
+            "--disable-blink-features=AutomationControlled",
+            "--enable-features=NetworkService,NetworkServiceInProcess",
+            "--disable-background-timer-throttling",
+            "--disable-backgrounding-occluded-windows",
+            "--disable-breakpad",
+            "--disable-client-side-phishing-detection",
+            "--disable-component-extensions-with-background-pages",
+            "--disable-default-apps",
+            "--disable-extensions",
+            "--disable-features=Translate",
+            "--disable-hang-monitor",
+            "--disable-ipc-flooding-protection",
+            "--disable-popup-blocking",
+            "--disable-prompt-on-repost",
+            "--disable-renderer-backgrounding",
+            "--disable-sync",
+            "--force-color-profile=srgb",
+            "--metrics-recording-only",
+            "--password-store=basic",
+            "--use-mock-keychain",
+            "--hide-scrollbars",
+            "--mute-audio",
+        ]
+
+        try:
+            from playwright.async_api import async_playwright
+
+            if stealth:
+                # Only needed (and only imported) when stealth is requested.
+                from playwright_stealth import stealth_async
+
+            if referer is None:
+                # A header value must be a string, so never pass None on.
+                referer = LogicOhli24.episode_url or ""
+            LogicOhli24.headers["Referer"] = referer
+
+            start = time.time()
+            async with async_playwright() as p:
+                # Stays None when launch() raises, so cleanup can tell.
+                browser = None
+                try:
+                    if engine == "chrome":
+                        browser = await p.chromium.launch(
+                            channel="chrome", args=browser_args, headless=headless
+                        )
+                    elif engine == "webkit":
+                        browser = await p.webkit.launch(
+                            headless=headless,
+                            args=browser_args,
+                        )
+                    else:
+                        browser = await p.firefox.launch(
+                            headless=headless,
+                            args=browser_args,
+                        )
+
+                    context = await browser.new_context(
+                        extra_http_headers=LogicOhli24.headers,
+                        ignore_https_errors=True,
+                    )
+                    page = await context.new_page()
+                    if stealth:
+                        await stealth_async(page)
+
+                    logger.debug("Referer:: %s", referer)
+                    await page.goto(
+                        url,
+                        wait_until="networkidle",
+                        referer=referer,
+                    )
+                    # Fixed extra wait after "networkidle" before the page
+                    # is read; the 6 s value is kept as originally written.
+                    await asyncio.sleep(6)
+
+                    logger.debug("page.url:: %s", page.url)
+                    LogicOhli24.origin_url = page.url
+                    content = await page.content()
+                    logger.debug("run at %s sec", time.time() - start)
+                    return content
+                finally:
+                    if browser is not None:
+                        await browser.close()
+        except Exception as e:
+            logger.error("Exception:%s", e)
+            logger.error(traceback.format_exc())
+            return None
+
     @staticmethod
     def db_init():
         pass
@@ -615,6 +747,16 @@ class LogicOhli24(LogicModuleBase):
             logger.info("url:::> %s", url)
             data = {}
             response_data = LogicOhli24.get_html(url, timeout=10)
+            # response_data = asyncio.run(
+            #     LogicOhli24.get_html_playwright(
+            #         url,
+            #         headless=False,
+            #         # referer=referer_url,
+            #         engine="chrome",
+            #         # stealth=True,
+            #     )
+            # )
+            # print(response_data)
             tree = html.fromstring(response_data)
             tmp_items = tree.xpath('//div[@class="list-row"]')
             data["anime_count"] = len(tmp_items)
@@ -743,15 +885,40 @@ class LogicOhli24(LogicModuleBase):
         return True
 
     @staticmethod
-    def get_html(url, referer=None, stream=False, timeout=5):
+    def get_html(
+        url, headers=None, referer=None, stream=False, timeout=5, stealth=False
+    ):
         data = ""
         try:
 
+            print("cloudflare protection bypass ==================P")
+
+            if headers is not None:
+                LogicOhli24.headers = headers
+
+            logger.debug(f"headers: {LogicOhli24.headers}")
+            response_data = asyncio.run(
+                LogicOhli24.get_html_playwright(
+                    url,
+                    headless=False,
+                    # referer=referer_url,
+                    engine="chrome",
+                    # stealth=stealth,
+                )
+            )
+
+            logger.debug(len(response_data))
+
+            return response_data
+
             if LogicOhli24.session is None:
                 LogicOhli24.session = requests.session()
 
             # logger.debug('get_html :%s', url)
             headers["Referer"] = "" if referer is None else referer
+
+            logger.info(headers)
+            logger.debug(LogicOhli24.headers)
             page_content = LogicOhli24.session.get(
                 url, headers=headers, timeout=timeout
             )
@@ -848,7 +1015,8 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
             logger.debug("make_episode_info()::url==> %s", url)
             logger.info(f"self.info:::> {self.info}")
 
-            text = requests.get(url, headers=headers).text
+            # text = requests.get(url, headers=headers).text
+            text = LogicOhli24.get_html(url, headers=headers)
             # logger.debug(text)
             soup1 = BeautifulSoup(text, "lxml")
             pattern = re.compile(r"url : \"\.\.(.*)\"")
@@ -864,10 +1032,11 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
 
             iframe_src = iframe_url
 
-            # logger.debug(f"iframe_src:::> {iframe_src}")
+            logger.debug(f"iframe_src:::> {iframe_src}")
 
-            resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
-            # logger.info("resp1::>> %s", resp1)
+            # resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
+            resp1 = LogicOhli24.get_html(iframe_src, headers=headers, timeout=600)
+            logger.info("resp1::>> %s", resp1)
             soup3 = BeautifulSoup(resp1, "lxml")
             # packed_pattern = re.compile(r'\\{*(eval.+)*\\}', re.MULTILINE | re.DOTALL)
             s_pattern = re.compile(r"(eval.+)", re.MULTILINE | re.DOTALL)
@@ -918,6 +1087,7 @@ class Ohli24QueueEntity(FfmpegQueueEntity):
                 "Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 "
                 "Whale/3.12.129.46 Safari/537.36",
                 "X-Requested-With": "XMLHttpRequest",
+                "Cookie": "PHPSESSID=hhhnrora8o9omv1tljq4efv216; 2a0d2363701f23f8a75028924a3af643=NDkuMTYzLjExMS4xMDk=; e1192aefb64683cc97abb83c71057733=aW5n",
             }
 
             payload = {