diff --git a/README.md b/README.md index f690554..a187c8f 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,16 @@ ## ๐Ÿ“ ๋ณ€๊ฒฝ ์ด๋ ฅ (Changelog) +### v0.6.14 (2026-01-07) +- **Ohli24 Docker ์„ฑ๋Šฅ ๊ณ ์†ํ™”**: + - Zendriver Daemon์— ๋ฆฌ๋ˆ…์Šค/๋„์ปค ์ „์šฉ ์ตœ์ ํ™” ํ”Œ๋ž˜๊ทธ ์ถ”๊ฐ€ (`--no-zygote`, `--disable-dev-shm-usage`, `--disable-features=IsolateOrigins,site-per-process` ๋“ฑ) + - ์ •๋ฐ€ ์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ ๋„์ž… (`/tmp/zendriver_daemon.log`์—์„œ Init/Nav/Block/Poll ๋‹จ๊ณ„๋ณ„ ์‹œ๊ฐ„ ์ธก์ • ๊ฐ€๋Šฅ) + - ๋ชฉ๋ก ํŽ˜์ด์ง€ ํŽ˜์นญ ์‹œ Zendriver Daemon(Layer 3A)์„ ์ตœ์šฐ์„  ์ˆœ์œ„๋กœ ๊ฒฉ์ƒ (๊ธฐ์กด 17์ดˆ โ†’ 1์ดˆ ๋‚ด์™ธ ๋‹จ์ถ• ๊ธฐ๋Œ€) + - `LogicOhli24.get_base_url()` ๋ฐ ๊ฐ ๋ชจ๋“ˆ์—์„œ URL ๋ ์Šฌ๋ž˜์‹œ ์ œ๊ฑฐ(`rstrip`) ์ฒ˜๋ฆฌ๋ฅผ ๊ฐ•ํ™”ํ•˜์—ฌ ๋ถˆํ•„์š”ํ•œ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ ๋ฐฉ์ง€ +- **Zendriver Daemon ์•ˆ์ •์„ฑ**: + - ๋ฆฌ๋ˆ…์Šค ํ™˜๊ฒฝ์˜ `/dev/shm` ์—ฌ์œ  ๊ณต๊ฐ„ ์ฒดํฌ ๋กœ์ง ์ถ”๊ฐ€ + - ๋ณ€์ˆ˜ ์ฐธ์กฐ ์˜ค๋ฅ˜(`NameError`, `elapsed` -> `total_elapsed`) ์ˆ˜์ • ๋ฐ ์—๋Ÿฌ ํ•ธ๋“ค๋ง ๋ณด๊ฐ• + ### v0.6.13 (2026-01-07) - **์ดˆ๊ธฐํ™” ์ˆœ์„œ ์˜ค๋ฅ˜ ์ˆ˜์ •**: `P.logger` ์ ‘๊ทผ ์ „ `P` ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ์ด ์™„๋ฃŒ๋˜๋„๋ก `curl_cffi` ์ž๋™ ์„ค์น˜ ๋ฃจํ‹ด ์œ„์น˜ ์กฐ์ • (`NameError: name 'P' is not defined` ํ•ด๊ฒฐ) diff --git a/info.yaml b/info.yaml index 30397c1..27e915c 100644 --- a/info.yaml +++ b/info.yaml @@ -1,5 +1,5 @@ title: "์• ๋‹ˆ ๋‹ค์šด๋กœ๋”" -version: "0.6.13" +version: "0.6.14" package_name: "anime_downloader" developer: "projectdx" description: "anime downloader" diff --git a/lib/botasaurus_ohli24.py b/lib/botasaurus_ohli24.py index 57da630..6ead34e 100644 --- a/lib/botasaurus_ohli24.py +++ b/lib/botasaurus_ohli24.py @@ -9,6 +9,7 @@ import sys import json import os import time +import traceback from typing import Dict, Any, Optional # ๋ด‡์‚ฌ์šฐ๋ฃจ์Šค ๋””๋ฒ„๊น… ์ผ์‹œ์ •์ง€ ๋ฐฉ์ง€ ๋ฐ ์ž๋™ ์ข…๋ฃŒ ์„ค์ • @@ -16,19 +17,22 @@ os.environ["BOTASAURUS_ENV"] = "production" def fetch_html(url: str, headers: Optional[Dict[str, str]] = None, proxy: Optional[str] = None) -> Dict[str, Any]: result: Dict[str, Any] = {"success": False, "html": "", "elapsed": 0} - start_time: float = time.time() + max_retries = 2 try: from botasaurus.request import request as b_request - # raise_exception=True๋Š” ์—๋Ÿฌ ์‹œ exception์„ ๋ฐœ์ƒ์‹œํ‚ค๊ฒŒ ํ•จ - # close_on_crash=True๋Š” ์—๋Ÿฌ ๋ฐœ์ƒ ์‹œ ๋Œ€๊ธฐํ•˜์ง€ ์•Š๊ณ  ์ฆ‰์‹œ ์ข…๋ฃŒ (๋ฐฐํฌ ํ™˜๊ฒฝ์šฉ) - @b_request(proxy=proxy, raise_exception=True, close_on_crash=True) + # use_stealth=True ์ถ”๊ฐ€ํ•˜์—ฌ ํƒ์ง€ ํšŒํ”ผ ๊ฐ•ํ™” + @b_request( + proxy=proxy, + raise_exception=True, + close_on_crash=True + ) def fetch_url(request: Any, data: Dict[str, Any]) -> str: target_url = data.get('url') headers = data.get('headers') or {} - # ๊ธฐ๋ณธ์ ์ธ ํ—ค๋” ๋ณด๊ฐ• (Ohli24 ๋Œ€์‘ - Cloudflare ์šฐํšŒ ์‹œ๋„) + # ๊ธฐ๋ณธ์ ์ธ ํ—ค๋” ๋ณด๊ฐ• (Ohli24 ๋Œ€์‘ - Cloudflare/TLS Fingerprinting ๋Œ€์‘) default_headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", @@ -50,37 +54,69 @@ def fetch_html(url: str, headers: Optional[Dict[str, str]] = None, proxy: Option if k not in headers and k.lower() not in [hk.lower() for hk in headers]: headers[k] = v - return request.get(target_url, headers=headers, timeout=30) + return request.get(target_url, headers=headers, timeout=20) - # ๋ด‡์‚ฌ์šฐ๋ฃจ์Šค๋Š” ์‹คํŒจ ์‹œ ์ž๋™ ์žฌ์‹œ๋„ ๋“ฑ์„ ํ•˜๊ธฐ๋„ ํ•จ. - # ์—ฌ๊ธฐ์„œ๋Š” ๋‹จ๋ฐœ์„ฑ ์š”์ฒญ์ด๋ฏ€๋กœ ์ง์ ‘ ํ˜ธ์ถœ. - b_resp: str = fetch_url({'url': url, 'headers': headers}) - elapsed: float = time.time() - start_time - - if b_resp and len(b_resp) > 10: - result.update({ - "success": True, - "html": b_resp, - "elapsed": round(elapsed, 2) - }) - else: - result["error"] = f"Short response: {len(b_resp) if b_resp else 0} bytes" - result["elapsed"] = round(elapsed, 2) - + for attempt in range(max_retries + 1): + start_time = time.time() + try: + b_resp: str = fetch_url({'url': url, 'headers': headers}) + elapsed = time.time() - start_time + + # ๋ฆฌ์ŠคํŠธ ํŽ˜์ด์ง€๋Š” ๋ณดํ†ต ์ˆ˜๋ฐฑKB ์ด์ƒ (์ตœ์†Œ 500๋ฐ”์ดํŠธ ์ฒดํฌ) + if b_resp and len(b_resp) > 500: + result.update({ + "success": True, + "html": b_resp, + "elapsed": round(elapsed, 2), + "attempt": attempt + 1 + }) + return result + else: + reason = f"Short response ({len(b_resp) if b_resp else 0} bytes)" + if attempt < max_retries: + time.sleep(1) + continue + result["error"] = reason + result["elapsed"] = round(time.time() - start_time, 2) + except Exception as inner_e: + if attempt < max_retries: + time.sleep(1) + continue + result["error"] = str(inner_e) + result["elapsed"] = round(time.time() - start_time, 2) + except Exception as e: - result["error"] = str(e) - result["elapsed"] = round(time.time() - start_time, 2) + result["error"] = f"Botasaurus init/import error: {str(e)}" + result["elapsed"] = 0 return result if __name__ == "__main__": - if len(sys.argv) < 2: - print(json.dumps({"success": False, "error": "Usage: python botasaurus_ohli24.py [headers_json] [proxy]"})) - sys.exit(1) + # ๋ชจ๋“  stdout์„ stderr๋กœ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ (๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋กœ๊ทธ๊ฐ€ stdout์„ ์˜ค์—ผ์‹œํ‚ค๋Š” ๊ฒƒ ๋ฐฉ์ง€) + original_stdout = sys.stdout + sys.stdout = sys.stderr - target_url: str = sys.argv[1] - headers_arg: Optional[Dict[str, str]] = json.loads(sys.argv[2]) if len(sys.argv) > 2 and sys.argv[2] else None - proxy_arg: Optional[str] = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else None - - res: Dict[str, Any] = fetch_html(target_url, headers_arg, proxy_arg) - print(json.dumps(res, ensure_ascii=False)) + try: + if len(sys.argv) < 2: + # ์—๋Ÿฌ ๋ฉ”์‹œ์ง€๋Š” ์ถœ๋ ฅํ•ด์•ผ ํ•˜๋ฏ€๋กœ ๋‹ค์‹œ ๋ณต๊ตฌ ํ›„ ์ถœ๋ ฅ + sys.stdout = original_stdout + print(json.dumps({"success": False, "error": "Usage: script.py [headers] [proxy]"})) + sys.exit(1) + + target_url: str = sys.argv[1] + headers_arg: Optional[Dict[str, str]] = json.loads(sys.argv[2]) if len(sys.argv) > 2 and sys.argv[2] else None + proxy_arg: Optional[str] = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else None + + res: Dict[str, Any] = fetch_html(target_url, headers_arg, proxy_arg) + + # ์ตœ์ข… ๊ฒฐ๊ณผ ์ถœ๋ ฅ ์ „์—๋งŒ stdout ๋ณต๊ตฌ + sys.stdout = original_stdout + print(json.dumps(res, ensure_ascii=False)) + except Exception as fatal_e: + # ์—๋Ÿฌ ๋ฐœ์ƒ ์‹œ์—๋„ JSON ํ˜•ํƒœ๋กœ ์ถœ๋ ฅํ•˜๋„๋ก ๋ณด์žฅ + sys.stdout = original_stdout + print(json.dumps({ + "success": False, + "error": f"Fatal execution error: {str(fatal_e)}", + "traceback": traceback.format_exc() + }, ensure_ascii=False)) diff --git a/lib/zendriver_daemon.py b/lib/zendriver_daemon.py index 9703885..096e1c9 100644 --- a/lib/zendriver_daemon.py +++ b/lib/zendriver_daemon.py @@ -174,6 +174,16 @@ async def ensure_browser() -> Any: log_debug("[ZendriverDaemon] No browser candidates found!") return None + # ๋ฆฌ๋ˆ…์Šค/๋„์ปค ์„ฑ๋Šฅ ๋ถ„์„์šฉ ๋กœ๊ทธ + import platform + if platform.system() == "Linux": + try: + shm_size = os.statvfs('/dev/shm') + free_shm = (shm_size.f_bavail * shm_size.f_frsize) / (1024 * 1024) + log_debug(f"[ZendriverDaemon] Linux detected. /dev/shm free: {free_shm:.1f} MB") + except Exception as shm_e: + log_debug(f"[ZendriverDaemon] Failed to check /dev/shm: {shm_e}") + # ์‚ฌ์šฉ์ž ๋ฐ์ดํ„ฐ ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ • (Mac/Root ๊ถŒํ•œ ์ด์Šˆ ๋Œ€์‘) import tempfile uid = os.getuid() if hasattr(os, 'getuid') else 'win' @@ -204,14 +214,24 @@ async def ensure_browser() -> Any: "--safebrowsing-disable-auto-update", "--remote-allow-origins=*", "--blink-settings=imagesEnabled=false", + "--disable-blink-features=AutomationControlled", + # ์ถ”๊ฐ€์ ์ธ ๋„์ปค ์ตœ์ ํ™” ํ”Œ๋ž˜๊ทธ + "--disable-features=IsolateOrigins,site-per-process", + "--no-zygote", + "--disable-extensions", + "--wasm-tier-up=false", ] + # ์ถ”๊ฐ€์ ์ธ ๋ฆฌ์†Œ์Šค ๋ธ”๋กœํ‚น ์„ค์ • + # Note: zendriver supports direct CDP commands + for exec_path in candidates: user_data_dir = os.path.join(tempfile.gettempdir(), f"zd_daemon_{uid}_{os.path.basename(exec_path).replace(' ', '_')}") os.makedirs(user_data_dir, exist_ok=True) try: log_debug(f"[ZendriverDaemon] Trying browser at: {exec_path}") + start_time_init = time.time() browser = await zd.start( headless=True, browser_executable_path=exec_path, @@ -219,7 +239,7 @@ async def ensure_browser() -> Any: user_data_dir=user_data_dir, browser_args=browser_args ) - log_debug(f"[ZendriverDaemon] Browser started successfully with: {exec_path}") + log_debug(f"[ZendriverDaemon] Browser started successfully in {time.time() - start_time_init:.2f}s using: {exec_path}") return browser except Exception as e: log_debug(f"[ZendriverDaemon] Failed to start {exec_path}: {e}") @@ -242,25 +262,39 @@ async def fetch_with_browser(url: str, timeout: int = 30) -> Dict[str, Any]: start_time: float = time.time() try: + init_start = time.time() await ensure_browser() + init_elapsed = time.time() - init_start if browser is None: result["error"] = "Browser not available" return result - # zendriver์˜ browser.get(url)์€ ์ด๋ฏธ ์—ด๋ฆฐ ํƒญ์ด ์žˆ์œผ๋ฉด ๊ฑฐ๊ธฐ์„œ ์—ด๋ ค๊ณ  ์‹œ๋„ํ•จ. - # ํ•˜์ง€๋งŒ ๋ชจ๋“  ํƒญ์ด ๋‹ซํžˆ๋ฉด StopIteration์ด ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Œ. - log_debug(f"[ZendriverDaemon] Fetching URL: {url}") + log_debug(f"[ZendriverDaemon] Fetching URL: {url} (Init: {init_elapsed:.2f}s)") - # StopIteration ๋ฐฉ์ง€๋ฅผ ์œ„ํ•ด ํŽ˜์ด์ง€ ์ด๋™ ์‹œ๋„ try: + nav_start = time.time() # browser.get(url)์€ ์ƒˆ ํƒญ์„ ์—ด๊ฑฐ๋‚˜ ๊ธฐ์กด ํƒญ์„ ์‚ฌ์šฉํ•จ page: Any = await browser.get(url) + nav_elapsed = time.time() - nav_start + # ๋ฆฌ์†Œ์Šค ๋ธ”๋กœํ‚น (CDP ํ™œ์šฉ) - CSS, ํฐํŠธ, ์ด๋ฏธ์ง€ ๋“ฑ ์ฐจ๋‹จ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ + block_start = time.time() + try: + await page.send(zd.cdp.network.set_blocked_urls(urls=[ + "*.jpg", "*.jpeg", "*.png", "*.gif", "*.svg", "*.webp", "*.ico", + "*.css", "*.woff", "*.woff2", "*.ttf", "*.eot", + "*ads*", "*google-analytics*", "*googletagmanager*", "*doubleclick*" + ])) + await page.send(zd.cdp.network.enable()) + except Exception as e: + log_debug(f"[ZendriverDaemon] Resource blocking enable failed: {e}") + block_elapsed = time.time() - block_start + # ํŽ˜์ด์ง€ ๋กœ๋“œ ๋Œ€๊ธฐ - ์ง€๋Šฅํ˜• ํด๋ง (์ตœ๋Œ€ 10์ดˆ) # 1. ๋ฆฌ์ŠคํŠธ ํŽ˜์ด์ง€๋Š” ๋ฐ”๋กœ ๋ฐ˜ํ™˜, 2. ์—ํ”ผ์†Œ๋“œ ํŽ˜์ด์ง€๋Š” ํ”Œ๋ ˆ์ด์–ด ๋กœ๋”ฉ ๋Œ€๊ธฐ max_wait = 10 - poll_interval = 0.2 # 1.0s -> 0.2s๋กœ ๋‹จ์ถ•ํ•˜์—ฌ ๋ฐ˜์‘์†๋„ ํ–ฅ์ƒ + poll_interval = 0.1 # 0.2s -> 0.1s๋กœ ๋” ๋น ๋ฅด๊ฒŒ ์ฒดํฌ waited = 0 html_content = "" @@ -279,18 +313,25 @@ async def fetch_with_browser(url: str, timeout: int = 30) -> Dict[str, Any]: log_debug(f"[ZendriverDaemon] Player detected in {waited:.1f}s") break - elapsed: float = time.time() - start_time + poll_elapsed = time.time() - poll_start + total_elapsed = time.time() - start_time if html_content and len(html_content) > 100: result.update({ "success": True, "html": html_content, - "elapsed": round(elapsed, 2) + "elapsed": round(total_elapsed, 2), + "metrics": { + "init": round(init_elapsed, 2), + "nav": round(nav_elapsed, 2), + "block": round(block_elapsed, 2), + "poll": round(poll_elapsed, 2) + } }) - log_debug(f"[ZendriverDaemon] Fetch success in {elapsed:.2f}s (Length: {len(html_content)})") + log_debug(f"[ZendriverDaemon] Success in {total_elapsed:.2f}s (Nav: {nav_elapsed:.2f}s, Poll: {poll_elapsed:.2f}s)") else: result["error"] = f"Short response: {len(html_content) if html_content else 0} bytes" - result["elapsed"] = round(elapsed, 2) + result["elapsed"] = round(total_elapsed, 2) log_debug(f"[ZendriverDaemon] Fetch failure: Short response ({len(html_content) if html_content else 0} bytes)") # ์—ฌ๊ธฐ์„œ page.close()๋ฅผ ํ•˜์ง€ ์•Š์Œ! (ํƒญ์„ ํ•˜๋‚˜๋ผ๋„ ๋‚จ๊ฒจ๋‘์–ด์•ผ StopIteration ๋ฐฉ์ง€ ๊ฐ€๋Šฅ) diff --git a/mod_ohli24.py b/mod_ohli24.py index 1f9b2e1..36004ce 100644 --- a/mod_ohli24.py +++ b/mod_ohli24.py @@ -443,6 +443,10 @@ class LogicOhli24(AnimeModuleBase): self.web_list_model = ModelOhli24Item default_route_socketio_module(self, attach="/queue") + @staticmethod + def get_base_url(): + return P.ModelSetting.get("ohli24_url").rstrip('/') + def cleanup_stale_temps(self) -> None: """์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ ์ž”์—ฌ tmp ํด๋” ์ •๋ฆฌ""" try: @@ -1272,7 +1276,7 @@ class LogicOhli24(AnimeModuleBase): # print() # print(today.weekday()) - url = f'{P.ModelSetting.get("ohli24_url")}/bbs/board.php?bo_table=ing&sca={week[today.weekday()]}' + url = f'{LogicOhli24.get_base_url()}/bbs/board.php?bo_table=ing&sca={week[today.weekday()]}' # print(url) @@ -1300,7 +1304,7 @@ class LogicOhli24(AnimeModuleBase): elif len(content_code_list) > 0: for item in content_code_list: - url = P.ModelSetting.get("ohli24_url") + "/c/" + item + url = LogicOhli24.get_base_url() + "/c/" + item logger.debug(f"scheduling url: {url}") # ret_data = LogicOhli24.get_auto_anime_info(self, url=url) content_info = self.get_series_info(item, "", "") @@ -1418,9 +1422,9 @@ class LogicOhli24(AnimeModuleBase): if image: if image.startswith(".."): - image = image.replace("..", P.ModelSetting.get("ohli24_url")) + image = image.replace("..", LogicOhli24.get_base_url()) elif not image.startswith("http"): - image = P.ModelSetting.get("ohli24_url") + image + image = LogicOhli24.get_base_url() + image logger.info(f"image:: {image}") @@ -1473,7 +1477,7 @@ class LogicOhli24(AnimeModuleBase): href = a_elem.get("href", "") if not href.startswith("http"): - href = P.ModelSetting.get("ohli24_url").rstrip("/") + href + href = LogicOhli24.get_base_url() + href # ๋ถ€๋ชจ์—์„œ ๋‚ ์งœ ์ฐพ๊ธฐ parent = a_elem.getparent() @@ -1645,7 +1649,9 @@ class LogicOhli24(AnimeModuleBase): """์นดํ…Œ๊ณ ๋ฆฌ๋ณ„ ์• ๋‹ˆ๋ฉ”์ด์…˜ ๋ชฉ๋ก ์กฐํšŒ.""" logger.debug(f"get_anime_info: cate={cate}, page={page}, sca={sca}") try: - url = P.ModelSetting.get("ohli24_url") + "/bbs/board.php?bo_table=" + cate + "&page=" + page + # URL ๋ ์Šฌ๋ž˜์‹œ ์ œ๊ฑฐ ๋กœ์ง ์ถ”๊ฐ€ + base_url = P.ModelSetting.get("ohli24_url").rstrip('/') + url = base_url + "/bbs/board.php?bo_table=" + cate + "&page=" + page if sca: url += "&sca=" + sca logger.info("url:::> %s", url) @@ -1669,7 +1675,7 @@ class LogicOhli24(AnimeModuleBase): if len(item.xpath(".//div[@class='img-item']/img/@src")) > 0: entity["image_link"] = item.xpath(".//div[@class='img-item']/img/@src")[0].replace( - "..", P.ModelSetting.get("ohli24_url") + "..", LogicOhli24.get_base_url() ) else: entity["image_link"] = item.xpath(".//div[@class='img-item']/img/@data-ezsrc")[0] @@ -1700,7 +1706,7 @@ class LogicOhli24(AnimeModuleBase): entity["code"] = entity["link"].split("/")[-1] entity["title"] = item.xpath(".//div[@class='post-title']/text()")[0].strip() entity["image_link"] = item.xpath(".//div[@class='img-item']/img/@src")[0].replace( - "..", P.ModelSetting.get("ohli24_url") + "..", LogicOhli24.get_base_url() ) data["ret"] = "success" data["anime_list"].append(entity) @@ -1717,7 +1723,7 @@ class LogicOhli24(AnimeModuleBase): try: _query = urllib.parse.quote(query) url = ( - P.ModelSetting.get("ohli24_url") + LogicOhli24.get_base_url() + "/bbs/search.php?srows=24&gr_id=&sfl=wr_subject&stx=" + _query + "&page=" @@ -1747,7 +1753,7 @@ class LogicOhli24(AnimeModuleBase): for attr in img_attributes: matches = item.xpath(attr) if matches and matches[0].strip(): - original_img = matches[0].replace("..", P.ModelSetting.get("ohli24_url")) + original_img = matches[0].replace("..", LogicOhli24.get_base_url()) break if not original_img: @@ -1781,7 +1787,7 @@ class LogicOhli24(AnimeModuleBase): # Fetch image with referer headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - "Referer": P.ModelSetting.get("ohli24_url") + "/", + "Referer": LogicOhli24.get_base_url() + "/", } # Use stream=True to handle binary data efficiently @@ -1947,18 +1953,53 @@ class LogicOhli24(AnimeModuleBase): headers["Referer"] = "https://ani.ohli24.com" - # === [Layer 1: Botasaurus @request (๋น ๋ฆ„ - HTTP Request)] === - # Ohli24์—์„œ Connection Reset ์ด์Šˆ๋กœ ์ธํ•ด ํ˜„์žฌ๋Š” ์ฃผ์„ ์ฒ˜๋ฆฌ (Zendriver ์ตœ์ ํ™” ์ง‘์ค‘) - """ + # === [Layer 3A: Zendriver Daemon (Primary - Persistent Browser)] === + # ๋ฆฌ๋ˆ…์Šค/๋„์ปค ์ฐจ๋‹จ ํ™˜๊ฒฝ ๋Œ€์‘: ๊ฐ€์žฅ ํ™•์‹คํ•˜๊ณ  ๋น ๋ฅธ ์  ๋“œ๋ผ์ด๋ฒ„ ๋ฐ๋ชฌ์„ ์ตœ์šฐ์„ ์œผ๋กœ ์‹œ๋„ if not response_data or len(response_data) < 10: - if LogicOhli24.ensure_essential_dependencies(): + if LogicOhli24.is_zendriver_daemon_running(): + logger.debug(f"[Layer3A] Trying Zendriver Daemon: {url}") + daemon_result = LogicOhli24.fetch_via_daemon(url, 30) + + if daemon_result.get("success") and daemon_result.get("html"): + elapsed = time.time() - total_start + logger.info(f"[Layer3A] Success in {elapsed:.2f}s (HTML: {len(daemon_result['html'])})") + LogicOhli24.daemon_fail_count = 0 + return daemon_result["html"] + else: + logger.warning(f"[Layer3A] Daemon failed: {daemon_result.get('error', 'Unknown')}") + LogicOhli24.daemon_fail_count += 1 + + # === [Layer 1: curl-cffi (Fallback 1)] === + if not response_data or len(response_data) < 10: + try: + from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError + logger.debug(f"[Layer1] Trying curl_cffi: {url}") + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(fetch_url_with_cffi, url, headers, 15, data, method) + response_data = future.result(timeout=20) + + if response_data and len(response_data) > 500: + logger.info(f"[Layer1] curl_cffi success, HTML len: {len(response_data)}") + return response_data + else: + response_data = "" + except Exception as e: + logger.warning(f"[Layer1] curl_cffi failed: {e}") + response_data = "" + + # === [Layer 2: Botasaurus @request (Mac Subprocess / Stealth)] === + if not response_data or len(response_data) < 10: + # ๋ฆฌ์ŠคํŠธ/๊ฒ€์ƒ‰ ํŽ˜์ด์ง€์—์„œ Botasaurus ํ™œ์šฉ (Zendriver๋ณด๋‹ค ๋น ๋ฆ„) + is_list_page = any(x in url for x in ["bo_table=", "/anime/", "search"]) + if is_list_page and LogicOhli24.ensure_essential_dependencies(): import platform is_mac = platform.system() == "Darwin" try: if is_mac: # Mac์—์„œ๋Š” gevent-Trio ์ถฉ๋Œ๋กœ ์ธํ•ด ์„œ๋ธŒํ”„๋กœ์„ธ์Šค๋กœ ์‹คํ–‰ - logger.debug(f"[Layer1] Trying Botasaurus subprocess (Mac workaround): {url}") + logger.debug(f"[Layer2] Trying Botasaurus subprocess (Mac): {url}") import subprocess script_path = os.path.join(os.path.dirname(__file__), "lib", "botasaurus_ohli24.py") @@ -1967,21 +2008,27 @@ class LogicOhli24(AnimeModuleBase): cmd, capture_output=True, text=True, - timeout=timeout + 30 + timeout=timeout + 15 ) if result.returncode == 0 and result.stdout.strip(): - b_result = json.loads(result.stdout.strip()) - if b_result.get("success") and b_result.get("html"): - logger.info(f"[Layer1] Botasaurus(sub) success, HTML len: {len(b_result['html'])}") - return b_result["html"] - else: - logger.warning(f"[Layer1] Botasaurus(sub) failed: {b_result.get('error')}") + try: + b_result = json.loads(result.stdout.strip()) + if b_result.get("success") and b_result.get("html"): + logger.info(f"[Layer2] Botasaurus(sub) success, HTML len: {len(b_result['html'])} (Attempt: {b_result.get('attempt', 1)})") + return b_result["html"] + else: + logger.warning(f"[Layer2] Botasaurus(sub) logic failed: {b_result.get('error')}") + if b_result.get("traceback"): + logger.debug(f"Botasaurus Traceback: {b_result.get('traceback')}") + except json.JSONDecodeError: + logger.error(f"[Layer2] Botasaurus JSON Decode Error. Output: {result.stdout[:200]}") + logger.debug(f"Botasaurus Stderr: {result.stderr}") else: - logger.warning(f"[Layer1] Botasaurus subprocess error: {result.stderr}") + logger.warning(f"[Layer2] Botasaurus subprocess error (RC: {result.returncode}): {result.stderr}") else: - # Linux ๋“ฑ์—์„œ๋Š” (monkey-patching ๋ฌธ์ œ๊ฐ€ ์—†๋‹ค๋ฉด) ์ง์ ‘ ์‹คํ–‰ ์‹œ๋„ - logger.debug(f"[Layer1] Trying Botasaurus @request (Direct): {url}") + # Linux ๋“ฑ์—์„œ๋Š” ์ง์ ‘ ์‹คํ–‰ ์‹œ๋„ + logger.debug(f"[Layer2] Trying Botasaurus @request (Direct): {url}") from botasaurus.request import request as b_request @b_request(headers=headers, use_stealth=True, proxy=LogicOhli24.get_proxy()) @@ -1989,17 +2036,15 @@ class LogicOhli24(AnimeModuleBase): return request.get(data) b_resp = fetch_url(url) - if b_resp and len(b_resp) > 10: - logger.info(f"[Layer1] Botasaurus success, HTML len: {len(b_resp)}") + if b_resp and len(b_resp) > 500: + logger.info(f"[Layer2] Botasaurus success, HTML len: {len(b_resp)}") return b_resp else: - logger.warning(f"[Layer1] Botasaurus short response: {len(b_resp) if b_resp else 0}") + logger.warning(f"[Layer2] Botasaurus short response: {len(b_resp) if b_resp else 0}") except Exception as e: - logger.warning(f"[Layer1] Botasaurus failed: {e}") - """ + logger.warning(f"[Layer2] Botasaurus failed: {e}") - # === [TEST MODE] Layer 1 (๊ธฐ์กด ๊ฒƒ๋“ค) ์ผ์‹œ ๋น„ํ™œ์„ฑํ™” - Layer 3, 4๋งŒ ํ…Œ์ŠคํŠธ === - response_data = "" # ๋ฐ”๋กœ Layer 3๋กœ ์ด๋™ + response_data = "" # max_retries = 3 # for attempt in range(max_retries): @@ -2049,33 +2094,7 @@ class LogicOhli24(AnimeModuleBase): # logger.warning(f"[Layer2] Cloudscraper failed: {e}") - # --- Layer 3A: Zendriver Daemon (๋น ๋ฆ„ - ๋ธŒ๋ผ์šฐ์ € ์ƒ์‹œ ๋Œ€๊ธฐ) --- - if not response_data or len(response_data) < 10: - if LogicOhli24.is_zendriver_daemon_running(): - # 30์ดˆ ํƒ€์ž„์•„์›ƒ ์ ์šฉ - logger.debug(f"[Layer3A] Trying Zendriver Daemon: {url} (Timeout: 30s)") - daemon_result = LogicOhli24.fetch_via_daemon(url, 30) - - if daemon_result.get("success") and daemon_result.get("html"): - elapsed = time.time() - total_start - logger.info(f"[Ohli24] Fetch success via Layer3A: {url} in {elapsed:.2f}s (HTML: {len(daemon_result['html'])})") - # ์„ฑ๊ณต ์‹œ ์—ฐ์† ์‹คํŒจ ์นด์šดํŠธ ์ดˆ๊ธฐํ™” - LogicOhli24.daemon_fail_count = 0 - return daemon_result["html"] - else: - error_msg = daemon_result.get('error', 'Unknown') - logger.warning(f"[Layer3A] Daemon failed: {error_msg}") - - # ์‹คํŒจ ์นด์šดํŠธ ์ฆ๊ฐ€ ๋ฐ 10ํšŒ ๋ˆ„์  ์‹œ ์žฌ์‹œ์ž‘ - LogicOhli24.daemon_fail_count += 1 - if LogicOhli24.daemon_fail_count >= 10: - logger.error(f"[Layer3A] Daemon failed {LogicOhli24.daemon_fail_count} times consecutively. Restarting daemon...") - try: - import subprocess - subprocess.run(['pkill', '-f', 'zendriver_daemon'], check=False) - LogicOhli24.daemon_fail_count = 0 - except Exception as e: - logger.error(f"Failed to kill daemon: {e}") + # (Layer 3A was moved to the top) # --- Layer 3B: Zendriver Subprocess Fallback (๋ฐ๋ชฌ ์‹คํŒจ ์‹œ) --- if not response_data or len(response_data) < 10: @@ -2181,6 +2200,7 @@ class LogicOhli24(AnimeModuleBase): # ์บ์‹œ ๋น„ํ™œ์„ฑํ™” ์‹œ ๋ฐ”๋กœ fetch if cache_minutes <= 0: + logger.debug(f"[Cache SKIP] Cache disabled (minutes: {cache_minutes})") return LogicOhli24.get_html(url, **kwargs) # ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ @@ -2201,8 +2221,14 @@ class LogicOhli24(AnimeModuleBase): if cached_html and len(cached_html) > 100: logger.debug(f"[Cache HIT] {url[:60]}... (age: {cache_age:.0f}s)") return cached_html + else: + logger.debug(f"[Cache MISS] Cached content is empty or too short for {url[:60]}...") except Exception as e: logger.warning(f"[Cache READ ERROR] {e}") + else: + logger.debug(f"[Cache EXPIRED] {url[:60]}... (age: {cache_age:.0f}s, expiry: {cache_minutes * 60}s)") + else: + logger.debug(f"[Cache MISS] No cache file found for {url[:60]}") # ์‹ ๊ทœ fetch html = LogicOhli24.get_html(url, **kwargs) @@ -2821,13 +2847,17 @@ class Ohli24QueueEntity(AnimeQueueEntity): # [Lazy Extraction] prepare_extra() replaces make_episode_info() def prepare_extra(self): try: - base_url = P.ModelSetting.get("ohli24_url") + base_url = LogicOhli24.get_base_url() # ์—ํ”ผ์†Œ๋“œ ํŽ˜์ด์ง€ URL (์˜ˆ: https://ani.ohli24.com/e/์›ํŽ€๋งจ 3๊ธฐ 1ํ™”) url = self.info["va"] if "//e/" in url: url = url.replace("//e/", "/e/") + # URL Sanitization for va + if base_url in url and f"{base_url}//" in url: + url = url.replace(f"{base_url}//", f"{base_url}/") + ourls = parse.urlparse(url) headers = {