From 1c6ec0c52ebccfa9ce40530a1acffa03bb1d6d17 Mon Sep 17 00:00:00 2001 From: projectdx Date: Mon, 23 Mar 2026 21:26:54 +0900 Subject: [PATCH] fix(cache): validate cached ohli24 html before reuse - add basic html validation before reading or writing browse cache - purge invalid cached responses instead of reusing them for cache ttl - bump anime_downloader plugin version to 0.7.19 Co-Authored-By: First Fluke --- README.md | 5 +++ info.yaml | 2 +- mod_ohli24.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index fe81403..90beacb 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,11 @@ ## 📝 변경 이력 (Changelog) +### v0.7.19 (2026-03-23) +- **Ohli24 캐시 검증 1차 적용**: + - 브라우징 캐시 사용 전 HTML 유효성 검사를 추가하여 차단 페이지나 비정상 응답이 10분간 재사용되지 않도록 했습니다. + - 목록/검색/상세 URL 유형별로 기본 시그니처를 검사하고, `Just a moment`, `Cloudflare`, `captcha` 등 차단 마커가 있으면 캐시를 즉시 폐기합니다. + ### v0.7.18 (2026-03-23) - **Zendriver 업데이트**: - 자동 설치 대상 버전을 `zendriver==0.15.3`으로 명시하여 환경별 최신 패치 차이로 인한 동작 편차를 줄였습니다. 
diff --git a/info.yaml b/info.yaml index 8e98562..aadc637 100644 --- a/info.yaml +++ b/info.yaml @@ -1,5 +1,5 @@ title: "애니 다운로더" -version: 0.7.18 +version: 0.7.19 package_name: "anime_downloader" developer: "projectdx" description: "anime downloader" diff --git a/mod_ohli24.py b/mod_ohli24.py index dffbe5a..8e2979e 100644 --- a/mod_ohli24.py +++ b/mod_ohli24.py @@ -16,6 +16,7 @@ import re import subprocess import sys import threading +import time import traceback import urllib import unicodedata @@ -2345,6 +2346,66 @@ class LogicOhli24(AnimeModuleBase): return response_data + @staticmethod + def _get_cache_page_type(url: str) -> str: + parsed = parse.urlparse(url) + + if "/bbs/search.php" in parsed.path: + return "search" + if "/bbs/board.php" in parsed.path: + if "wr_id=" in parsed.query: + return "detail" + return "list" + if "/e/" in parsed.path or "/c/" in parsed.path: + return "detail" + return "generic" + + @classmethod + def _is_valid_cached_html(cls, url: str, html_text: str) -> Tuple[bool, str]: + if not html_text: + return False, "empty" + + html_text = html_text.strip() + if len(html_text) < 200: + return False, "too_short" + + lowered = html_text.lower() + blocked_markers = [ + "just a moment", + "access denied", + "captcha", + "attention required", + "enable javascript", + "cf-browser-verification", + "cloudflare", + "blocked", + "403 forbidden", + "error 403", + "error 404", + "error 500", + "too many requests", + ] + for marker in blocked_markers: + if marker in lowered: + return False, f"blocked:{marker}" + + if "" not in lowered: + return False, "not_html_document" + + page_type = cls._get_cache_page_type(url) + markers_by_type = { + "list": ["list-row", "post-title", "img-item", "board-list", "list-wrap"], + "search": ["list-row", "post-title", "img-item", "search.php", "board-list"], + "detail": ["item-subject", 'itemprop="headline"', 'itemprop="image"', "view-wrap", "serial-movie-wrap"], + "generic": [" str: @@ -2353,8 +2414,6 @@ 
class LogicOhli24(AnimeModuleBase): 캐시 시간은 ohli24_cache_minutes 설정에 따름 (0=캐시 없음) 다운로드 루틴은 이 함수를 사용하지 않음 (세션/헤더 필요) """ - import hashlib - cache_minutes = int(P.ModelSetting.get("ohli24_cache_minutes") or 0) # 캐시 비활성화 시 바로 fetch @@ -2377,11 +2436,17 @@ class LogicOhli24(AnimeModuleBase): try: with open(cache_file, 'r', encoding='utf-8') as f: cached_html = f.read() - if cached_html and len(cached_html) > 100: - logger.debug(f"[Cache HIT] {url[:60]}... (age: {cache_age:.0f}s)") + is_valid, reason = LogicOhli24._is_valid_cached_html(url, cached_html) + if is_valid: + logger.debug(f"[Cache HIT] {url[:60]}... (age: {cache_age:.0f}s, reason: {reason})") return cached_html - else: - logger.debug(f"[Cache MISS] Cached content is empty or too short for {url[:60]}...") + + logger.warning(f"[Cache INVALID] {url[:60]}... (reason: {reason})") + try: + os.remove(cache_file) + logger.debug(f"[Cache PURGE] Removed invalid cache for {url[:60]}...") + except OSError as purge_error: + logger.warning(f"[Cache PURGE ERROR] {purge_error}") except Exception as e: logger.warning(f"[Cache READ ERROR] {e}") else: @@ -2393,13 +2458,16 @@ class LogicOhli24(AnimeModuleBase): html = LogicOhli24.get_html(url, **kwargs) # 캐시에 저장 (유효한 HTML만) - if html and len(html) > 100: + is_valid, reason = LogicOhli24._is_valid_cached_html(url, html) + if is_valid: try: with open(cache_file, 'w', encoding='utf-8') as f: f.write(html) - logger.debug(f"[Cache SAVE] {url[:60]}... (reason: {reason})") + logger.debug(f"[Cache SAVE] {url[:60]}... (reason: {reason})") except Exception as e: logger.warning(f"[Cache WRITE ERROR] {e}") + elif html: + logger.warning(f"[Cache SKIP SAVE] {url[:60]}... (reason: {reason})") return html