feat: Apply early skip check to all sites with glob pattern matching

This commit is contained in:
2026-01-02 17:06:41 +09:00
parent c3a83dfe85
commit 805464cb25
4 changed files with 174 additions and 99 deletions

View File

@@ -1,5 +1,5 @@
title: "애니 다운로더"
version: "0.4.8"
version: "0.4.9"
package_name: "anime_downloader"
developer: "projectdx"
description: "anime downloader"

View File

@@ -1276,8 +1276,11 @@ class LogicAniLife(AnimeModuleBase):
return "enqueue_db_exist"
def _predict_filepath(self, episode_info):
"""Predict the output filepath from episode info WITHOUT expensive site access."""
"""Predict the output filepath from episode info WITHOUT expensive site access.
Uses glob pattern to match any quality variant (720p, 1080p, etc.)."""
try:
import glob
title = episode_info.get("title", "")
if not title:
return None
@@ -1291,19 +1294,19 @@ class LogicAniLife(AnimeModuleBase):
content_title = match.group("title").strip()
season = int(match.group("season")) if match.group("season") else 1
epi_no = int(match.group("epi_no"))
quality = "1080P"
filename = "%s.S%sE%s.%s-AL.mp4" % (
# Use glob pattern for quality: *-AL.mp4 matches any quality
filename_pattern = "%s.S%sE%s.*-AL.mp4" % (
content_title,
"0%s" % season if season < 10 else season,
"0%s" % epi_no if epi_no < 10 else epi_no,
quality,
)
else:
filename = "%s.720p-AL.mp4" % title
# Fallback pattern for non-standard titles
filename_pattern = "%s.*-AL.mp4" % title
# Sanitize filename
filename = AniUtil.change_text_for_use_filename(filename)
# Sanitize pattern (but keep glob wildcards)
filename_pattern = AniUtil.change_text_for_use_filename(filename_pattern)
# Get save path
savepath = P.ModelSetting.get("anilife_download_path")
@@ -1320,12 +1323,21 @@ class LogicAniLife(AnimeModuleBase):
folder_name = AniUtil.change_text_for_use_filename(folder_name)
savepath = os.path.join(savepath, folder_name)
return os.path.join(savepath, filename)
# Use glob to find any matching file
full_pattern = os.path.join(savepath, filename_pattern)
matching_files = glob.glob(full_pattern)
if matching_files:
# Return first matching file
logger.debug(f"Found existing file: {matching_files[0]}")
return matching_files[0]
return None
except Exception as e:
logger.debug(f"_predict_filepath error: {e}")
return None
def is_exist(self, info):
for e in self.queue.entity_list:
if e.info["_id"] == info["_id"]:

View File

@@ -1507,6 +1507,7 @@ class LogicLinkkf(AnimeModuleBase):
logger.error(traceback.format_exc())
def add(self, episode_info):
"""Add episode to download queue with early skip checks."""
# 큐가 초기화되지 않았으면 초기화 (클래스 레벨 큐 확인)
if LogicLinkkf.queue is None:
logger.warning("Queue is None in add(), initializing...")
@@ -1522,50 +1523,47 @@ class LogicLinkkf(AnimeModuleBase):
# self.queue를 LogicLinkkf.queue로 바인딩 (프로세스 내부 공유 보장)
self.queue = LogicLinkkf.queue
# 큐 상태 로깅
queue_len = len(self.queue.entity_list) if self.queue else 0
logger.info(f"add() called - Queue length: {queue_len}, episode _id: {episode_info.get('_id')}")
# 1. Check if already in queue
if self.is_exist(episode_info):
logger.info(f"is_exist returned True for _id: {episode_info.get('_id')}")
return "queue_exist"
else:
# 2. Check DB for completion status FIRST (before expensive operations)
db_entity = ModelLinkkfItem.get_by_linkkf_id(episode_info["_id"])
# logger.info(f"db_entity: {db_entity}")
# logger.debug("db_entity:::> %s", db_entity)
# logger.debug("db_entity.status ::: %s", db_entity.status)
if db_entity is not None and db_entity.status == "completed":
logger.info(f"[Skip] Already completed in DB: {episode_info.get('program_title')} {episode_info.get('title')}")
return "db_completed"
# 3. Early file existence check - filepath is already in episode_info from get_series_info
filepath = episode_info.get("filepath")
if filepath and os.path.exists(filepath):
logger.info(f"[Skip] File already exists: {filepath}")
# Update DB status to completed if not already
if db_entity is not None and db_entity.status != "completed":
db_entity.status = "completed"
db_entity.filepath = filepath
db_entity.save()
return "file_exists"
# 4. Proceed with queue addition
queue_len = len(self.queue.entity_list) if self.queue else 0
logger.info(f"add() - Queue length: {queue_len}, episode _id: {episode_info.get('_id')}")
if db_entity is None:
entity = LinkkfQueueEntity(P, self, episode_info)
logger.debug("entity:::> %s", entity.as_dict())
ModelLinkkfItem.append(entity.as_dict())
# # logger.debug("entity:: type >> %s", type(entity))
#
self.queue.add_queue(entity)
# self.download_queue.add_queue(entity)
# P.logger.debug(F.config['path_data'])
# P.logger.debug(self.headers)
# filename = os.path.basename(entity.filepath)
# ffmpeg = SupportFfmpeg(entity.url, entity.filename, callback_function=self.callback_function,
# max_pf_count=0,
# save_path=entity.savepath, timeout_minute=60, headers=self.headers)
# ret = {'ret': 'success'}
# ret['json'] = ffmpeg.start()
return "enqueue_db_append"
elif db_entity.get("status") != "completed" if isinstance(db_entity, dict) else db_entity.status != "completed":
# DB에 있지만 완료되지 않은 경우도 큐에 추가
else:
# db_entity exists but status is not completed
status = db_entity.get("status") if isinstance(db_entity, dict) else db_entity.status
logger.info(f"db_entity status: {status}, adding to queue")
try:
logger.info("Creating LinkkfQueueEntity...")
entity = LinkkfQueueEntity(P, self, episode_info)
logger.info(f"LinkkfQueueEntity created, url: {entity.url}, filepath: {entity.filepath}")
logger.debug("entity:::> %s", entity.as_dict())
logger.info(f"Adding to queue, queue length before: {len(self.queue.entity_list)}")
result = self.queue.add_queue(entity)
logger.info(f"add_queue result: {result}, queue length after: {len(self.queue.entity_list)}")
except Exception as e:
@@ -1574,8 +1572,7 @@ class LogicLinkkf(AnimeModuleBase):
return "entity_creation_error"
return "enqueue_db_exist"
else:
return "db_completed"
# def is_exist(self, info):
# print(self.download_queue.entity_list)

View File

@@ -1262,52 +1262,117 @@ class LogicOhli24(AnimeModuleBase):
#########################################################
def add(self, episode_info: Dict[str, Any]) -> str:
    """Add an episode to the download queue, skipping work that is already done.

    The checks run from cheapest to most expensive so that the queue entity
    (whose construction may hit the remote site) is only built when the
    episode really needs downloading.

    Args:
        episode_info: Episode metadata. ``"_id"`` is required; ``"title"`` is
            used for logging and for predicting the output file.

    Returns:
        One of:
        ``"queue_exist"``       - the episode is already in the queue.
        ``"db_completed"``      - the DB row is already marked completed.
        ``"file_exists"``       - a matching output file was found on disk.
        ``"enqueue_db_append"`` - queued and a new DB row was appended.
        ``"enqueue_db_exist"``  - queued; a non-completed DB row already existed.
    """
    # 1. Already queued.
    if self.is_exist(episode_info):
        return "queue_exist"

    # 2. DB says the download finished: skip before any expensive work.
    db_entity = ModelOhli24Item.get_by_ohli24_id(episode_info["_id"])
    logger.debug(f"db_entity:::> {db_entity}")
    if db_entity is not None and db_entity.status == "completed":
        logger.info(f"[Skip] Already completed in DB: {episode_info.get('title')}")
        return "db_completed"

    # 3. Output file already on disk: skip, and bring the DB row in line.
    predicted_filepath = self._predict_filepath(episode_info)
    if predicted_filepath and os.path.exists(predicted_filepath):
        logger.info(f"[Skip] File already exists: {predicted_filepath}")
        # Any row that reaches this point is known to be non-completed
        # (completed rows returned in step 2), so no status re-check is needed.
        if db_entity is not None:
            db_entity.status = "completed"
            db_entity.filepath = predicted_filepath
            db_entity.save()
        return "file_exists"

    # 4. Build the queue entity once and enqueue it.
    logger.debug(f"episode_info:: {episode_info}")
    entity = Ohli24QueueEntity(P, self, episode_info)
    entity.proxy = LogicOhli24.get_proxy()
    logger.debug("entity:::> %s", entity.as_dict())

    if db_entity is None:
        # First time this episode is seen: persist it before queueing.
        ModelOhli24Item.append(entity.as_dict())
        self.queue.add_queue(entity)
        return "enqueue_db_append"

    # A DB row exists but is not completed: queue it again.
    self.queue.add_queue(entity)
    return "enqueue_db_exist"
def _predict_filepath(self, episode_info: Dict[str, Any]) -> Optional[str]:
"""Predict the output filepath from episode info WITHOUT expensive site access.
Uses glob pattern to match any quality variant (720p, 1080p, etc.)."""
try:
import glob
title = episode_info.get("title", "")
if not title:
return None
# Parse title pattern: "제목 N기 M화" or "제목 M화"
match = re.compile(
r"(?P<title>.*?)\s*((?P<season>\d+)기)?\s*((?P<epi_no>\d+)화)"
).search(title)
if match:
content_title = match.group("title").strip()
season = int(match.group("season")) if match.group("season") else 1
epi_no = int(match.group("epi_no"))
# Use glob pattern for quality: *-OHNI24.mp4 matches any quality
filename_pattern = "%s.S%sE%s.*-OHNI24.mp4" % (
content_title,
"0%s" % season if season < 10 else season,
"0%s" % epi_no if epi_no < 10 else epi_no,
)
else:
return "db_completed"
# Fallback pattern for non-standard titles
filename_pattern = "%s.*-OHNI24.mp4" % title
# Sanitize pattern (but keep glob wildcards)
filename_pattern = Util.change_text_for_use_filename(filename_pattern)
# Get save path
savepath = P.ModelSetting.get("ohli24_download_path")
if not savepath:
return None
# Check auto folder option
if P.ModelSetting.get_bool("ohli24_auto_make_folder"):
day = episode_info.get("day", "")
content_title_clean = match.group("title").strip() if match else title
if "완결" in day:
folder_name = "%s %s" % (
P.ModelSetting.get("ohli24_finished_insert"),
content_title_clean,
)
else:
folder_name = content_title_clean
folder_name = Util.change_text_for_use_filename(folder_name.strip())
savepath = os.path.join(savepath, folder_name)
if P.ModelSetting.get_bool("ohli24_auto_make_season_folder"):
season_val = int(match.group("season")) if match and match.group("season") else 1
savepath = os.path.join(savepath, "Season %s" % season_val)
# Use glob to find any matching file
full_pattern = os.path.join(savepath, filename_pattern)
matching_files = glob.glob(full_pattern)
if matching_files:
# Return first matching file
logger.debug(f"Found existing file: {matching_files[0]}")
return matching_files[0]
return None
except Exception as e:
logger.debug(f"_predict_filepath error: {e}")
return None
def is_exist(self, info: Dict[str, Any]) -> bool:
# print(self.queue)
@@ -1317,6 +1382,7 @@ class LogicOhli24(AnimeModuleBase):
return True
return False
def callback_function(self, **args: Any) -> None:
logger.debug(f"callback_function invoked with args: {args}")
if 'status' in args: