#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/02/08 3:44 PM
# @Author : yommi
# @Site :
# @File : logic_ohli24
# @Software: PyCharm
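"""Ohli24 (ohli24.net) download module for the SJVA anime_downloader plugin.

Scrapes series and episode listings from the site, resolves each episode's HLS
stream and subtitle URL, and hands downloads to the shared FfmpegQueue while
tracking download state in ModelOhli24Item.
"""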
import os, sys, traceback, re, json, threading
from datetime import datetime
import copy
import hashlib

# third-party
import requests
from lxml import html
from urllib import parse
import urllib

# third-party
from flask import request, render_template, jsonify
from sqlalchemy import or_, and_, func, not_, desc

from pip._internal import main

# Install missing third-party dependencies at runtime.
# The pip package name can differ from the import name (beautifulsoup4 -> bs4),
# so map one to the other instead of importing the loop variable itself.
pkgs = {"beautifulsoup4": "bs4", "jsbeautifier": "jsbeautifier"}
for pkg_name, import_name in pkgs.items():
    try:
        __import__(import_name)
    except ImportError:
        main(["install", pkg_name])

from bs4 import BeautifulSoup
import jsbeautifier

# SJVA common
from framework import db, scheduler, path_data, socketio
from framework.util import Util
from framework.common.util import headers
from plugin import (
    LogicModuleBase,
    FfmpegQueueEntity,
    FfmpegQueue,
    default_route_socketio,
)
from tool_base import d

# package
from .plugin import P

logger = P.logger

#########################################################
class LogicOhli24(LogicModuleBase):
    db_default = {
        "ohli24_db_version": "1",
        "ohli24_url": "https://ohli24.net",
        "ohli24_download_path": os.path.join(path_data, P.package_name, "ohli24"),
        "ohli24_auto_make_folder": "True",
        "ohli24_auto_make_season_folder": "True",
        "ohli24_finished_insert": "[완결]",
        "ohli24_max_ffmpeg_process_count": "1",
        "ohli24_order_desc": "False",
        "ohli24_auto_start": "False",
        "ohli24_interval": "* 5 * * *",
        "ohli24_auto_mode_all": "False",
        "ohli24_auto_code_list": "all",
        "ohli24_current_code": "",
        "ohli24_uncompleted_auto_enqueue": "False",
        "ohli24_image_url_prefix_series": "https://www.jetcloud.cc/series/",
        "ohli24_image_url_prefix_episode": "https://www.jetcloud-list.cc/thumbnail/",
    }

    current_headers = None
    current_data = None

    session = requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.98 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Referer": "",
    }
    useragent = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
        "like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36"
    }

    def __init__(self, P):
        super(LogicOhli24, self).__init__(P, "setting", scheduler_desc="ohli24 자동 다운로드")
        self.name = "ohli24"
        self.queue = None
        default_route_socketio(P, self)

    @staticmethod
    def db_init():
        pass
        # try:
        #     for key, value in P.Logic.db_default.items():
        #         if db.session.query(ModelSetting).filter_by(key=key).count() == 0:
        #             db.session.add(ModelSetting(key, value))
        #     db.session.commit()
        # except Exception as e:
        #     logger.error('Exception:%s', e)
        #     logger.error(traceback.format_exc())

    def process_menu(self, sub, req):
        arg = P.ModelSetting.to_dict()
        arg["sub"] = self.name
        if sub in ["setting", "queue", "list", "category", "request"]:
            if sub == "request" and req.args.get("content_code") is not None:
                arg["ohli24_current_code"] = req.args.get("content_code")
            if sub == "setting":
                job_id = "%s_%s" % (self.P.package_name, self.name)
                arg["scheduler"] = str(scheduler.is_include(job_id))
                arg["is_running"] = str(scheduler.is_running(job_id))
            return render_template(
                "{package_name}_{module_name}_{sub}.html".format(
                    package_name=P.package_name, module_name=self.name, sub=sub
                ),
                arg=arg,
            )
        return render_template("sample.html", title="%s - %s" % (P.package_name, sub))

    # @staticmethod
    def process_ajax(self, sub, req):
        try:
            if sub == "analysis":
                # code = req.form['code']
                code = request.form["code"]
                wr_id = request.form.get("wr_id", None)
                bo_table = request.form.get("bo_table", None)
                data = []
                # print(code)
                # logger.info("code::: %s", code)
                P.ModelSetting.set("ohli24_current_code", code)
                data = self.get_series_info(code, wr_id, bo_table)
                self.current_data = data
                return jsonify({"ret": "success", "data": data, "code": code})
            elif sub == "anime_list":
                data = []
                cate = request.form["type"]
                page = request.form["page"]
                data = self.get_anime_info(cate, page)
                # self.current_data = data
                return jsonify(
                    {"ret": "success", "cate": cate, "page": page, "data": data}
                )
            elif sub == "complete_list":
                data = []
                cate = request.form["type"]
                logger.debug("cate: %s", cate)
                page = request.form["page"]
                data = self.get_anime_info(cate, page)
                # self.current_data = data
                return jsonify(
                    {"ret": "success", "cate": cate, "page": page, "data": data}
                )
            elif sub == "search":
                data = []
                # cate = request.form["type"]
                # page = request.form["page"]
                query = request.form["query"]
                data = self.get_search_result(query)
                # self.current_data = data
                return jsonify({"ret": "success", "query": query, "data": data})
            elif sub == "add_queue":
                ret = {}
                info = json.loads(request.form["data"])
                logger.info("info:: %s", info)
                ret["ret"] = self.add(info)
                return jsonify(ret)
            elif sub == "entity_list":
                return jsonify(self.queue.get_entity_list())
            elif sub == "queue_command":
                ret = self.queue.command(
                    req.form["command"], int(req.form["entity_id"])
                )
                return jsonify(ret)
            elif sub == "add_queue_checked_list":
                data = json.loads(request.form["data"])

                def func():
                    count = 0
                    for tmp in data:
                        add_ret = self.add(tmp)
                        if add_ret.startswith("enqueue"):
                            self.socketio_callback("list_refresh", "")
                            count += 1
                    notify = {
                        "type": "success",
                        "msg": "%s 개의 에피소드를 큐에 추가 하였습니다." % count,
                    }
                    socketio.emit(
                        "notify", notify, namespace="/framework", broadcast=True
                    )

                thread = threading.Thread(target=func, args=())
                thread.daemon = True
                thread.start()
                return jsonify("")
            elif sub == "web_list":
                return jsonify(ModelOhli24Item.web_list(request))
            elif sub == "db_remove":
                return jsonify(ModelOhli24Item.delete_by_id(req.form["id"]))
        except Exception as e:
            P.logger.error("Exception:%s", e)
            P.logger.error(traceback.format_exc())

    def setting_save_after(self):
        if self.queue.get_max_ffmpeg_count() != P.ModelSetting.get_int(
            "ohli24_max_ffmpeg_process_count"
        ):
            self.queue.set_max_ffmpeg_count(
                P.ModelSetting.get_int("ohli24_max_ffmpeg_process_count")
            )

    def get_series_info(self, code, wr_id, bo_table):
        code_type = "c"
        try:
            if (
                self.current_data is not None
                and "code" in self.current_data
                and self.current_data["code"] == code
            ):
                return self.current_data
            if code.startswith("http"):
                # if code.split('c/')[1] is not None:
                #     code = code.split('c/')[1]
                #     code_type = 'c'
                # elif code.split('e/')[1] is not None:
                #     code_type = 'e'
                #     code = code.split('e/')[1]
                if "/c/" in code:
                    code = code.split("c/")[1]
                    code_type = "c"
                elif "/e/" in code:
                    code = code.split("e/")[1]
                    code_type = "e"

            logger.info(f"code:::: {code}")
            if code_type == "c":
                url = P.ModelSetting.get("ohli24_url") + "/c/" + code
            elif code_type == "e":
                url = P.ModelSetting.get("ohli24_url") + "/e/" + code
            else:
                url = P.ModelSetting.get("ohli24_url") + "/e/" + code

            if wr_id is not None:
                # print(len(wr_id))
                if len(wr_id) > 0:
                    url = (
                        P.ModelSetting.get("ohli24_url")
                        + "/bbs/board.php?bo_table="
                        + bo_table
                        + "&wr_id="
                        + wr_id
                    )
                else:
                    pass

            logger.debug("url:::> %s", url)
            # self.current_headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)
            # AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36',
            # 'Referer': url }
            response_data = LogicOhli24.get_html(url, timeout=10)
            tree = html.fromstring(response_data)

            title = tree.xpath('//div[@class="view-title"]/h1/text()')[0]
            # image = tree.xpath('//div[@class="view-info"]/div[@class="image"]/div/img')[0]['src']
            image = tree.xpath('//div[@class="image"]/div/img/@src')[0]
            image = image.replace("..", P.ModelSetting.get("ohli24_url"))

            des_items = tree.xpath('//div[@class="list"]/p')
            des = {}
            des_key = [
                "_otit",
                "_dir",
                "_pub",
                "_tag",
                "_classifi",
                "_country",
                "_grade",
                "_total_chapter",
                "_show_time",
                "_release_year",
            ]
            description_dict = {
                "원제": "_otit",
                "원작": "_org",
                "감독": "_dir",
                "각본": "_scr",
                "캐릭터 디자인": "_character_design",
                "음악": "_sound",
                "제작사": "_pub",
                "장르": "_tag",
                "분류": "_classifi",
                "제작국가": "_country",
                "방영일": "_date",
                "등급": "_grade",
                "총화수": "_total_chapter",
                "상영시간": "_show_time",
                "개봉년도": "_release_year",
            }

            list_body_li = tree.xpath('//ul[@class="list-body"]/li')
            logger.info(list_body_li)
            episodes = []
            vi = None
            for li in list_body_li:
                # Use a separate name for the per-episode title so the series
                # title read above is not overwritten inside this loop.
                epi_title = li.xpath(".//a/text()")[0].strip()
                thumbnail = image
                logger.info(li.xpath('.//a[@class="item-subject"]/@href'))
                # Relative xpath so each episode resolves its own link.
                link = (
                    P.ModelSetting.get("ohli24_url")
                    + li.xpath('.//a[@class="item-subject"]/@href')[0]
                )
                date = li.xpath('.//div[@class="wr-date"]/text()')[0]
                m = hashlib.md5(epi_title.encode("utf-8"))
                # _vi = hashlib.md5(title.encode('utf-8').hexdigest())
                logger.info(m.hexdigest())
                _vi = m.hexdigest()
                episodes.append(
                    {
                        "title": epi_title,
                        "link": link,
                        "thumbnail": image,
                        "date": date,
                        "day": date,
                        "_id": epi_title,
                        "va": link,
                        "_vi": _vi,
                        "content_code": code,
                    }
                )

            logger.info("des_items length:: %s", len(des_items))
            for idx, item in enumerate(des_items):
                # key = des_key[idx]
                span = item.xpath(".//span//text()")
                logger.info(span)
                key = description_dict[span[0]]
                try:
                    des[key] = item.xpath(".//span/text()")[1]
                except IndexError:
                    des[key] = ""

            logger.info(f"des::>> {des}")
            image = image.replace("..", P.ModelSetting.get("ohli24_url"))
            logger.info("images:: %s", image)
            logger.info("title:: %s", title)

            ser_description = tree.xpath(
                '//div[@class="view-stocon"]/div[@class="c"]/text()'
            )

            data = {
                "title": title,
                "image": image,
                "date": "2022.01.11 00:30 (화)",
                "ser_description": ser_description,
                "des": des,
                "episode": episodes,
            }

            if P.ModelSetting.get_bool("ohli24_order_desc"):
                data["episode"] = list(reversed(data["episode"]))
                data["list_order"] = "desc"

            return data
            # logger.info(response_text)
        except Exception as e:
            P.logger.error("Exception:%s", e)
            P.logger.error(traceback.format_exc())
            return {"ret": "exception", "log": str(e)}

    def get_anime_info(self, cate, page):
        try:
            # "ing", "movie" and "complete" all use the same board URL pattern.
            url = (
                P.ModelSetting.get("ohli24_url")
                + "/bbs/board.php?bo_table="
                + cate
                + "&page="
                + page
            )
            logger.info("url:::> %s", url)

            data = {}
            response_data = LogicOhli24.get_html(url, timeout=10)
            tree = html.fromstring(response_data)
            tmp_items = tree.xpath('//div[@class="list-row"]')
            data["anime_count"] = len(tmp_items)
            data["anime_list"] = []
            for item in tmp_items:
                entity = {}
                entity["link"] = item.xpath(".//a/@href")[0]
                entity["code"] = entity["link"].split("/")[-1]
                entity["title"] = item.xpath(".//div[@class='post-title']/text()")[
                    0
                ].strip()
                entity["image_link"] = item.xpath(".//div[@class='img-item']/img/@src")[
                    0
                ].replace("..", P.ModelSetting.get("ohli24_url"))
                data["ret"] = "success"
                data["anime_list"].append(entity)
            return data
        except Exception as e:
            P.logger.error("Exception:%s", e)
            P.logger.error(traceback.format_exc())
            return {"ret": "exception", "log": str(e)}

    # @staticmethod
    def get_search_result(self, query):
        try:
            _query = urllib.parse.quote(query)
            url = (
                P.ModelSetting.get("ohli24_url")
                + "/bbs/search.php?srows=24&gr_id=&sfl=wr_subject&stx="
                + _query
            )
            logger.info("url:::> %s", url)

            data = {}
            response_data = LogicOhli24.get_html(url, timeout=10)
            tree = html.fromstring(response_data)
            tmp_items = tree.xpath('//div[@class="list-row"]')
            data["anime_count"] = len(tmp_items)
            data["anime_list"] = []
            for item in tmp_items:
                entity = {}
                entity["link"] = item.xpath(".//a/@href")[0]
                # entity["code"] = entity["link"].split("/")[-1]
                entity["wr_id"] = entity["link"].split("=")[-1]
                # logger.debug(item.xpath(".//div[@class='post-title']/text()").join())
                entity["title"] = "".join(
                    item.xpath(".//div[@class='post-title']/text()")
                ).strip()
                entity["image_link"] = item.xpath(".//div[@class='img-item']/img/@src")[
                    0
                ].replace("..", P.ModelSetting.get("ohli24_url"))
                entity["code"] = item.xpath(".//div[@class='img-item']/img/@alt")[0]
                data["ret"] = "success"
                data["anime_list"].append(entity)
            return data
        except Exception as e:
            P.logger.error("Exception:%s", e)
            P.logger.error(traceback.format_exc())
            return {"ret": "exception", "log": str(e)}

    # @staticmethod
    def plugin_load(self):
        try:
            logger.debug("%s plugin_load", P.package_name)
            self.queue = FfmpegQueue(
                P, P.ModelSetting.get_int("ohli24_max_ffmpeg_process_count")
            )
            self.current_data = None
            self.queue.queue_start()
        except Exception as e:
            logger.error("Exception:%s", e)
            logger.error(traceback.format_exc())

    @staticmethod
    def plugin_unload():
        try:
            logger.debug("%s plugin_unload", P.package_name)
            scheduler.remove_job("%s_recent" % P.package_name)
        except Exception as e:
            logger.error("Exception:%s", e)
            logger.error(traceback.format_exc())

    @staticmethod
    def reset_db() -> bool:
        db.session.query(ModelOhli24Item).delete()
        db.session.commit()
        return True

    @staticmethod
    def get_html(url, referer=None, stream=False, timeout=5):
        data = ""
        try:
            if LogicOhli24.session is None:
                LogicOhli24.session = requests.session()
            # logger.debug('get_html :%s', url)
            headers["Referer"] = "" if referer is None else referer
            page_content = LogicOhli24.session.get(
                url, headers=headers, timeout=timeout
            )
            data = page_content.text
        except Exception as e:
            logger.error("Exception:%s", e)
            logger.error(traceback.format_exc())
        return data

    #########################################################
    def add(self, episode_info):
        if self.is_exist(episode_info):
            return "queue_exist"
        else:
            db_entity = ModelOhli24Item.get_by_ohli24_id(episode_info["_id"])
            logger.debug("db_entity:::> %s", db_entity)
            if db_entity is None:
                entity = Ohli24QueueEntity(P, self, episode_info)
                logger.debug("entity:::> %s", entity.as_dict())
                ModelOhli24Item.append(entity.as_dict())
                logger.debug("entity:: type >> %s", type(entity))
                self.queue.add_queue(entity)
                return "enqueue_db_append"
            elif db_entity.status != "completed":
                entity = Ohli24QueueEntity(P, self, episode_info)
                self.queue.add_queue(entity)
                return "enqueue_db_exist"
            else:
                return "db_completed"

    def is_exist(self, info):
        for e in self.queue.entity_list:
            if e.info["_id"] == info["_id"]:
                return True
        return False
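

# Ohli24QueueEntity.make_episode_info() resolves an episode roughly as follows
# (inferred from the requests below): fetch the episode page, extract the ajax
# player URL from its inline script, follow it to the player iframe, unpack the
# eval-packed player script to read the subtitle "tracks", POST to
# .../player/index.php?...&do=getVideo for the HLS master playlist, and take the
# last variant in that playlist as the download URL.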
class Ohli24QueueEntity(FfmpegQueueEntity):
    def __init__(self, P, module_logic, info):
        super(Ohli24QueueEntity, self).__init__(P, module_logic, info)
        self._vi = None
        self.url = None
        self.epi_queue = None
        self.filepath = None
        self.savepath = None
        self.quality = None
        self.filename = None
        self.vtt = None
        self.season = 1
        self.content_title = None
        self.srt_url = None
        self.headers = None
        # TODO: temporary handling
        self.make_episode_info()

    def refresh_status(self):
        self.module_logic.socketio_callback("status", self.as_dict())

    def info_dict(self, tmp):
        # logger.debug('self.info::> %s', self.info)
        for key, value in self.info.items():
            tmp[key] = value
        tmp["vtt"] = self.vtt
        tmp["season"] = self.season
        tmp["content_title"] = self.content_title
        tmp["ohli24_info"] = self.info
        tmp["epi_queue"] = self.epi_queue
        return tmp

    # NOTE: misspelled name kept as-is; it appears to be the completion hook
    # name that FfmpegQueue invokes.
    def donwload_completed(self):
        db_entity = ModelOhli24Item.get_by_ohli24_id(self.info["_id"])
        if db_entity is not None:
            db_entity.status = "completed"
            db_entity.completed_time = datetime.now()
            db_entity.save()

    # Get episode info from OHLI24 site
    def make_episode_info(self):
        try:
            # url = 'https://ohli24.net/e/' + self.info['va']
            base_url = "https://ohli24.net"
            iframe_url = ""
            # https://ohli24.net/e/%EB%85%B9%EC%9D%84%20%EB%A8%B9%EB%8A%94%20%EB%B9%84%EC%8A%A4%EC%BD%94%206%ED%99%94
            url = self.info["va"]
            ourls = parse.urlparse(url)
            headers = {
                "referer": f"{ourls.scheme}://{ourls.netloc}",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36",
            }
            logger.debug("url:::> %s", url)
            logger.info("self.info:::> %s", self.info)

            text = requests.get(url, headers=headers).text
            # logger.debug(text)
            soup1 = BeautifulSoup(text, "lxml")

            # The episode page embeds the player via an ajax url of the form: url : "../player/..."
            pattern = re.compile(r"url : \"\.\.(.*)\"")
            script = soup1.find("script", text=pattern)
            if script:
                match = pattern.search(script.text)
                if match:
                    iframe_url = match.group(1)
                    logger.info("iframe_url::> %s", iframe_url)

            resp = requests.get(base_url + iframe_url, headers=headers, timeout=20).text
            soup2 = BeautifulSoup(resp, "lxml")
            iframe_src = soup2.find("iframe")["src"]
            # print(resp1)
            logger.debug("iframe_src:::> %s", iframe_src)

            resp1 = requests.get(iframe_src, headers=headers, timeout=600).text
            # logger.info('resp1::>> %s', resp1)
            soup3 = BeautifulSoup(resp1, "lxml")

            # The player page ships a packed (eval-compressed) script; unpack it to read its config.
            # packed_pattern = re.compile(r'\\{*(eval.+)*\\}', re.MULTILINE | re.DOTALL)
            s_pattern = re.compile(r"(eval.+)", re.MULTILINE | re.DOTALL)
            packed_pattern = re.compile(
                r"if?.([^{}]+)\{.*(eval.+)\}.+else?.{.(eval.+)\}", re.DOTALL
            )
            packed_script = soup3.find("script", text=s_pattern)
            # packed_script = soup3.find('script')
            # logger.info('packed_script>>> %s', packed_script.text)
            unpack_script = None
            if packed_script is not None:
                match = packed_pattern.search(packed_script.text)
                # match = re.search(packed_pattern, packed_script.text)
                # logger.debug("match::: %s", match.group())
                unpack_script = jsbeautifier.beautify(match.group(3))
                # logger.info('match groups:: %s', match.groups())
                # logger.info('match group3:: %s', match.group(3))
                # print('packed_script==>', packed_script)
            logger.debug(unpack_script)

            # Pull the "tracks" (subtitle) block out of the unpacked player config.
            p1 = re.compile(r"(\"tracks\".*\])\,\"captions\"", re.MULTILINE | re.DOTALL)
            m2 = re.search(
                r"(\"tracks\".*\]).*\"captions\"",
                unpack_script,
                flags=re.MULTILINE | re.DOTALL,
            )
            # print(m2.group(1))
            dict_string = "{" + m2.group(1) + "}"
            logger.info("dict_string::> %s", dict_string)
            tracks = json.loads(dict_string)
            self.srt_url = tracks["tracks"][0]["file"]
            logger.debug("srt_url::: %s", tracks["tracks"][0]["file"])

            video_hash = iframe_src.split("/")
            video_hashcode = re.sub(r"index\.php\?data=", "", video_hash[-1])
            self._vi = video_hashcode
            video_info_url = f"{video_hash[0]}//{video_hash[2]}/player/index.php?data={video_hashcode}&do=getVideo"
            # print('hash:::', video_hash)
            logger.debug("video_info_url::: %s", video_info_url)

            headers = {
                "referer": f"{iframe_src}",
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36"
                "Mozilla/5.0 (Macintosh; Intel "
                "Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 "
                "Whale/3.12.129.46 Safari/537.36",
                "X-Requested-With": "XMLHttpRequest",
            }
            # print(headers)
            payload = {
                "hash": video_hash[-1],
            }
            resp2 = requests.post(
                video_info_url, headers=headers, data=payload, timeout=20
            ).json()
            logger.debug("resp2::> %s", resp2)

            hls_url = resp2["videoSource"]
            logger.debug("video_url::> %s", hls_url)

            resp3 = requests.get(hls_url, headers=headers).text
            # logger.debug(resp3)
            # stream_url = hls_url.split('\n')[-1].strip()

            # The last two lines of the master playlist are the variant info line and its stream url.
            stream_info = resp3.split("\n")[-2:]
            # logger.debug('stream_url:: %s', stream_url)
            logger.debug("stream_info:: %s", stream_info)

            self.headers = {
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/71.0.3554.0 Safari/537.36Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36",
                "Referer": "https://ndoodle.xyz/video/03a3655fff3e9bdea48de9f49e938e32",
            }
            self.url = stream_info[1].strip()
            match = re.compile(r'NAME="(?P<quality>.*?)"').search(stream_info[0])
            self.quality = match.group("quality")
            logger.info(self.quality)
            # NOTE: the season/episode suffixes ("기"/"화") are assumed from the
            # episode-title format used on the site (e.g. "... 6화"); adjust if needed.
            match = re.compile(
                r"(?P<title>.*?)\s*((?P<season>\d+)%s)?\s*((?P<epi_no>\d+)%s)"
                % ("기", "화")
            ).search(self.info["title"])
            # default epi_no
            epi_no = 1
            if match:
                self.content_title = match.group("title").strip()
                if "season" in match.groupdict() and match.group("season") is not None:
                    self.season = int(match.group("season"))
                # epi_no = 1
                epi_no = int(match.group("epi_no"))
                ret = "%s.S%sE%s.%s-OHNI24.mp4" % (
                    self.content_title,
                    "0%s" % self.season if self.season < 10 else self.season,
                    "0%s" % epi_no if epi_no < 10 else epi_no,
                    self.quality,
                )
            else:
                self.content_title = self.info["title"]
                P.logger.debug("NOT MATCH")
                ret = "%s.720p-OHNI24.mp4" % self.info["title"]

            # e.g. an episode titled "녹을 먹는 비스코 6화" with a 720p variant becomes
            # "녹을 먹는 비스코.S01E06.720p-OHNI24.mp4"
            # logger.info('self.content_title:: %s', self.content_title)
            self.epi_queue = epi_no
            self.filename = Util.change_text_for_use_filename(ret)
            logger.info("self.filename::> %s", self.filename)
            self.savepath = P.ModelSetting.get("ohli24_download_path")
            logger.info("self.savepath::> %s", self.savepath)

            # TODO: handle completed (완결) series
            if P.ModelSetting.get_bool("ohli24_auto_make_folder"):
                if self.info["day"].find("완결") != -1:
                    folder_name = "%s %s" % (
                        P.ModelSetting.get("ohli24_finished_insert"),
                        self.content_title,
                    )
                else:
                    folder_name = self.content_title
                folder_name = Util.change_text_for_use_filename(folder_name.strip())
                self.savepath = os.path.join(self.savepath, folder_name)
                if P.ModelSetting.get_bool("ohli24_auto_make_season_folder"):
                    self.savepath = os.path.join(
                        self.savepath, "Season %s" % int(self.season)
                    )

            self.filepath = os.path.join(self.savepath, self.filename)
            if not os.path.exists(self.savepath):
                os.makedirs(self.savepath)

            from framework.common.util import write_file, convert_vtt_to_srt

            srt_filepath = os.path.join(
                self.savepath, self.filename.replace(".mp4", ".ko.srt")
            )
            if self.srt_url is not None and not os.path.exists(srt_filepath):
                # vtt_data = requests.get(self.vtt, headers=headers).text
                # srt_data = convert_vtt_to_srt(vtt_data)
                srt_data = requests.get(self.srt_url, headers=headers).text
                write_file(srt_data, srt_filepath)
        except Exception as e:
            P.logger.error("Exception:%s", e)
            P.logger.error(traceback.format_exc())

class ModelOhli24Item(db.Model):
    __tablename__ = "{package_name}_ohli24_item".format(package_name=P.package_name)
    __table_args__ = {"mysql_collate": "utf8_general_ci"}
    __bind_key__ = P.package_name

    id = db.Column(db.Integer, primary_key=True)
    created_time = db.Column(db.DateTime)
    completed_time = db.Column(db.DateTime)
    reserved = db.Column(db.JSON)
    content_code = db.Column(db.String)
    season = db.Column(db.Integer)
    episode_no = db.Column(db.Integer)
    title = db.Column(db.String)
    episode_title = db.Column(db.String)
    ohli24_va = db.Column(db.String)
    ohli24_vi = db.Column(db.String)
    ohli24_id = db.Column(db.String)
    quality = db.Column(db.String)
    filepath = db.Column(db.String)
    filename = db.Column(db.String)
    savepath = db.Column(db.String)
    video_url = db.Column(db.String)
    vtt_url = db.Column(db.String)
    thumbnail = db.Column(db.String)
    status = db.Column(db.String)
    ohli24_info = db.Column(db.JSON)

    def __init__(self):
        self.created_time = datetime.now()

    def __repr__(self):
        return repr(self.as_dict())

    def as_dict(self):
        ret = {x.name: getattr(self, x.name) for x in self.__table__.columns}
        ret["created_time"] = self.created_time.strftime("%Y-%m-%d %H:%M:%S")
        ret["completed_time"] = (
            self.completed_time.strftime("%Y-%m-%d %H:%M:%S")
            if self.completed_time is not None
            else None
        )
        return ret

    def save(self):
        db.session.add(self)
        db.session.commit()

    @classmethod
    def get_by_id(cls, idx):
        return db.session.query(cls).filter_by(id=idx).first()

    @classmethod
    def get_by_ohli24_id(cls, ohli24_id):
        return db.session.query(cls).filter_by(ohli24_id=ohli24_id).first()

    @classmethod
    def delete_by_id(cls, idx):
        db.session.query(cls).filter_by(id=idx).delete()
        db.session.commit()
        return True

    @classmethod
    def web_list(cls, req):
        ret = {}
        page = int(req.form["page"]) if "page" in req.form else 1
        page_size = 30
        job_id = ""
        search = req.form["search_word"] if "search_word" in req.form else ""
        option = req.form["option"] if "option" in req.form else "all"
        order = req.form["order"] if "order" in req.form else "desc"
        query = cls.make_query(search=search, order=order, option=option)
        count = query.count()
        query = query.limit(page_size).offset((page - 1) * page_size)
        lists = query.all()
        ret["list"] = [item.as_dict() for item in lists]
        ret["paging"] = Util.get_paging_info(count, page, page_size)
        return ret

    @classmethod
    def make_query(cls, search="", order="desc", option="all"):
        query = db.session.query(cls)
        if search is not None and search != "":
            if search.find("|") != -1:
                tmp = search.split("|")
                conditions = []
                for tt in tmp:
                    if tt != "":
                        conditions.append(cls.filename.like("%" + tt.strip() + "%"))
                query = query.filter(or_(*conditions))
            elif search.find(",") != -1:
                tmp = search.split(",")
                for tt in tmp:
                    if tt != "":
                        query = query.filter(cls.filename.like("%" + tt.strip() + "%"))
            else:
                query = query.filter(cls.filename.like("%" + search + "%"))
        if option == "completed":
            query = query.filter(cls.status == "completed")
        query = (
            query.order_by(desc(cls.id)) if order == "desc" else query.order_by(cls.id)
        )
        return query

    @classmethod
    def get_list_uncompleted(cls):
        return db.session.query(cls).filter(cls.status != "completed").all()

    @classmethod
    def append(cls, q):
        item = ModelOhli24Item()
        item.content_code = q["content_code"]
        item.season = q["season"]
        item.episode_no = q["epi_queue"]
        item.title = q["content_title"]
        item.episode_title = q["title"]
        item.ohli24_va = q["va"]
        item.ohli24_vi = q["_vi"]
        item.ohli24_id = q["_id"]
        item.quality = q["quality"]
        item.filepath = q["filepath"]
        item.filename = q["filename"]
        item.savepath = q["savepath"]
        item.video_url = q["url"]
        item.vtt_url = q["vtt"]
        item.thumbnail = q["thumbnail"]
        item.status = "wait"
        item.ohli24_info = q["ohli24_info"]
        item.save()