import os
import sys
import threading
import traceback
import json
from datetime import datetime
import hashlib
import re
import asyncio
import platform

import lxml.etree

# third-party
import requests
from lxml import html
from urllib import parse
import urllib

packages = [
    "beautifulsoup4",
    "requests-cache",
    "cloudscraper",
    "selenium_stealth",
    "webdriver_manager",
]

for package in packages:
    try:
        # the pip distribution name does not always match the import name
        import_name = {
            "beautifulsoup4": "bs4",
            "requests-cache": "requests_cache",
        }.get(package, package)
        __import__(import_name)
    except ImportError:
        # main(["install", package])
        os.system(f"pip install {package}")

from bs4 import BeautifulSoup

import cloudscraper

# third-party
from flask import request, render_template, jsonify
from sqlalchemy import or_, and_, func, not_, desc

# SJVA common
from framework import db, scheduler, path_data, socketio
from framework.util import Util
from framework.common.util import headers
from plugin import (
    LogicModuleBase,
    FfmpegQueueEntity,
    FfmpegQueue,
    default_route_socketio,
)
from tool_base import d

# package
from .plugin import P

logger = P.logger

# =================================================================#
# package
class LogicAniLife(LogicModuleBase):
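    """Logic module for the AniLife site.

    Scrapes series/episode listings from anilife.live, resolves the actual
    stream URL for an episode and pushes the result into SJVA's FfmpegQueue
    for download.  Page fetching is done through several fallbacks
    (Selenium, Playwright, cloudscraper) because the site sits behind
    Cloudflare-style protection.
    """
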
    db_default = {
        "anilife_db_version": "1",
        "anilife_url": "https://anilife.live",
        "anilife_download_path": os.path.join(path_data, P.package_name, "ohli24"),
        "anilife_auto_make_folder": "True",
        "anilife_auto_make_season_folder": "True",
        "anilife_finished_insert": "[완결]",
        "anilife_max_ffmpeg_process_count": "1",
        "anilife_order_desc": "False",
        "anilife_auto_start": "False",
        "anilife_interval": "* 5 * * *",
        "anilife_auto_mode_all": "False",
        "anilife_auto_code_list": "all",
        "anilife_current_code": "",
        "anilife_uncompleted_auto_enqueue": "False",
        "anilife_image_url_prefix_series": "https://www.jetcloud.cc/series/",
        "anilife_image_url_prefix_episode": "https://www.jetcloud-list.cc/thumbnail/",
    }

    current_headers = None
    current_data = None
    referer = None
    origin_url = None
    episode_url = None
    cookies = None

    os_platform = platform.system()

    session = requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Referer": "",
        "Cookie": "SPSI=ef307b8c976fac3363cdf420c9ca40a9; SPSE=+PhK0/uGUBMCZIgXplNjzqW3K2kXLybiElDTtOOiboHiBXO7Tp/9roMW7FplGZuGCUo3i4Fwx5VIUG57Zj6VVw==; anilife_csrf=b1eb92529839d7486169cd91e4e60cd2; UTGv2=h45f897818578a5664b31004b95a9992d273; _ga=GA1.1.281412913.1662803695; _ga_56VYJJ7FTM=GS1.1.1662803695.1.0.1662803707.0.0.0; DCST=pE9; DSR=w2XdPUpwLWDqkLpWXfs/5TiO4mtNv5O3hqNhEr7GP1kFoRBBzbFRpR+xsJd9A+E29M+we7qIvJxQmHQTjDNLuQ==; DCSS=696763EB4EA5A67C4E39CFA510FE36F19B0912C; DGCC=RgP; spcsrf=8a6b943005d711258f2f145a8404d873; sp_lit=F9PWLXyxvZbOyk3eVmtTlg==; PRLST=wW; adOtr=70fbCc39867",
        # "Cookie": ""
        # "Cookie": "_ga=GA1.1.578607927.1660813724; __gads=ID=10abb8b98b6828ae-2281c943a9d500fd:T=1660813741:RT=1660813741:S=ALNI_MYU_iB2lBgSrEQUBwhKpNsToaqQ8A; SL_G_WPT_TO=ko; SL_GWPT_Show_Hide_tmp=1; SL_wptGlobTipTmp=1; SPSI=944c237cdd8606d80e5e330a0f332d03; SPSE=itZcXMDuso0ktWnDkV2G0HVwWEctCgDjrcFMlEQ5C745wqvp1pEEddrsAsjPUBjl6/8+9Njpq1IG3wt/tVag7w==; sbtsck=jav9aILa6Ofn0dEQr5DhDq5rpbd1JUoNgKwxBpZrqYd+CM=; anilife_csrf=54ee9d15c87864ee5e2538a63d894ad6; UTGv2=h46b326af644f4ac5d0eb1502881136b3750; DCST=pE9; __gpi=UID=000008ba227e99e0:T=1660813741:RT=1661170429:S=ALNI_MaJHIVJIGpQ5nTE9lvypKQxJnn10A; DSR=GWyTLTvSMF/lQD77ojQkGyl+7JvTudkSwV1GKeNVUcWEBa/msln9zzsBj7lj+89ywSRBM34Ol73AKf+KHZ9bZA==; DCSS=9D44115EC4CE12CADB88A005DC65A3CD74A211E; DGCC=zdV; spcsrf=fba136251afc6b5283109fc920322c70; sp_lit=kw0Xkp66eQ7bV0f0tNClhg==; PRLST=gt; adOtr=2C4H9c4d78d; _ga_56VYJJ7FTM=GS1.1.1661168661.18.1.1661173389.0.0.0",
    }
    useragent = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, "
        "like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36"
    }

    def __init__(self, P):
        super(LogicAniLife, self).__init__(
            P, "setting", scheduler_desc="애니라이프 자동 다운로드"
        )
        self.name = "anilife"
        self.queue = None
        default_route_socketio(P, self)

    @staticmethod
    def get_html(url, referer=None, stream=False, timeout=5):
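        """Return the HTML of ``url``.

        Currently this just delegates to ``get_html_selenium()``; the
        requests-based code after the early return is kept for reference but
        never runs.
        """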
data = " "
try :
print ( " cloudflare protection bypass ================== " )
# return LogicAniLife.get_html_cloudflare(url)
2022-08-26 17:21:32 +09:00
return LogicAniLife . get_html_selenium ( url , referer )
2022-08-20 23:12:58 +09:00
# return LogicAniLife.get_html_playwright(url)
# import browser_cookie3
# cj = browser_cookie3.chrome(domain_name="anilife.live")
referer = " https://anilife.live/ "
if LogicAniLife . session is None :
LogicAniLife . session = requests . session ( )
# logger.debug('get_html :%s', url)
LogicAniLife . headers [ " Referer " ] = " " if referer is None else referer
LogicAniLife . headers [
" Cookie "
] = " _ga=GA1.1.578607927.1660813724; __gads=ID=10abb8b98b6828ae-2281c943a9d500fd:T=1660813741:RT=1660813741:S=ALNI_MYU_iB2lBgSrEQUBwhKpNsToaqQ8A; sbtsck=javuwDzcOJqUyweM1OQeNGzHbjoHp7Cgw44XnPdM738c3E=; SPSI=e48379959d54a6a62cc7abdcafdb2761; SPSE=h5HfMGLJzLqzNafMD3YaOvHSC9xfh77CcWdKvexp/z5N5OsTkIiYSCudQhFffEfk/0pcOTVf0DpeV0RoNopzig==; anilife_csrf=b93b9f25a12a51cf185805ec4de7cf9d; UTGv2=h46b326af644f4ac5d0eb1502881136b3750; __gpi=UID=000008ba227e99e0:T=1660813741:RT=1660912282:S=ALNI_MaJHIVJIGpQ5nTE9lvypKQxJnn10A; DSR=SXPX8ELcRgh6N/9rNgjpQoNfaX2DRceeKYR0/ul7qTI9gApWQpZxr8jgymf/r0HsUT551vtOv2CMWpIn0Hd26A==; DCSS=89508000A76BBD939F6DDACE5BD9EB902D2212A; DGCC=Wdm; adOtr=7L4Xe58995d; spcsrf=6554fa003bf6a46dd9b7417acfacc20a; _ga_56VYJJ7FTM=GS1.1.1660912281.10.1.1660912576.0.0.0; PRLST=EO "
page_content = LogicAniLife . session . get (
url , headers = headers , timeout = timeout , allow_redirects = True
)
data = page_content . text
except Exception as e :
logger . error ( " Exception: %s " , e )
logger . error ( traceback . format_exc ( ) )
return data

    @staticmethod
    def get_html_requests(url, referer=None, stream=False, timeout=5):
        data = ""
        try:
            print("get_html_requests ==================")
            # cj = browser_cookie3.chrome(domain_name="anilife.live")
            referer = "https://anilife.live/"

            if LogicAniLife.session is None:
                LogicAniLife.session = requests.session()

            # logger.debug('get_html :%s', url)
            LogicAniLife.headers["Referer"] = "" if referer is None else referer
            LogicAniLife.headers[
                "Cookie"
            ] = "_ga=GA1.1.578607927.1660813724; __gads=ID=10abb8b98b6828ae-2281c943a9d500fd:T=1660813741:RT=1660813741:S=ALNI_MYU_iB2lBgSrEQUBwhKpNsToaqQ8A; sbtsck=javuwDzcOJqUyweM1OQeNGzHbjoHp7Cgw44XnPdM738c3E=; SPSI=e48379959d54a6a62cc7abdcafdb2761; SPSE=h5HfMGLJzLqzNafMD3YaOvHSC9xfh77CcWdKvexp/z5N5OsTkIiYSCudQhFffEfk/0pcOTVf0DpeV0RoNopzig==; anilife_csrf=b93b9f25a12a51cf185805ec4de7cf9d; UTGv2=h46b326af644f4ac5d0eb1502881136b3750; __gpi=UID=000008ba227e99e0:T=1660813741:RT=1660912282:S=ALNI_MaJHIVJIGpQ5nTE9lvypKQxJnn10A; DSR=SXPX8ELcRgh6N/9rNgjpQoNfaX2DRceeKYR0/ul7qTI9gApWQpZxr8jgymf/r0HsUT551vtOv2CMWpIn0Hd26A==; DCSS=89508000A76BBD939F6DDACE5BD9EB902D2212A; DGCC=Wdm; adOtr=7L4Xe58995d; spcsrf=6554fa003bf6a46dd9b7417acfacc20a; _ga_56VYJJ7FTM=GS1.1.1660912281.10.1.1660912576.0.0.0; PRLST=EO"
            LogicAniLife.headers["Referer"] = referer
            page_content = LogicAniLife.session.get(
                url, headers=LogicAniLife.headers, timeout=timeout, allow_redirects=True
            )
            data = page_content.text
        except Exception as e:
            logger.error("Exception: %s", e)
            logger.error(traceback.format_exc())
        return data

    @staticmethod
    async def get_html_playwright(
        url, headless=False, referer=None, engine="chrome", stealth=False
    ):
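        """Fetch ``url`` with an async Playwright browser and return the page HTML.

        ``engine`` selects chromium/webkit/firefox, ``stealth`` applies
        playwright-stealth, and the class-level headers/cookies (typically
        filled in by an earlier ``get_html_selenium()`` call) are applied to
        the browser context.
        """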
        from playwright.sync_api import sync_playwright
        from playwright.async_api import async_playwright
        from playwright_stealth import stealth_sync, stealth_async
        import time

        browser_args = [
            "--window-size=1300,570",
            "--window-position=000,000",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-web-security",
            "--disable-features=site-per-process",
            "--disable-setuid-sandbox",
            "--disable-accelerated-2d-canvas",
            "--no-first-run",
            "--no-zygote",
            # '--single-process',
            "--disable-gpu",
            "--use-gl=egl",
            "--disable-blink-features=AutomationControlled",
            "--disable-background-networking",
            "--enable-features=NetworkService,NetworkServiceInProcess",
            "--disable-background-timer-throttling",
            "--disable-backgrounding-occluded-windows",
            "--disable-breakpad",
            "--disable-client-side-phishing-detection",
            "--disable-component-extensions-with-background-pages",
            "--disable-default-apps",
            "--disable-extensions",
            "--disable-features=Translate",
            "--disable-hang-monitor",
            "--disable-ipc-flooding-protection",
            "--disable-popup-blocking",
            "--disable-prompt-on-repost",
            "--disable-renderer-backgrounding",
            "--disable-sync",
            "--force-color-profile=srgb",
            "--metrics-recording-only",
            "--enable-automation",
            "--password-store=basic",
            "--use-mock-keychain",
            "--hide-scrollbars",
            "--mute-audio",
        ]
        # scraper = cloudscraper.create_scraper(
        #     browser={"browser": "chrome", "platform": "windows", "desktop": True},
        #     debug=False,
        #     # sess=LogicAniLife.session,
        #     delay=10,
        # )
        #
        # cookie_value, user_agent = scraper.get_cookie_string(url)
        #
        # logger.debug(f"cookie_value:: {cookie_value}")
        start = time.time()

        ua = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/69.0.3497.100 Safari/537.36"
        )
        # from playwright_stealth import stealth_sync

        cookie = None

        def set_cookie(req):
            nonlocal cookie
            if "cookie" in req.headers:
                cookie = req.headers["cookie"]

        async with async_playwright() as p:
            if engine == "chrome":
                browser = await p.chromium.launch(
                    channel="chrome", args=browser_args, headless=headless
                )
            elif engine == "webkit":
                browser = await p.webkit.launch(
                    headless=headless,
                    args=browser_args,
                )
            else:
                browser = await p.firefox.launch(
                    headless=headless,
                    args=browser_args,
                )

            # context = browser.new_context(
            #     user_agent=ua,
            # )

            LogicAniLife.headers["Referer"] = "https://anilife.live/detail/id/471"
            # print(LogicAniLife.headers)
            LogicAniLife.headers["Referer"] = LogicAniLife.episode_url

            if referer is not None:
                LogicAniLife.headers["Referer"] = referer

            logger.debug(f"LogicAniLife.headers::: {LogicAniLife.headers}")
            context = await browser.new_context(extra_http_headers=LogicAniLife.headers)
            await context.add_cookies(LogicAniLife.cookies)

            # LogicAniLife.headers["Cookie"] = cookie_value
            # context.set_extra_http_headers(LogicAniLife.headers)
            page = await context.new_page()
            # page.set_extra_http_headers(LogicAniLife.headers)

            if stealth:
                await stealth_async(page)

            # page.on("request", set_cookie)
            # stealth_sync(page)
            print(LogicAniLife.headers["Referer"])
            page.on("request", set_cookie)
            print(f'Referer:: {LogicAniLife.headers["Referer"]}')

            # await page.set_extra_http_headers(LogicAniLife.headers)
            await page.goto(
                url, wait_until="load", referer=LogicAniLife.headers["Referer"]
            )
            # page.wait_for_timeout(10000)
            await asyncio.sleep(2.9)

            # await page.reload()
            # time.sleep(10)
            # cookies = context.cookies
            # print(cookies)
            print(f"page.url:: {page.url}")
            LogicAniLife.origin_url = page.url
            # print(page.content())

            print(f"run at {time.time() - start} sec")
            return await page.content()

    @staticmethod
    async def get_vod_url_v1(
        url, headless=False, referer=None, engine="chrome", stealth=False
    ):
        from playwright.sync_api import sync_playwright
        from playwright.async_api import async_playwright
        from playwright_har_tracer import HarTracer
        from playwright_stealth import stealth_sync, stealth_async
        import time

        # scraper = cloudscraper.create_scraper(
        #     browser={"browser": "chrome", "platform": "windows", "desktop": True},
        #     debug=False,
        #     # sess=LogicAniLife.session,
        #     delay=10,
        # )
        #
        # cookie_value, user_agent = scraper.get_cookie_string(url)
        #
        # logger.debug(f"cookie_value:: {cookie_value}")
        start = time.time()

        ua = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/69.0.3497.100 Safari/537.36"
        )
        # from playwright_stealth import stealth_sync

        cookie = None

        def set_cookie(req):
            nonlocal cookie
            if "cookie" in req.headers:
                cookie = req.headers["cookie"]

        async with async_playwright() as p:
            if engine == "chrome":
                browser = await p.chromium.launch(channel="chrome", headless=headless)
            elif engine == "webkit":
                browser = await p.webkit.launch(headless=headless)
            else:
                browser = await p.firefox.launch(headless=headless)

            # context = browser.new_context(
            #     user_agent=ua,
            # )
            LogicAniLife.headers["Referer"] = "https://anilife.live/detail/id/471"
            # print(LogicAniLife.headers)
            LogicAniLife.headers["Referer"] = LogicAniLife.episode_url

            if referer is not None:
                LogicAniLife.headers["Referer"] = referer

            logger.debug(f"LogicAniLife.headers::: {LogicAniLife.headers}")
            context = await browser.new_context(extra_http_headers=LogicAniLife.headers)
            await context.add_cookies(LogicAniLife.cookies)

            # LogicAniLife.headers["Cookie"] = cookie_value
            # context.set_extra_http_headers(LogicAniLife.headers)
            tracer = HarTracer(context=context, browser_name=p.webkit.name)

            page = await context.new_page()
            # page.set_extra_http_headers(LogicAniLife.headers)

            if stealth:
                await stealth_async(page)

            # page.on("request", set_cookie)
            # stealth_sync(page)
            print(LogicAniLife.headers["Referer"])
            page.on("request", set_cookie)
            print(f'Referer:: {LogicAniLife.headers["Referer"]}')

            # await page.set_extra_http_headers(LogicAniLife.headers)
            await page.goto(
                url, wait_until="load", referer=LogicAniLife.headers["Referer"]
            )
            har = await tracer.flush()
            # page.wait_for_timeout(10000)
            await asyncio.sleep(10)

            # await page.reload()
            # time.sleep(10)
            # cookies = context.cookies
            # print(cookies)

            print(f"page.url:: {page.url}")
            LogicAniLife.origin_url = page.url
            # print(page.content())

            print(f"run at {time.time() - start} sec")
            return await page.content()

    @staticmethod
    async def get_vod_url(url, headless=False):
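        """Resolve the final m3u8 stream URL for a jawcloud player page.

        The page is loaded with Playwright while playwright-har-tracer records
        the network traffic; the last request whose URL contains "m3u8" is
        returned.  Typical use (see make_episode_info below)::

            vod_url = asyncio.run(LogicAniLife.get_vod_url(jawcloud_url, headless=True))
        """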
        from playwright.sync_api import sync_playwright
        from playwright.async_api import async_playwright
        from playwright_stealth import stealth_async
        import html_to_json
        from playwright_har_tracer import HarTracer
        import time

        # scraper = cloudscraper.create_scraper(
        #     browser={"browser": "chrome", "platform": "windows", "desktop": True},
        #     debug=False,
        #     # sess=LogicAniLife.session,
        #     delay=10,
        # )
        #
        # cookie_value, user_agent = scraper.get_cookie_string(url)
        #
        # logger.debug(f"cookie_value:: {cookie_value}")
        browser_args = [
            "--window-size=1300,570",
            "--window-position=000,000",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-web-security",
            "--disable-features=site-per-process",
            "--disable-setuid-sandbox",
            "--disable-accelerated-2d-canvas",
            "--no-first-run",
            "--no-zygote",
            # "--single-process",
            "--disable-gpu",
            "--use-gl=egl",
            "--disable-blink-features=AutomationControlled",
            "--disable-background-networking",
            "--enable-features=NetworkService,NetworkServiceInProcess",
            "--disable-background-timer-throttling",
            "--disable-backgrounding-occluded-windows",
            "--disable-breakpad",
            "--disable-client-side-phishing-detection",
            "--disable-component-extensions-with-background-pages",
            "--disable-default-apps",
            "--disable-extensions",
            "--disable-features=Translate",
            "--disable-hang-monitor",
            "--disable-ipc-flooding-protection",
            "--disable-popup-blocking",
            "--disable-prompt-on-repost",
            "--disable-renderer-backgrounding",
            "--disable-sync",
            "--force-color-profile=srgb",
            "--metrics-recording-only",
            "--enable-automation",
            "--password-store=basic",
            "--use-mock-keychain",
            "--hide-scrollbars",
            "--mute-audio",
        ]
        start = time.time()

        ua = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/69.0.3497.100 Safari/537.36"
        )
        # from playwright_stealth import stealth_sync

        async with async_playwright() as p:
            # browser = await p.chromium.launch(headless=headless, args=browser_args)
            browser = await p.chromium.launch(headless=headless, args=browser_args)
            # browser = await p.webkit.launch(headless=headless)

            # context = browser.new_context(
            #     user_agent=ua,
            # )
            LogicAniLife.headers[
                "Referer"
            ] = "https://anilife.live/g/l?id=14344143-040a-4e40-9399-a7d22d94554b"
            # print(LogicAniLife.headers)
            # context = await browser.new_context(extra_http_headers=LogicAniLife.headers)
            context = await browser.new_context()
            await context.set_extra_http_headers(LogicAniLife.headers)

            # await context.add_cookies(LogicAniLife.cookies)

            # tracer = HarTracer(context=context, browser_name=p.chromium.name)
            tracer = HarTracer(context=context, browser_name=p.webkit.name)

            # LogicAniLife.headers["Cookie"] = cookie_value
            # context.set_extra_http_headers(LogicAniLife.headers)

            page = await context.new_page()

            # await page.set_extra_http_headers(LogicAniLife.headers)

            # await stealth_async(page)
            # logger.debug(url)

            # page.on("request", set_cookie)
            # stealth_sync(page)
            # await page.goto(
            #     url, wait_until="load", referer=LogicAniLife.headers["Referer"]
            # )
            # await page.goto(url, wait_until="load")
            await page.goto(url, wait_until="domcontentloaded")
            har = await tracer.flush()

            # page.wait_for_timeout(10000)
            await asyncio.sleep(2)
            # logger.debug(har)

            # page.reload()
            # time.sleep(10)
            # cookies = context.cookies
            # print(cookies)
            # print(page.content())

            # vod_url = page.evaluate(
            #     """() => {
            #         return console.log(vodUrl_1080p) }"""
            # )
            # vod_url = page.evaluate(
            #     """async () =>{
            #         return _0x55265f(0x99) + alJson[_0x55265f(0x91)]
            #     }"""
            # )
            result_har_json = har.to_json()
            result_har_dict = har.to_dict()
            # logger.debug(result_har_dict)

            tmp_video_url = []
            for i, elem in enumerate(result_har_dict["log"]["entries"]):
                if "m3u8" in elem["request"]["url"]:
                    logger.debug(elem["request"]["url"])
                    tmp_video_url.append(elem["request"]["url"])
            vod_url = tmp_video_url[-1]

            logger.debug(f"vod_url:: {vod_url}")

            logger.debug(f"run at {time.time() - start} sec")

            # html_content = LogicAniLife.get_html_selenium(
            #     vod_url, "https://anilife.live"
            # )
            # html_content = LogicAniLife.get_html_playwright(
            #     vod_url, False, referer="https://anilife.live"
            # )
            # html_content = LogicAniLife.get_html(
            #     vod_url, referer="https://anilife.live"
            # )
            # html_content = LogicAniLife.get_html_requests(
            #     vod_url, referer="https://anilife.live"
            # )

            # print(f"html_content:: {html_content}")
            # output_json = html_to_json.convert(html_content)
            # resolution = output_json["html"][0]["body"][0]["_value"]
            # logger.debug(f"output_json:: {resolution}")
            return vod_url

    @staticmethod
    def get_html_selenium(url, referer):
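        """Fetch ``url`` with a headless, stealth-patched Chrome WebDriver.

        Besides returning the rendered HTML, this stores the driver's cookies
        and the final URL in ``LogicAniLife.cookies`` / ``episode_url`` so the
        Playwright-based helpers can reuse them.
        """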
        from selenium.webdriver.common.by import By
        from selenium import webdriver
        from selenium_stealth import stealth
        from webdriver_manager.chrome import ChromeDriverManager
        import time

        options = webdriver.ChromeOptions()
        # headless Chrome options (required when running on Linux)
        options.add_argument("start-maximized")
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("window-size=1920x1080")
        options.add_argument("disable-gpu")
        # options.add_argument('--no-sandbox')
        options.add_argument("--disable-dev-shm-usage")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        if LogicAniLife.os_platform == "Darwin":
            # chromedriver path
            driver_path = "./bin/Darwin/chromedriver"
            # driver = webdriver.Chrome(executable_path=driver_path, chrome_options=options)
            driver = webdriver.Chrome(
                ChromeDriverManager().install(), chrome_options=options
            )
        else:
            driver_bin_path = os.path.join(
                os.path.dirname(__file__), "bin", f"{LogicAniLife.os_platform}"
            )
            driver_path = f"{driver_bin_path}/chromedriver"
            driver = webdriver.Chrome(
                executable_path=driver_path, chrome_options=options
            )

        stealth(
            driver,
            languages=["en-US", "en"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
        )
        driver.get(url)

        driver.refresh()

        logger.debug(f"current_url:: {driver.current_url}")
        # logger.debug(f"current_cookie:: {driver.get_cookies()}")
        cookies_list = driver.get_cookies()
        cookies_dict = {}
        for cookie in cookies_list:
            cookies_dict[cookie["name"]] = cookie["value"]

        logger.debug(cookies_dict)
        LogicAniLife.cookies = cookies_list
        # LogicAniLife.headers["Cookie"] = driver.get_cookies()
        LogicAniLife.episode_url = driver.current_url

        time.sleep(1)
        elem = driver.find_element(By.XPATH, "//*")
        source_code = elem.get_attribute("outerHTML")
        return source_code.encode("utf-8")

    # Create a request interceptor
    @staticmethod
    def interceptor(request):
        del request.headers["Referer"]  # Delete the header first
        request.headers[
            "Referer"
        ] = "https://anilife.live/g/l?id=0a36917f-39cc-43ea-b0c6-0c86d27c2408"

    @staticmethod
    def get_html_seleniumwire(url, referer, wired=False):
        from selenium import webdriver
        from selenium.webdriver.common.by import By
        from seleniumwire import webdriver as wired_webdriver
        from selenium_stealth import stealth
        import time

        options = webdriver.ChromeOptions()
        # headless Chrome options (required when running on Linux)
        options.add_argument("start-maximized")
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        # chromedriver path
        driver_path = "./bin/Darwin/chromedriver"
        if wired:
            driver = wired_webdriver.Chrome(
                executable_path=driver_path, chrome_options=options
            )
        else:
            driver = webdriver.Chrome(
                executable_path=driver_path, chrome_options=options
            )

        # stealth ======================================
        # stealth(
        #     driver,
        #     languages=["en-US", "en"],
        #     vendor="Google Inc.",
        #     platform="Win32",
        #     webgl_vendor="Intel Inc.",
        #     renderer="Intel Iris OpenGL Engine",
        #     fix_hairline=True,
        # )
        if wired:
            driver.request_interceptor = LogicAniLife.interceptor
        driver.get(url)
        driver.refresh()
        time.sleep(1)
        elem = driver.find_element(By.XPATH, "//*")
        source_code = elem.get_attribute("outerHTML")
        return source_code.encode("utf-8")

    @staticmethod
    def get_html_cloudflare(url, cached=False):
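        """Fetch ``url`` through cloudscraper, reusing the shared requests
        session and the hard-coded headers/cookies, and return the decoded body."""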
        # scraper = cloudscraper.create_scraper(
        #     # disableCloudflareV1=True,
        #     # captcha={"provider": "return_response"},
        #     delay=10,
        #     browser="chrome",
        # )
        # scraper = cfscrape.create_scraper(
        #     browser={"browser": "chrome", "platform": "android", "desktop": False}
        # )
        # scraper = cloudscraper.create_scraper(
        #     browser={"browser": "chrome", "platform": "windows", "mobile": False},
        #     debug=True,
        # )
        # LogicAniLife.headers["referer"] = LogicAniLife.referer
        LogicAniLife.headers["Referer"] = "https://anilife.live/"
        LogicAniLife.headers[
            "Cookie"
        ] = "_ga=GA1.1.578607927.1660813724; __gads=ID=10abb8b98b6828ae-2281c943a9d500fd:T=1660813741:RT=1660813741:S=ALNI_MYU_iB2lBgSrEQUBwhKpNsToaqQ8A; sbtsck=javuwDzcOJqUyweM1OQeNGzHbjoHp7Cgw44XnPdM738c3E=; SPSI=e48379959d54a6a62cc7abdcafdb2761; SPSE=h5HfMGLJzLqzNafMD3YaOvHSC9xfh77CcWdKvexp/z5N5OsTkIiYSCudQhFffEfk/0pcOTVf0DpeV0RoNopzig==; anilife_csrf=b93b9f25a12a51cf185805ec4de7cf9d; UTGv2=h46b326af644f4ac5d0eb1502881136b3750; __gpi=UID=000008ba227e99e0:T=1660813741:RT=1660912282:S=ALNI_MaJHIVJIGpQ5nTE9lvypKQxJnn10A; DSR=SXPX8ELcRgh6N/9rNgjpQoNfaX2DRceeKYR0/ul7qTI9gApWQpZxr8jgymf/r0HsUT551vtOv2CMWpIn0Hd26A==; DCSS=89508000A76BBD939F6DDACE5BD9EB902D2212A; DGCC=Wdm; adOtr=7L4Xe58995d; spcsrf=6554fa003bf6a46dd9b7417acfacc20a; _ga_56VYJJ7FTM=GS1.1.1660912281.10.1.1660912576.0.0.0; PRLST=EO"
        # logger.debug(f"headers:: {LogicAniLife.headers}")
        if LogicAniLife.session is None:
            LogicAniLife.session = requests.Session()

        LogicAniLife.session.headers = LogicAniLife.headers
        # LogicAniLife.session = requests.Session()
        sess = cloudscraper.create_scraper(
            browser={"browser": "firefox", "platform": "windows", "desktop": True},
            debug=False,
            sess=LogicAniLife.session,
            delay=10,
        )

        # print(scraper.get(url, headers=LogicAniLife.headers).content)
        # print(scraper.get(url).content)
        # return scraper.get(url, headers=LogicAniLife.headers).content
        print(LogicAniLife.headers)
        return sess.get(
            url, headers=LogicAniLife.session.headers, timeout=10, allow_redirects=True
        ).content.decode("utf8", errors="replace")

    @staticmethod
    def db_init():
        pass

    def process_menu(self, sub, req):
        arg = P.ModelSetting.to_dict()
        arg["sub"] = self.name

        if sub in ["setting", "queue", "list", "category", "request"]:
            if sub == "setting":
                job_id = "%s_%s" % (self.P.package_name, self.name)
                arg["scheduler"] = str(scheduler.is_include(job_id))
                arg["is_running"] = str(scheduler.is_running(job_id))
            return render_template(
                "{package_name}_{module_name}_{sub}.html".format(
                    package_name=P.package_name, module_name=self.name, sub=sub
                ),
                arg=arg,
            )
        return render_template("sample.html", title="%s - %s" % (P.package_name, sub))

    def process_ajax(self, sub, req):
        try:
            if sub == "analysis":
                # code = req.form['code']
                logger.debug(req)
                code = request.form["code"]
                wr_id = request.form.get("wr_id", None)
                bo_table = request.form.get("bo_table", None)
                data = []
                # logger.info("code::: %s", code)
                P.ModelSetting.set("anilife_current_code", code)
                data = self.get_series_info(code)
                self.current_data = data
                return jsonify({"ret": "success", "data": data, "code": code})
            elif sub == "anime_list":
                data = []
                cate = request.form["type"]
                page = request.form["page"]
                data = self.get_anime_info(cate, page)
                # self.current_data = data
                return jsonify(
                    {"ret": "success", "cate": cate, "page": page, "data": data}
                )
            elif sub == "add_queue":
                logger.debug("add_queue routine ===============")
                ret = {}
                info = json.loads(request.form["data"])
                logger.info(f"info:: {info}")
                ret["ret"] = self.add(info)
                return jsonify(ret)
            elif sub == "entity_list":
                return jsonify(self.queue.get_entity_list())
            elif sub == "queue_command":
                ret = self.queue.command(
                    req.form["command"], int(req.form["entity_id"])
                )
                return jsonify(ret)
            elif sub == "add_queue_checked_list":
                data = json.loads(request.form["data"])

                def func():
                    count = 0
                    for tmp in data:
                        add_ret = self.add(tmp)
                        if add_ret.startswith("enqueue"):
                            self.socketio_callback("list_refresh", "")
                            count += 1
                    notify = {
                        "type": "success",
                        "msg": "%s개의 에피소드를 큐에 추가 하였습니다." % count,
                    }
                    socketio.emit(
                        "notify", notify, namespace="/framework", broadcast=True
                    )

                thread = threading.Thread(target=func, args=())
                thread.daemon = True
                thread.start()
                return jsonify("")
            elif sub == "web_list":
                return jsonify(ModelAniLifeItem.web_list(request))
        except Exception as e:
            P.logger.error("Exception: %s", e)
            P.logger.error(traceback.format_exc())

    def setting_save_after(self):
        if self.queue.get_max_ffmpeg_count() != P.ModelSetting.get_int(
            "anilife_max_ffmpeg_process_count"
        ):
            self.queue.set_max_ffmpeg_count(
                P.ModelSetting.get_int("anilife_max_ffmpeg_process_count")
            )

    def scheduler_function(self):
        pass

    def plugin_load(self):
        self.queue = FfmpegQueue(
            P, P.ModelSetting.get_int("anilife_max_ffmpeg_process_count")
        )
        self.current_data = None
        self.queue.queue_start()

    def reset_db(self):
        db.session.query(ModelAniLifeItem).delete()
        db.session.commit()
        return True

    # fetches series (detail-page) information
    def get_series_info(self, code):
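        """Parse a series detail page and return its metadata and episode list.

        ``code`` may be a numeric site id (used with /detail/id/<code>) or an
        opaque id passed to the /g/l?id=<code> route.
        """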
        try:
            if code.isdigit():
                url = P.ModelSetting.get("anilife_url") + "/detail/id/" + code
            else:
                url = P.ModelSetting.get("anilife_url") + "/g/l?id=" + code

            logger.debug("url::: > %s", url)
            response_data = LogicAniLife.get_html(url, timeout=10)
            tree = html.fromstring(response_data)
            # logger.debug(response_data)

            main_title = tree.xpath('//div[@class="infox"]/h1/text()')[0]
            image = tree.xpath('//div[@class="thumb"]/img/@src')[0]
            des_items = tree.xpath(
                '//div[@class="info-content"]/div[@class="spe"]/span'
            )
            des_items1 = (
                tree.xpath('//div[@class="info-content"]/div[@class="spe"]')[0]
                .text_content()
                .strip()
            )
            # print(des_items1)
            # print(len(des_items))
            des = {}
            des_key = [
                "_otit",
                "_dir",
                "_pub",
                "_tag",
                "_classifi",
                "_country",
                "_season",
                "_grade",
                "_total_chapter",
                "_show_time",
                "_release_year",
                "_recent_date",
                "_air_date",
            ]
            description_dict = {
                "상태": "_status",
                "원제": "_otit",
                "원작": "_org",
                "감독": "_dir",
                "각본": "_scr",
                "시즌": "_season",
                "캐릭터 디자인": "_character_design",
                "음악": "_sound",
                "제작사": "_pub",
                "장르": "_tag",
                "분류": "_classifi",
                "제작국가": "_country",
                "방영일": "_date",
                "등급": "_grade",
                "유형": "_type",
                "에피소드": "_total_chapter",
                "상영시간": "_show_time",
                "공식 방영일": "_release_date",
                "방영 시작일": "_air_date",
                "최근 방영일": "_recent_date",
                "개봉년도": "_release_year",
            }
            print(main_title)
            print(image)
            # print(des_items)

            list_body_li = tree.xpath('//div[@class="eplister"]/ul/li')
            # logger.debug(f"list_body_li:: {list_body_li}")
            episodes = []
            vi = None

            for li in list_body_li:
                # logger.debug(li)
                ep_num = li.xpath('.//a/div[@class="epl-num"]/text()')[0].strip()
                title = li.xpath('.//a/div[@class="epl-title"]/text()')[0].strip()
                thumbnail = image
                link = li.xpath(".//a/@href")[0]
                date = ""
                m = hashlib.md5(title.encode("utf-8"))
                _vi = m.hexdigest()
                episodes.append(
                    {
                        "ep_num": ep_num,
                        "title": f"{main_title} {ep_num}화 - {title}",
                        "link": link,
                        "thumbnail": image,
                        "date": date,
                        "day": date,
                        "_id": title,
                        "va": link,
                        "_vi": _vi,
                        "content_code": code,
                        "ep_url": url,
                    }
                )

            # print(lxml.etree.tostring(des_items, method="text"))
            #
            # for idx, item in enumerate(des_items):
            #     span = item.xpath(".//b/text()")
            #     logger.info(f"0: {span[0]}")
            #     key = description_dict[span[0].replace(":", "")]
            #     logger.debug(f"key:: {key}")
            #     try:
            #         print(item.xpath(".//text()")[1].strip())
            #         des[key] = item.xpath(".//text()")[1].strip()
            #     except IndexError:
            #         if item.xpath(".//a"):
            #             des[key] = item.xpath(".//a")[0]
            #         des[key] = ""

            ser_description = "작품 설명 부분"
            des = ""
            des1 = ""

            data = {
                "title": main_title,
                "image": image,
                "date": "2022.01.11 00:30 (화)",
                "ser_description": ser_description,
                # "des": des,
                "des1": des_items1,
                "episode": episodes,
            }
            return data
        except Exception as e:
            P.logger.error("Exception: %s", e)
            P.logger.error(traceback.format_exc())
            return {"ret": "exception", "log": str(e)}

    @staticmethod
    def get_real_link(url):
        response = requests.get(url)
        if response.history:
            print("Request was redirected")
            for resp in response.history:
                print(resp.status_code, resp.url)
            print("Final destination:")
            print(response.status_code, response.url)
            return response.url
        else:
            print("Request was not redirected")

    def get_anime_info(self, cate, page):
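        """Return the anime list for one listing page.

        ``cate`` is "ing" (currently airing, the front page) or "theater"
        (movies); any other value currently falls back to the movie listing.
        ``page`` is the page number appended to the listing URL.
        """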
        logger.debug("get_anime_info() routine")
        logger.debug(f"cate:: {cate}")
        wrapper_xpath = '//div[@class="bsx"]'
        try:
            if cate == "ing":
                url = P.ModelSetting.get("anilife_url")
                wrapper_xpath = (
                    '//div[contains(@class, "listupd")]/*/*/div[@class="bsx"]'
                )
            elif cate == "theater":
                url = (
                    P.ModelSetting.get("anilife_url")
                    + "/vodtype/categorize/Movie/"
                    + page
                )
                wrapper_xpath = '//div[@class="bsx"]'
            else:
                # cate == "complete":
                url = (
                    P.ModelSetting.get("anilife_url")
                    + "/vodtype/categorize/Movie/"
                    + page
                )

            logger.info("url:::> %s", url)
            data = {}
            response_data = LogicAniLife.get_html(url, timeout=10)
            print(response_data)
            logger.debug(f"wrapper_xpath:: {wrapper_xpath}")
            tree = html.fromstring(response_data)
            tmp_items = tree.xpath(wrapper_xpath)
            data["anime_count"] = len(tmp_items)
            data["anime_list"] = []

            for item in tmp_items:
                entity = {}
                entity["link"] = item.xpath(".//a/@href")[0]
                # logger.debug(entity["link"])
                p = re.compile(r"^[http?s://]+[a-zA-Z0-9-]+/[a-zA-Z0-9-_.?=]+$")
                print(p.match(entity["link"]) is not None)
                if p.match(entity["link"]) is None:
                    entity["link"] = P.ModelSetting.get("anilife_url") + entity["link"]
                # real_url = LogicAniLife.get_real_link(url=entity["link"])
                # logger.debug(entity["link"])
                entity["code"] = entity["link"].split("/")[-1]
                entity["title"] = item.xpath(".//div[@class='tt']/text()")[0].strip()
                entity["image_link"] = item.xpath(".//div[@class='limit']/img/@src")[
                    0
                ].replace("..", P.ModelSetting.get("anilife_url"))
                data["ret"] = "success"
                data["anime_list"].append(entity)

            return data
        except Exception as e:
            P.logger.error("Exception: %s", e)
            P.logger.error(traceback.format_exc())
            return {"ret": "exception", "log": str(e)}

    #########################################################
    def add(self, episode_info):
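        """Add one episode to the download queue.

        Returns "queue_exist", "enqueue_db_append", "enqueue_db_exist" or
        "db_completed"; callers use the "enqueue" prefix to decide whether to
        refresh the web list.
        """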
        if self.is_exist(episode_info):
            return "queue_exist"
        else:
            db_entity = ModelAniLifeItem.get_by_anilife_id(episode_info["_id"])
            logger.debug(f"db_entity():: => {db_entity}")

            if db_entity is None:
                logger.debug(f"episode_info:: {episode_info}")
                entity = AniLifeQueueEntity(P, self, episode_info)
                logger.debug("entity:::> %s", entity.as_dict())
                ModelAniLifeItem.append(entity.as_dict())
                self.queue.add_queue(entity)
                return "enqueue_db_append"
            elif db_entity.status != "completed":
                entity = AniLifeQueueEntity(P, self, episode_info)
                self.queue.add_queue(entity)
                return "enqueue_db_exist"
            else:
                return "db_completed"

    def is_exist(self, info):
        for e in self.queue.entity_list:
            if e.info["_id"] == info["_id"]:
                return True
        return False


class AniLifeQueueEntity(FfmpegQueueEntity):
    def __init__(self, P, module_logic, info):
        super(AniLifeQueueEntity, self).__init__(P, module_logic, info)
        self._vi = None
        self.url = None
        self.epi_queue = None
        self.filepath = None
        self.savepath = None
        self.quality = None
        self.filename = None
        self.vtt = None
        self.season = 1
        self.content_title = None
        self.srt_url = None
        self.headers = None
        # Todo::: temporarily commented out
        self.make_episode_info()

    def refresh_status(self):
        self.module_logic.socketio_callback("status", self.as_dict())

    def info_dict(self, tmp):
        # logger.debug("self.info::> %s", self.info)
        for key, value in self.info.items():
            tmp[key] = value
        tmp["vtt"] = self.vtt
        tmp["season"] = self.season
        tmp["content_title"] = self.content_title
        tmp["anilife_info"] = self.info
        tmp["epi_queue"] = self.epi_queue
        return tmp

    def donwload_completed(self):
        db_entity = ModelAniLifeItem.get_by_anilife_id(self.info["_id"])
        if db_entity is not None:
            db_entity.status = "completed"
            db_entity.completed_time = datetime.now()
            db_entity.save()

    def make_episode_info(self):
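        """Resolve everything needed to download this episode.

        Loads the episode page with Playwright, extracts the jawcloud player
        URL, resolves the final m3u8 URL via ``LogicAniLife.get_vod_url()``,
        and builds the target filename/save path from the episode title.
        """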
        logger.debug("make_episode_info() routine ==========")
        try:
            # add the download
            base_url = "https://anilife.live"
            iframe_url = ""
            url = self.info["va"]
            logger.debug(f"url:: {url}")
            ourls = parse.urlparse(url)

            self.headers = {
                "Referer": f"{ourls.scheme}://{ourls.netloc}",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Whale/3.12.129.46 Safari/537.36",
            }

            headers["Referer"] = "https://anilife.live/detail/id/471"
            headers["Referer"] = LogicAniLife.episode_url

            logger.debug("make_episode_info()::url==> %s", url)
            logger.info(f"self.info:::> {self.info}")
            referer = "https://anilife.live/g/l?id=13fd4d28-ff18-4764-9968-7e7ea7347c51"
            referer = LogicAniLife.episode_url
            # text = requests.get(url, headers=headers).text
            # text = LogicAniLife.get_html_seleniumwire(url, referer=referer, wired=True)
            # https://anilife.live/ani/provider/10f60832-20d1-4918-be62-0f508bf5460c
            referer_url = (
                "https://anilife.live/g/l?id=d4be1e0e-301b-403b-be1b-cf19f3ccfd23"
            )
            referer_url = LogicAniLife.episode_url
            logger.debug(f"LogicAniLife.episode_url:: {LogicAniLife.episode_url}")

            text = asyncio.run(
                LogicAniLife.get_html_playwright(
                    url,
                    headless=True,
                    referer=referer_url,
                    engine="chrome",
                    stealth=True,
                )
            )
            # vod_1080p_url = text
            # logger.debug(text)

            soup = BeautifulSoup(text, "lxml")
            all_scripts = soup.find_all("script")
            # print(all_scripts)

            regex = r"(?P<jawcloud_url>http?s:\/\/.*=jawcloud)"
            match = re.compile(regex).search(text)

            jawcloud_url = None
            print(match)
            if match:
                jawcloud_url = match.group("jawcloud_url")

            logger.debug(f"jawcloud_url:: {jawcloud_url}")
            # loop = asyncio.new_event_loop()
            # asyncio.set_event_loop(loop)
            #
            logger.info(self.info)
            match = re.compile(
                r"(?P<title>.*?)\s*((?P<season>\d+)%s)?\s*((?P<epi_no>\d+)%s)"
                % ("기", "화")
            ).search(self.info["title"])

            # initial value for epi_no
            epi_no = 1
            self.quality = "1080P"
            if match:
                self.content_title = match.group("title").strip()
                if "season" in match.groupdict() and match.group("season") is not None:
                    self.season = int(match.group("season"))

                # epi_no = 1
                epi_no = int(match.group("epi_no"))
                ret = "%s.S%sE%s.%s-AL.mp4" % (
                    self.content_title,
                    "0%s" % self.season if self.season < 10 else self.season,
                    "0%s" % epi_no if epi_no < 10 else epi_no,
                    self.quality,
                )
            else:
                self.content_title = self.info["title"]
                P.logger.debug("NOT MATCH")
                ret = "%s.720p-AL.mp4" % self.info["title"]

            # logger.info('self.content_title:: %s', self.content_title)
            self.epi_queue = epi_no
            self.filename = Util.change_text_for_use_filename(ret)
            logger.info(f"self.filename::> {self.filename}")
            self.savepath = P.ModelSetting.get("anilife_download_path")
            logger.info(f"self.savepath::> {self.savepath}")

            if P.ModelSetting.get_bool("anilife_auto_make_folder"):
                if self.info["day"].find("완결") != -1:
                    folder_name = "%s %s" % (
                        P.ModelSetting.get("anilife_finished_insert"),
                        self.content_title,
                    )
                else:
                    folder_name = self.content_title
                folder_name = Util.change_text_for_use_filename(folder_name.strip())
                self.savepath = os.path.join(self.savepath, folder_name)
                if P.ModelSetting.get_bool("anilife_auto_make_season_folder"):
                    self.savepath = os.path.join(
                        self.savepath, "Season %s" % int(self.season)
                    )

            self.filepath = os.path.join(self.savepath, self.filename)
            if not os.path.exists(self.savepath):
                os.makedirs(self.savepath)

            vod_1080p_url = asyncio.run(
                LogicAniLife.get_vod_url(jawcloud_url, headless=True)
            )
            print(f"vod_1080p_url:: {vod_1080p_url}")
            self.url = vod_1080p_url
            logger.info(self.url)
        except Exception as e:
            P.logger.error("Exception: %s", e)
            P.logger.error(traceback.format_exc())


class ModelAniLifeItem(db.Model):
    __tablename__ = "{package_name}_anilife_item".format(package_name=P.package_name)
    __table_args__ = {"mysql_collate": "utf8_general_ci"}
    __bind_key__ = P.package_name

    id = db.Column(db.Integer, primary_key=True)
    created_time = db.Column(db.DateTime)
    completed_time = db.Column(db.DateTime)
    reserved = db.Column(db.JSON)
    content_code = db.Column(db.String)
    season = db.Column(db.Integer)
    episode_no = db.Column(db.Integer)
    title = db.Column(db.String)
    episode_title = db.Column(db.String)
    anilife_va = db.Column(db.String)
    anilife_vi = db.Column(db.String)
    anilife_id = db.Column(db.String)
    quality = db.Column(db.String)
    filepath = db.Column(db.String)
    filename = db.Column(db.String)
    savepath = db.Column(db.String)
    video_url = db.Column(db.String)
    vtt_url = db.Column(db.String)
    thumbnail = db.Column(db.String)
    status = db.Column(db.String)
    anilife_info = db.Column(db.JSON)

    def __init__(self):
        self.created_time = datetime.now()

    def __repr__(self):
        return repr(self.as_dict())

    def as_dict(self):
        ret = {x.name: getattr(self, x.name) for x in self.__table__.columns}
        ret["created_time"] = self.created_time.strftime("%Y-%m-%d %H:%M:%S")
        ret["completed_time"] = (
            self.completed_time.strftime("%Y-%m-%d %H:%M:%S")
            if self.completed_time is not None
            else None
        )
        return ret

    @classmethod
    def get_by_id(cls, idx):
        return db.session.query(cls).filter_by(id=idx).first()

    @classmethod
    def get_by_anilife_id(cls, anilife_id):
        return db.session.query(cls).filter_by(anilife_id=anilife_id).first()

    def save(self):
        db.session.add(self)
        db.session.commit()

    @classmethod
    def web_list(cls, req):
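        """Return one page of queue items for the web UI.

        Reads ``page``, ``search_word``, ``option`` and ``order`` from the
        request form and returns a dict with the item list and paging info.
        """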
        ret = {}
        page = int(req.form["page"]) if "page" in req.form else 1
        page_size = 30
        job_id = ""
        search = req.form["search_word"] if "search_word" in req.form else ""
        option = req.form["option"] if "option" in req.form else "all"
        order = req.form["order"] if "order" in req.form else "desc"
        query = cls.make_query(search=search, order=order, option=option)
        count = query.count()
        query = query.limit(page_size).offset((page - 1) * page_size)
        lists = query.all()
        ret["list"] = [item.as_dict() for item in lists]
        ret["paging"] = Util.get_paging_info(count, page, page_size)
        return ret

    @classmethod
    def make_query(cls, search="", order="desc", option="all"):
        query = db.session.query(cls)
        if search is not None and search != "":
            if search.find("|") != -1:
                tmp = search.split("|")
                conditions = []
                for tt in tmp:
                    if tt != "":
                        conditions.append(cls.filename.like("%" + tt.strip() + "%"))
                query = query.filter(or_(*conditions))
            elif search.find(",") != -1:
                tmp = search.split(",")
                for tt in tmp:
                    if tt != "":
                        query = query.filter(cls.filename.like("%" + tt.strip() + "%"))
            else:
                query = query.filter(cls.filename.like("%" + search + "%"))
        if option == "completed":
            query = query.filter(cls.status == "completed")
        query = (
            query.order_by(desc(cls.id)) if order == "desc" else query.order_by(cls.id)
        )
        return query

    @classmethod
    def append(cls, q):
        item = ModelAniLifeItem()
        item.content_code = q["content_code"]
        item.season = q["season"]
        item.episode_no = q["epi_queue"]
        item.title = q["content_title"]
        item.episode_title = q["title"]
        item.anilife_va = q["va"]
        item.anilife_vi = q["_vi"]
        item.anilife_id = q["_id"]
        item.quality = q["quality"]
        item.filepath = q["filepath"]
        item.filename = q["filename"]
        item.savepath = q["savepath"]
        item.video_url = q["url"]
        item.vtt_url = q["vtt"]
        item.thumbnail = q["thumbnail"]
        item.status = "wait"
        item.anilife_info = q["anilife_info"]
        item.save()