인프런 파일명 변경 테스트 for loop 병렬 처리
This commit is contained in:
@@ -18,9 +18,10 @@ import asyncio
|
||||
from urllib.parse import urlparse
|
||||
import json
|
||||
|
||||
|
||||
import aiohttp
|
||||
|
||||
packages = ["beautifulsoup4"]
|
||||
packages = ["beautifulsoup4", "joblib"]
|
||||
for package in packages:
|
||||
try:
|
||||
import package
|
||||
@@ -31,6 +32,7 @@ for package in packages:
|
||||
|
||||
# third-party
|
||||
import requests
|
||||
from joblib import Parallel, delayed
|
||||
from lxml import html, etree
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib import parse
|
||||
@@ -847,74 +849,20 @@ class LogicInflearn(object):
|
||||
#
|
||||
# logger.debug(f"ret_data():: ret_data=> {ret_data}")
|
||||
|
||||
for idx, item in enumerate(items):
|
||||
#
|
||||
temp1 = {}
|
||||
print("idx::", idx)
|
||||
data_id = item["data-id"]
|
||||
|
||||
run_time = ""
|
||||
title = item.find("div", attrs={"class": "title"}).get_text()
|
||||
if item.find("span", {"class": "runtime"}) is not None:
|
||||
run_time = item.find("span", {"class": "runtime"}).get_text()
|
||||
api_url = f"{base_url}/api/course/{code}/lecture/{data_id}"
|
||||
|
||||
temp1["season"] = "1"
|
||||
LogicInflearn.season = "1"
|
||||
# logger.debug(api_url)
|
||||
m3u8_info = LogicInflearn.getM3u8_info(
|
||||
api_url, LogicInflearn.season, idx, main_title
|
||||
temp1 = Parallel(n_jobs=4, prefer="threads")(
|
||||
delayed(LogicInflearn.parsing_info)(
|
||||
item, idx, base_url, code, main_title, data
|
||||
)
|
||||
# print(api_url)
|
||||
# print('type::::', type(m3u8_url))
|
||||
logger.debug(m3u8_info)
|
||||
# ws.append(
|
||||
# [
|
||||
# title,
|
||||
# data_id,
|
||||
# run_time,
|
||||
# api_url,
|
||||
# m3u8_info["name"],
|
||||
# m3u8_info["hlsUrl"],
|
||||
# ]
|
||||
# )
|
||||
for idx, item in enumerate(items)
|
||||
)
|
||||
|
||||
# temp.append(title, data_id, run_time, api_url,m3u8_info['name'], m3u8_info['hlsUrl'])
|
||||
# temp1['title'] = title
|
||||
temp1["save_folder"] = Util.change_text_for_use_filename(
|
||||
data["save_folder"]
|
||||
)
|
||||
|
||||
# logger.debug(temp1["save_folder"])
|
||||
|
||||
tmp_save_path = ModelSetting.get("download_path")
|
||||
if ModelSetting.get("auto_make_folder") == "True":
|
||||
program_path = os.path.join(tmp_save_path, temp1["save_folder"])
|
||||
temp1["save_path"] = program_path
|
||||
if ModelSetting.get("inflearn_auto_make_season_folder"):
|
||||
temp1["save_path"] = os.path.join(
|
||||
temp1["save_path"], "Season %s" % int(temp1["season"])
|
||||
)
|
||||
|
||||
temp1["title"] = title
|
||||
temp1["data_id"] = data_id
|
||||
temp1["item_id"] = m3u8_info["data_id"]
|
||||
temp1["code"] = temp1["item_id"]
|
||||
temp1["run_time"] = run_time
|
||||
temp1["api_url"] = api_url
|
||||
temp1["name"] = m3u8_info["name"]
|
||||
temp1["filename"] = m3u8_info["filename"]
|
||||
# logger.debug(temp1["name"])
|
||||
# logger.debug(temp1["filename"])
|
||||
temp1["url"] = m3u8_info["hlsUrl"]
|
||||
# temp1["url"] = m3u8_info["hlsUrl"]
|
||||
temp1["size"] = m3u8_info["size"]
|
||||
temp.append(temp1)
|
||||
logger.debug(temp1)
|
||||
|
||||
temp.append(temp1)
|
||||
# print(temp)
|
||||
# logger.info('data', data)
|
||||
# LogicInflearn.current_data = temp
|
||||
data["episode"] = temp
|
||||
data["episode"] = temp1
|
||||
LogicInflearn.current_data = data
|
||||
# logger.debug(data)
|
||||
|
||||
@@ -982,6 +930,62 @@ class LogicInflearn(object):
|
||||
"filename": filename,
|
||||
}
|
||||
|
||||
@staticmethod
def parsing_info(item, idx, base_url, code, main_title, data):
    """Build the episode record for a single lecture entry.

    Args:
        item: Parsed lecture element. It is indexed with ``"data-id"`` and
            searched with ``.find(...)``, so it is presumably a
            BeautifulSoup tag -- confirm against the caller.
        idx: Position of the lecture in the course listing; forwarded
            unchanged to ``LogicInflearn.getM3u8_info``.
        base_url: Site root used to build the lecture API URL.
        code: Course identifier used in the lecture API URL.
        main_title: Course title; forwarded unchanged to
            ``LogicInflearn.getM3u8_info``.
        data: Mapping that must contain ``"save_folder"``.

    Returns:
        dict: Episode record with the keys ``season``, ``save_folder``,
        ``title``, ``data_id``, ``item_id``, ``code``, ``run_time``,
        ``api_url``, ``name``, ``filename``, ``url`` and ``size``, plus
        ``save_path`` when the ``auto_make_folder`` setting equals
        ``"True"``.
    """
    lecture_id = item["data-id"]

    title = item.find("div", attrs={"class": "title"}).get_text()
    # The runtime badge is optional; fall back to an empty string.
    runtime_tag = item.find("span", {"class": "runtime"})
    run_time = runtime_tag.get_text() if runtime_tag is not None else ""

    api_url = f"{base_url}/api/course/{code}/lecture/{lecture_id}"

    # The season is hard-coded to "1" and mirrored onto the class attribute
    # before the stream information is requested.
    season = "1"
    LogicInflearn.season = season
    m3u8_info = LogicInflearn.getM3u8_info(
        api_url, LogicInflearn.season, idx, main_title
    )
    logger.debug(m3u8_info)

    episode = {
        "season": season,
        "save_folder": Util.change_text_for_use_filename(data["save_folder"]),
    }

    download_root = ModelSetting.get("download_path")
    # NOTE(review): "save_path" is only set when auto_make_folder is "True";
    # confirm that consumers tolerate the key being absent otherwise.
    if ModelSetting.get("auto_make_folder") == "True":
        episode["save_path"] = os.path.join(download_root, episode["save_folder"])
        # NOTE(review): this setting is tested for truthiness while the one
        # above is compared with the string "True"; if the getter returns
        # strings, "False" would also pass here -- confirm.
        if ModelSetting.get("inflearn_auto_make_season_folder"):
            episode["save_path"] = os.path.join(
                episode["save_path"], "Season %s" % int(episode["season"])
            )

    item_id = m3u8_info["data_id"]
    episode.update(
        {
            "title": title,
            "data_id": lecture_id,
            "item_id": item_id,
            "code": item_id,
            "run_time": run_time,
            "api_url": api_url,
            "name": m3u8_info["name"],
            "filename": m3u8_info["filename"],
            "url": m3u8_info["hlsUrl"],
            "size": m3u8_info["size"],
        }
    )
    return episode
|
||||
|
||||
@staticmethod
|
||||
def getHtml(url, header):
|
||||
o = parse.urlparse(url)
|
||||
|
||||
Reference in New Issue
Block a user