인프런 파일명 변경 테스트 for loop 병렬 처리

This commit is contained in:
2022-05-16 14:25:43 +09:00
parent 492611a15f
commit bc191caff1

View File

@@ -18,9 +18,10 @@ import asyncio
from urllib.parse import urlparse
import json
import aiohttp
packages = ["beautifulsoup4"]
packages = ["beautifulsoup4", "joblib"]
for package in packages:
try:
import package
@@ -31,6 +32,7 @@ for package in packages:
# third-party
import requests
from joblib import Parallel, delayed
from lxml import html, etree
from bs4 import BeautifulSoup
from urllib import parse
@@ -847,74 +849,20 @@ class LogicInflearn(object):
#
# logger.debug(f"ret_data():: ret_data=> {ret_data}")
for idx, item in enumerate(items):
#
temp1 = {}
print("idx::", idx)
data_id = item["data-id"]
run_time = ""
title = item.find("div", attrs={"class": "title"}).get_text()
if item.find("span", {"class": "runtime"}) is not None:
run_time = item.find("span", {"class": "runtime"}).get_text()
api_url = f"{base_url}/api/course/{code}/lecture/{data_id}"
temp1["season"] = "1"
LogicInflearn.season = "1"
# logger.debug(api_url)
m3u8_info = LogicInflearn.getM3u8_info(
api_url, LogicInflearn.season, idx, main_title
temp1 = Parallel(n_jobs=4, prefer="threads")(
delayed(LogicInflearn.parsing_info)(
item, idx, base_url, code, main_title, data
)
# print(api_url)
# print('type::::', type(m3u8_url))
logger.debug(m3u8_info)
# ws.append(
# [
# title,
# data_id,
# run_time,
# api_url,
# m3u8_info["name"],
# m3u8_info["hlsUrl"],
# ]
# )
# temp.append(title, data_id, run_time, api_url,m3u8_info['name'], m3u8_info['hlsUrl'])
# temp1['title'] = title
temp1["save_folder"] = Util.change_text_for_use_filename(
data["save_folder"]
for idx, item in enumerate(items)
)
# logger.debug(temp1["save_folder"])
logger.debug(temp1)
tmp_save_path = ModelSetting.get("download_path")
if ModelSetting.get("auto_make_folder") == "True":
program_path = os.path.join(tmp_save_path, temp1["save_folder"])
temp1["save_path"] = program_path
if ModelSetting.get("inflearn_auto_make_season_folder"):
temp1["save_path"] = os.path.join(
temp1["save_path"], "Season %s" % int(temp1["season"])
)
temp1["title"] = title
temp1["data_id"] = data_id
temp1["item_id"] = m3u8_info["data_id"]
temp1["code"] = temp1["item_id"]
temp1["run_time"] = run_time
temp1["api_url"] = api_url
temp1["name"] = m3u8_info["name"]
temp1["filename"] = m3u8_info["filename"]
# logger.debug(temp1["name"])
# logger.debug(temp1["filename"])
temp1["url"] = m3u8_info["hlsUrl"]
# temp1["url"] = m3u8_info["hlsUrl"]
temp1["size"] = m3u8_info["size"]
temp.append(temp1)
# print(temp)
# logger.info('data', data)
# LogicInflearn.current_data = temp
data["episode"] = temp
data["episode"] = temp1
LogicInflearn.current_data = data
# logger.debug(data)
@@ -982,6 +930,62 @@ class LogicInflearn(object):
"filename": filename,
}
@staticmethod
def parsing_info(item, idx, base_url, code, main_title, data):
    """Assemble the episode dictionary for a single lecture entry.

    Reads the lecture id, title and optional runtime from ``item``, asks
    ``LogicInflearn.getM3u8_info`` for the stream details of that lecture,
    and merges everything into one dict.

    Args:
        item: Parsed lecture element; must support ``item["data-id"]`` and
            ``item.find(...)`` (presumably a BeautifulSoup tag -- confirm
            against the caller).
        idx: Position of the lecture in the listing, forwarded unchanged to
            ``getM3u8_info``.
        base_url: Site root used to build the lecture API URL.
        code: Course code used to build the lecture API URL.
        main_title: Course title, forwarded unchanged to ``getM3u8_info``.
        data: Course-level dict; only ``data["save_folder"]`` is read here.

    Returns:
        dict with the keys ``season``, ``save_folder``, ``title``,
        ``data_id``, ``item_id``, ``code``, ``run_time``, ``api_url``,
        ``name``, ``filename``, ``url`` and ``size``; ``save_path`` is
        present only when the ``auto_make_folder`` setting equals ``"True"``.
    """
    lecture_id = item["data-id"]
    lecture_title = item.find("div", attrs={"class": "title"}).get_text()

    # The runtime span is optional; fall back to an empty string without it.
    runtime_tag = item.find("span", {"class": "runtime"})
    duration = runtime_tag.get_text() if runtime_tag is not None else ""

    lecture_api = f"{base_url}/api/course/{code}/lecture/{lecture_id}"

    # The season is fixed to "1" and mirrored on the class attribute.
    entry = {"season": "1"}
    LogicInflearn.season = "1"

    stream = LogicInflearn.getM3u8_info(
        lecture_api, LogicInflearn.season, idx, main_title
    )
    logger.debug(stream)

    entry["save_folder"] = Util.change_text_for_use_filename(data["save_folder"])

    download_root = ModelSetting.get("download_path")
    if ModelSetting.get("auto_make_folder") == "True":
        entry["save_path"] = os.path.join(download_root, entry["save_folder"])
        # Optionally add a per-season sub-folder below the course folder.
        if ModelSetting.get("inflearn_auto_make_season_folder"):
            season_dir = "Season %s" % int(entry["season"])
            entry["save_path"] = os.path.join(entry["save_path"], season_dir)

    stream_id = stream["data_id"]
    # Key order matches the order in which the fields were assigned before.
    entry.update(
        {
            "title": lecture_title,
            "data_id": lecture_id,
            "item_id": stream_id,
            "code": stream_id,
            "run_time": duration,
            "api_url": lecture_api,
            "name": stream["name"],
            "filename": stream["filename"],
            "url": stream["hlsUrl"],
            "size": stream["size"],
        }
    )
    return entry
@staticmethod
def getHtml(url, header):
o = parse.urlparse(url)