linkkf 로직수정중
This commit is contained in:
@@ -1,3 +1,6 @@
|
||||
import re
|
||||
import traceback
|
||||
|
||||
from . import logger
|
||||
|
||||
|
||||
@@ -5,22 +8,78 @@ class SupportString(object):
|
||||
@classmethod
|
||||
def get_cate_char_by_first(cls, title):  # get_first
    """Return the index bucket for the first character of ``title``.

    A Hangul syllable ('가' through '힣', inclusive) maps to the basic
    syllable that opens its range: '가', '나', '다', ... '하'. Anything
    else -- digits, Latin letters, punctuation, other scripts, or an empty
    title -- maps to '0Z'.

    Args:
        title: Text whose first character selects the bucket.

    Returns:
        One of '가', '나', '다', '라', '마', '바', '사', '아', '자', '차',
        '카', '타', '파', '하' or '0Z'.
    """
    if not title:
        return '0Z'

    first = title[0].upper()

    # Check both ends explicitly: a character that sorts below '가' must
    # never land in a Hangul bucket, and '힣' itself belongs to '하'.
    if not '가' <= first <= '힣':
        return '0Z'

    # Bucket starts are in ascending code-point order, so scanning them
    # backwards finds the last start that is <= the character.
    for start in reversed('가나다라마바사아자차카타파하'):
        if first >= start:
            return start
    return '0Z'
|
||||
|
||||
|
||||
@classmethod
|
||||
def is_include_hangul(cls, text):
    """Report whether ``text`` contains at least one Hangul character.

    A character counts as Hangul when it lies in U+3130-U+318F
    (compatibility jamo) or U+AC00-U+D7A3 (precomposed syllables).

    Args:
        text: String to inspect.

    Returns:
        True if a Hangul character is found. False otherwise, including
        when ``text`` is not something a str pattern can search (e.g. None).
    """
    try:
        # One match is enough; no need to collect every run.
        return re.search('[\u3130-\u318F\uAC00-\uD7A3]', text) is not None
    except TypeError:
        # re raises TypeError for None, bytes and other non-str input.
        return False
|
||||
|
||||
@classmethod
|
||||
def language_info(cls, text):
    """Estimate how much of ``text`` is Hangul and how much is Latin letters.

    Spaces are dropped first. Digits and the listed punctuation/symbol
    characters are excluded from the denominator, so the percentages are
    relative to the remaining characters only.

    Args:
        text: String to analyse.

    Returns:
        A ``(hangul_percent, latin_percent)`` tuple of ints, rounded down.
        ``(0, 0)`` when nothing is left after excluding digits and symbols.
        ``False`` if any exception occurs (it is logged, not raised).
    """
    try:
        compact = text.strip().replace(' ', '')
        total = len(compact)
        hangul = len(re.findall('[\u3130-\u318F\uAC00-\uD7A3]', compact))
        latin = len(re.findall('[a-zA-Z]', compact))
        ignored = len(re.findall('[0-9]', compact))
        # Raw string: the same character class as before, without relying on
        # invalid string escapes such as "\?" or "\(".
        ignored += len(re.findall(r'[-=+,#/\?:^$.@*"※~&%ㆍ!』\‘|\(\)\[\]\<\>`\'…》:]', compact))

        countable = total - ignored
        if countable == 0:
            return (0, 0)
        # Floor division keeps the arithmetic in integers.
        return (hangul * 100 // countable, latin * 100 // countable)
    except Exception as e:
        logger.error(f"Exception:{str(e)}")
        logger.error(traceback.format_exc())
        return False
|
||||
|
||||
@classmethod
|
||||
def remove_special_char(cls, text):
    """Return ``text`` with the listed punctuation and symbol characters removed.

    The removed set is exactly the character class in the pattern below.
    Note that a backslash is not part of that set: in the pattern it only
    escapes the character that follows it.

    Args:
        text: String to clean.

    Returns:
        A copy of ``text`` without those characters.
    """
    # Raw string: the same character class as before, without relying on
    # invalid string escapes such as "\?" or "\(".
    return re.sub(r'[-=+,#/\?:^$.@*"※~&%ㆍ!』\‘|\(\)\[\]\<\>`\'…》:]', '', text)
|
||||
|
||||
|
||||
@classmethod
|
||||
def remove_emoji(cls, text, char=''):
    """Replace each run of emoji/symbol characters in ``text`` with ``char``.

    Args:
        text: String to clean.
        char: Replacement inserted once per consecutive run of matched
            characters. Defaults to the empty string, i.e. plain removal.

    Returns:
        The cleaned string.
    """
    import re

    # Members of the character class, concatenated in this exact order.
    class_members = (
        "\U0001F600-\U0001F64F",  # emoticons
        "\U0001F300-\U0001F5FF",  # symbols & pictographs
        "\U0001F680-\U0001F6FF",  # transport & map symbols
        "\U0001F1E0-\U0001F1FF",  # regional indicators (flags)
        "\U00002500-\U00002BEF",  # box drawing through misc symbols and arrows
        "\U00002702-\U000027B0",  # dingbats
        "\U00002702-\U000027B0",  # same range listed a second time
        "\U0001f926-\U0001f937",
        "\U00010000-\U0010ffff",  # every code point above the BMP
        "\u2640-\u2642",
        "\u2600-\u2B55",
        "\u200d",  # zero width joiner
        "\u23cf",
        "\u23e9",
        "\u231a",
        "\ufe0f",  # variation selector-16
        "\u3030",
    )
    matcher = re.compile("[" + "".join(class_members) + "]+", flags=re.UNICODE)
    return matcher.sub(char, text)
|
||||
Reference in New Issue
Block a user