Files

85 lines
3.5 KiB
Python
Raw Permalink Normal View History

2025-12-25 19:42:32 +09:00
import re
import traceback
2022-10-02 20:18:05 +09:00
from . import logger
2022-10-07 01:48:42 +09:00
class SupportString(object):
    """String helpers for Korean/English text: bucketing, language ratio, clean-up."""

    # Bucket labels in ascending code-point order.  Each label is also the
    # first syllable of its bucket, so a syllable belongs to the last label
    # that is <= it (tensed initials such as '까' therefore land in '가').
    _CATE_BUCKETS = '가나다라마바사아자차카타파하'
    # Last code point of the precomposed Hangul syllable range ('힣').
    _LAST_HANGUL_SYLLABLE = '\uD7A3'

    # Hangul compatibility jamo (U+3130-U+318F) and syllables (U+AC00-U+D7A3).
    _HANGUL_RE = re.compile('[\u3130-\u318F\uAC00-\uD7A3]')

    # Punctuation treated as "special".  Raw string so that no invalid escape
    # sequences are needed; the backslash itself is intentionally NOT part of
    # the set, matching the behaviour of the previous pattern.
    _SPECIAL_CHAR_RE = re.compile(r"""[-=+,#/?:^$.@*"※~&%ㆍ!』|()\[\]<>`'…》:]""")

    _EMOJI_RE = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
        "\U00002500-\U00002BEF"  # box drawing .. misc symbols and arrows
        "\U00002702-\U000027B0"  # dingbats
        "\U0001f926-\U0001f937"  # people / gesture pictographs
        "\U00010000-\U0010ffff"  # every supplementary-plane code point
        "\u2640-\u2642"          # gender signs
        "\u2600-\u2B55"          # misc symbols .. heavy large circle
        "\u200d"                 # zero width joiner
        "\u23cf"                 # eject symbol
        "\u23e9"                 # fast-forward symbol
        "\u231a"                 # watch
        "\ufe0f"                 # variation selector-16
        "\u3030"                 # wavy dash
        "]+",
        flags=re.UNICODE,
    )

    @classmethod
    def get_cate_char_by_first(cls, title):  # get_first
        """Return the index bucket for ``title`` based on its first character.

        A title starting with a precomposed Hangul syllable is mapped to one
        of the 14 labels 가, 나, 다, 라, 마, 바, 사, 아, 자, 차, 카, 타, 파, 하.
        Anything else (digits, Latin letters, symbols, empty or ``None``
        titles) is mapped to ``'0Z'``.

        NOTE(review): the Hangul literals of the original comparison chain
        were empty in the reviewed copy (``ord('')`` raises ``TypeError``).
        The labels and the inclusive upper bound '힣' are reconstructed from
        the 14-branch structure -- confirm against the upstream file.

        Args:
            title: The text to classify.

        Returns:
            The bucket label, or ``'0Z'`` when no Hangul bucket applies.
        """
        if not title:
            return '0Z'
        # Hangul has no letter case, so no case folding is needed; skipping
        # it also avoids characters whose upper-case form is multi-character.
        first = title[0]
        if first <= cls._LAST_HANGUL_SYLLABLE:
            for label in reversed(cls._CATE_BUCKETS):
                if first >= label:
                    return label
        return '0Z'

    @classmethod
    def is_include_hangul(cls, text):
        """Return True when ``text`` contains at least one Hangul character.

        Args:
            text: The string to inspect.  Non-string values are tolerated.

        Returns:
            ``True`` if a Hangul jamo or syllable is present, ``False``
            otherwise (including when ``text`` is not a string).
        """
        try:
            return cls._HANGUL_RE.search(text) is not None
        except TypeError:
            # Not a str (e.g. None or bytes): treated as "no Hangul".
            return False

    @classmethod
    def language_info(cls, text):
        """Compute the share of Hangul and English letters in ``text``.

        Spaces are removed first.  Digits and the special characters removed
        by ``remove_special_char`` are excluded from the denominator.

        Args:
            text: The string to analyse.

        Returns:
            ``(han_percent, eng_percent)`` as truncated integer percentages,
            ``(0, 0)`` when nothing but digits/special characters remains, or
            ``False`` when an unexpected error occurs (the error is logged).
        """
        try:
            text = text.strip().replace(' ', '')
            all_count = len(text)
            han_count = len(cls._HANGUL_RE.findall(text))
            eng_count = len(re.findall('[a-zA-Z]', text))
            etc_count = len(re.findall('[0-9]', text))
            etc_count += len(cls._SPECIAL_CHAR_RE.findall(text))
            countable = all_count - etc_count
            if countable == 0:
                return (0, 0)
            han_percent = han_count * 100 // countable
            eng_percent = eng_count * 100 // countable
            return (han_percent, eng_percent)
        except Exception as e:
            # Best-effort helper: report the problem and signal failure
            # with False, which existing callers may rely on.
            logger.error(f"Exception:{str(e)}")
            logger.error(traceback.format_exc())
            return False

    @classmethod
    def remove_special_char(cls, text):
        """Return ``text`` with every special punctuation character removed.

        Args:
            text: The string to clean.

        Returns:
            A copy of ``text`` without the characters in the special set.
        """
        return cls._SPECIAL_CHAR_RE.sub('', text)

    @classmethod
    def remove_emoji(cls, text, char=''):
        """Replace each run of emoji/symbol characters in ``text``.

        Note that the pattern covers all supplementary-plane code points and
        several symbol blocks, so it removes more than just emoji.

        Args:
            text: The string to clean.
            char: Replacement inserted once per consecutive run (default:
                nothing, i.e. the run is deleted).

        Returns:
            The cleaned string.
        """
        return cls._EMOJI_RE.sub(char, text)