https://github.com/suminb/hanja
python 3.x에서 설치하기
1. GitHub 저장소 페이지에서 Download ZIP으로 소스를 내려받은 후 압축 풀기
2. 관리자 권한으로 명령프롬프트 실행
3. setup.py 가 있는 폴더로 이동
- cd C:\source\test\pylib\hanja-develop\hanja-develop
4. pip install . 실행
- 의존성 오류가 발생하면 오류 메시지에 표시된 패키지를 먼저 설치(pip install 패키지명)한 뒤 다시 실행
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
# -*- coding: utf-8 -*-
import pandas as pd
import os
import hanja
from hanja import hangul
from datetime import datetime
# Timestamp used as a suffix of the report file name: year through minute,
# followed by a literal "00" in place of the seconds.
# NOTE: this rebinds the name `datetime` from the imported class to a plain
# string, so `datetime.now()` can no longer be called after this line.
datetime = "{0:%Y%m%d%H%M00}".format(datetime.now())
def hanja_hangul_trans(file_path):
    """Report every Hanja run found in one text file, with its translation.

    The file is decoded as UTF-8 and, if that fails, as cp949.  Each line is
    split with ``hanja.split_hanja``; every stripped piece that is non-empty
    and made up only of characters accepted by ``hanja.is_hanja`` is paired
    with ``hanja.translate(piece, 'substitution')`` (presumably its Hangul
    reading -- confirm against the hanja package documentation).

    Output is written to the module-level ``save_file`` handle, which must
    already be open for writing: one line holding ``file_path``, followed by
    one ``<piece>,<translation>`` line per distinct piece in order of first
    appearance.  Nothing is written for a file without any Hanja.

    A file that decodes in neither encoding is skipped with a message on
    standard error, so a single odd file does not abort a whole scan.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import sys  # local import: only needed for the skip message below

    # Try the encodings in order.  Each attempt uses its own `with` so the
    # handle is closed even when decoding fails half-way through the file.
    lines = None
    for encoding in ("utf8", "cp949"):
        try:
            with open(file_path, mode="r", encoding=encoding) as target_file:
                lines = target_file.readlines()
        except UnicodeDecodeError:
            continue
        break

    if lines is None:
        print("skipped (neither utf8 nor cp949): {0}".format(file_path),
              file=sys.stderr)
        return

    # Hanja run -> translation.  A dict keeps first-seen order (Python 3.7+),
    # gives O(1) duplicate detection and translates each run only once.
    translations = {}
    for line in lines:
        for text_item in hanja.split_hanja(line):
            items = text_item.strip()
            if not items or items in translations:
                continue
            if all(hanja.is_hanja(ch) for ch in items):
                translations[items] = hanja.translate(items, 'substitution')

    if not translations:
        return

    # Header line with the file path, then one "hanja,translation" per run.
    print("{0}".format(file_path), file=save_file)
    for items, translated in translations.items():
        print("{0},{1}".format(items, translated), file=save_file)
# Scan a single folder tree and write the findings to one report file.
# NOTE(review): the brace-wrapped values below look like template
# placeholders -- replace them with real paths before running.
root_dir = r"{{ROOT_FOLDER}}"
# `datetime` here is the module-level timestamp string, not the class.
save_file_path = r"{{저장파일}}-{0}.txt".format(datetime)

# `save_file` has to stay a module-level name: hanja_hangul_trans() writes to
# it as a global.  The `with` block closes it, so no explicit close() call.
with open(save_file_path, "w+", encoding="utf8") as save_file:
    for root, _dirs, files in os.walk(root_dir):
        for file_name in files:
            # Only Java sources and JSP pages are scanned (case-sensitive).
            if os.path.splitext(file_name)[1] in (".java", ".jsp"):
                # os.path.join picks the right separator for the platform and
                # avoids the invalid "\{" escape of a hand-built path.
                hanja_hangul_trans(os.path.join(root, file_name))
|
cs |