# -*- coding: utf-8 -*-
# 여러종류의 파일 encoding 이 존재하는 폴더를 검색 할때는 encoding별로 검색해야 제대로 검색된다.
import pandas as pd
import os
import hanja
from hanja import hangul
from datetime import datetime
# Encodings to try; each one is used for a separate pass over the folder.
file_encodings = ["cp949","utf8"]
# Timestamp used in the result file name: current time down to the minute,
# with a literal "00" in place of the seconds.
# NOTE: this rebinds the imported `datetime` name to a string, so the class
# is no longer reachable under that name further down the file.
datetime = datetime.now().strftime("%Y%m%d%H%M00")
target_texts = [] # String or list of strings to search for
except_files = [] # File names to exclude from the search
def search_text(file_path, encoding_word, targets=None, out=None):
    """Report every line of a file that contains one of the search strings.

    The file is decoded with ``encoding_word``. When at least one line
    matches, the file path is printed, followed by one
    ``"<line number> - <line>"`` entry per matching line (1-based numbering,
    leading whitespace stripped, the line's own newline kept). A line is
    reported once even if it contains several of the search strings.
    Nothing is printed for a file without matches or for a file that cannot
    be decoded with ``encoding_word``.

    Args:
        file_path: Path of the text file to scan.
        encoding_word: Codec name used to decode the file.
        targets: A search string or an iterable of search strings.
            Defaults to the module-level ``target_texts``.
        out: Writable text stream that receives the report.
            Defaults to the module-level ``save_file``.

    Returns:
        None.
    """
    # Resolve the defaults at call time so the module-level settings are
    # picked up even when they are assigned after this function is defined.
    if targets is None:
        targets = target_texts
    if out is None:
        out = save_file

    # A bare string would otherwise be iterated character by character,
    # matching every line that contains any single character of it.
    if isinstance(targets, str):
        targets = (targets,)
    else:
        targets = tuple(targets)

    matches = []
    try:
        # `with` guarantees the handle is closed on every path, including
        # a decoding failure part-way through the file.
        with open(file_path, mode="r", encoding=encoding_word) as target_file:
            for line_no, line in enumerate(target_file, start=1):
                if any(text in line for text in targets):
                    matches.append("{0} - {1}".format(line_no, line.lstrip()))
    except UnicodeDecodeError:
        # The file is not valid in this encoding: skip it silently and
        # discard partial matches, so nothing is reported for it.
        return

    if not matches:
        return

    print(file_path, file=out)
    for entry in matches:
        print("{0}".format(entry), file=out)
# Per-run settings: the folder to scan and the report file to write.
root_dir = r"{{ROOT_DIR}}"
save_file_path = r"{{SAVE_FILE_FOLDER}}\find_text_result-{0}.txt".format(datetime)

# The whole tree is walked once per encoding; each pass writes its own
# "encoding : ..." header followed by the matches found in that pass.
# The `with` block closes the report file, so no explicit close() is needed.
with open(save_file_path, "w+", encoding="utf8") as save_file:
    for encoding_word in file_encodings:
        print("encoding : {0} \n".format(encoding_word), file=save_file)
        for (root, dirs, files) in os.walk(root_dir):
            for file_name in files:
                filename, fileExtension = os.path.splitext(file_name)
                if fileExtension in (".java", ".jsp", ".js", ".html") and file_name not in except_files:
                    # os.path.join uses the platform separator and avoids
                    # the invalid "\{" escape of a hand-built path.
                    trans_file_path = os.path.join(root, file_name)
                    search_text(trans_file_path, encoding_word)  # run the search
# Tested with Python 3.6.