본문 바로가기

Web_Application/python

대상 폴더 안의 파일에서 문자열 검색



# -*- coding: utf-8 -*-

# When the folder mixes files saved in different encodings, search once per encoding so that every file is read correctly.

import pandas as pd
import os
import hanja
from hanja import hangul
from datetime import datetime

# Encodings to try when reading the searched files.
file_encodings = ["cp949", "utf8"]

# User-supplied search settings; both lists start out empty.
target_texts = []  # strings to search for
except_files = []  # file names to exclude from the search

# Run timestamp (minute resolution, seconds fixed to "00").
# NOTE(review): this rebinds the imported `datetime` class to a string, so the
# class is no longer reachable under this name after this line.
datetime = datetime.now().strftime("%Y%m%d%H%M00")

def search_text(file_path,encoding_word):
    """Write every line of a file that contains a search string to the result file.

    The file is decoded with ``encoding_word`` and each line is checked
    against the module-level ``target_texts`` list. When at least one line
    matches, the file path is printed to the module-level ``save_file``,
    followed by one ``"<line number> - <line>"`` entry per matching line
    (line numbers are 1-based, leading whitespace is stripped). A line that
    contains several search strings is reported once.

    Args:
        file_path: Path of the file to search.
        encoding_word: Encoding name used to decode the file.

    Returns:
        None. Nothing is written when no line matches or when the file
        cannot be decoded with ``encoding_word``.
    """
    matches = []

    try:
        # The context manager closes the handle even when decoding fails.
        with open(file_path, mode="r", encoding=encoding_word) as target_file:
            for line_number, line in enumerate(target_file, start=1):
                if any(target_text in line for target_text in target_texts):
                    matches.append("{0} - {1}".format(line_number, line.lstrip()))
    except UnicodeDecodeError:
        # The file is not valid in this encoding; the caller retries the
        # whole tree with the other encodings, so skip it quietly here.
        return

    if matches:
        print(file_path, file=save_file)
        for match in matches:
            print("{0}".format(match), file=save_file)

# Folder whose tree is searched (looks like a template placeholder; replace
# it with a real path before running).
root_dir = r"{{ROOT_DIR}}"

# Folder that receives the result file (also a placeholder). It is joined
# with os.path.join instead of being passed through str.format, so braces in
# the folder name are left untouched and the separator fits the platform.
save_file_folder = r"{{SAVE_FILE_FOLDER}}"
save_file_path = os.path.join(
    save_file_folder, "find_text_result-{0}.txt".format(datetime)
)

# Only files with these extensions are searched.
target_extensions = (".java", ".jsp", ".js", ".html")

# `save_file` is bound at module level because search_text() writes to it.
with open(save_file_path, "w+", encoding="utf8") as save_file:
    # The whole tree is walked once per encoding; each pass gets a header.
    for encoding_word in file_encodings:
        print("encoding : {0} \n".format(encoding_word), file=save_file)

        for root, _dirs, files in os.walk(root_dir):
            for file_name in files:
                if file_name in except_files:
                    continue
                if os.path.splitext(file_name)[1] not in target_extensions:
                    continue

                # Run the search on this file.
                search_text(os.path.join(root, file_name), encoding_word)

Python 3.6에서 테스트되었습니다.