"""Remove Chinese characters and Chinese punctuation from C++ source files.

Run as a script, this walks the ``soupath`` directory tree and rewrites every
``*.cpp`` file in place, dropping each character matched by
``ChinesePattern`` or ``ChineseCharPattern``.  Files are read and written as
UTF-8.
"""
import os
import re
import shutil
import sys
import tempfile

# Default root of the tree to clean; resolved after the chdir below.
soupath = "../src"

# Switch to the directory that holds the launched script so the relative
# ``soupath`` does not depend on where the interpreter was started from.
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))

# Chinese characters (Han ideographs).
# NOTE(review): the CJK Unified Ideographs block runs up to U+9FFF while this
# range stops at U+9FA0 - confirm the narrower range is intentional.
ChinesePattern = re.compile(u'[\u4e00-\u9FA0]+')
# Chinese punctuation: ideographic/full-width full stop, semicolon, comma,
# colon, curly double quotes, parentheses, enumeration comma, question mark
# and double angle brackets.
ChineseCharPattern = re.compile(u'[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]+')


def print_files(soupath):
    """Recursively strip Chinese text from every ``.cpp`` file under *soupath*.

    Sub-directories are processed first, then the ``.cpp`` files that sit
    directly in *soupath*.  Each file is replaced by its cleaned content.

    Args:
        soupath: Directory to process.

    Raises:
        SystemExit: With status 1 when a file cannot be read or rewritten
            (after ``"<path> failure"`` has been printed), or when ``parse``
            rejects a file that is not valid UTF-8.
        OSError: If *soupath* itself cannot be listed.
    """
    entries = os.listdir(soupath)

    for name in entries:
        child = os.path.join(soupath, name)
        if os.path.isdir(child):
            print_files(child)

    for name in entries:
        path = os.path.join(soupath, name)
        if not (name.endswith(".cpp") and os.path.isfile(path)):
            continue
        try:
            _rewrite_file(path)
        except OSError:
            print(path + " failure")
            sys.exit(1)


def _rewrite_file(path):
    """Replace *path* with its cleaned content without risking the original."""
    # Read and clean first: if this fails nothing on disk has been touched.
    cleaned = parse(path)

    # A uniquely named scratch file in the same directory cannot collide with
    # a real source file, and lets os.replace swap it in as a single step.
    handle, scratch = tempfile.mkstemp(
        suffix=".tmp", dir=os.path.dirname(path) or "."
    )
    try:
        with open(handle, "w", encoding="utf-8") as out:
            out.write(cleaned)
        # mkstemp creates the file owner-only; keep the source's permissions.
        shutil.copymode(path, scratch)
        os.replace(scratch, path)
    except BaseException:
        # Do not leave the scratch file behind; the original is still intact.
        try:
            os.remove(scratch)
        except OSError:
            pass
        raise


def parse(filename):
    """Return the text of *filename* with Chinese characters removed.

    Every character matched by ``ChinesePattern`` or ``ChineseCharPattern``
    is dropped; all other characters are kept in their original order.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        The filtered file content as a string.

    Raises:
        SystemExit: With status 1, after printing ``"<filename> error"``,
            when the file is not valid UTF-8.
        OSError: If the file cannot be opened or read.
    """
    try:
        with open(filename, "r", encoding="utf-8") as source:
            text = source.read()
    except UnicodeDecodeError:
        print(filename + " error")
        sys.exit(1)
    # Both patterns are plain character classes, so two substitution passes
    # remove exactly the characters a per-character filter would skip.
    return ChineseCharPattern.sub("", ChinesePattern.sub("", text))


if __name__ == '__main__':
    print_files(soupath)