实用脚本
Convert to UTF-8
#!/usr/bin/python
import argparse
import chardet
import codecs
import os
def detect_encoding(filepath):
    """Guess the character encoding of a file using chardet.

    The whole file is read into memory as bytes and handed to
    ``chardet.detect`` in a single call.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of chardet's result.
    """
    with open(filepath, 'rb') as handle:
        guess = chardet.detect(handle.read())
    return guess['encoding']
def convert_file(input_path, output_path):
    """Re-encode a single text file as UTF-8.

    The source encoding is obtained from ``detect_encoding``. The file is
    decoded with it and the text is written to ``output_path`` as UTF-8.
    Missing parent directories of ``output_path`` are created first.

    Args:
        input_path: Path of the file to read.
        output_path: Path of the UTF-8 file to write; an existing file is
            overwritten.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        UnicodeDecodeError: If the content does not decode with the encoding
            that was selected.
        LookupError: If the detected encoding name is unknown to Python.
    """
    encoding = detect_encoding(input_path)
    print(f"Converting: {input_path} (detected: {encoding})")

    # The detector may report no encoding at all (chardet documents None for
    # input it cannot classify, such as an empty file). Decode as UTF-8
    # explicitly in that case instead of depending on the locale default.
    source_encoding = encoding or 'utf-8'

    # newline='' on both the read and the write passes line endings through
    # unchanged, so CRLF input is not turned into '\r\r\n' on Windows.
    with open(input_path, 'r', encoding=source_encoding, newline='') as fi:
        text = fi.read()

    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is named.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8', newline='') as fo:
        fo.write(text)
def convert_folder(input_dir, output_dir):
    """Convert every file under ``input_dir`` into ``output_dir``.

    The tree rooted at ``input_dir`` is walked with ``os.walk``. Each file is
    passed to ``convert_file`` together with a destination path that has the
    same position relative to ``output_dir`` as the source has relative to
    ``input_dir``.

    Args:
        input_dir: Root directory whose files are converted.
        output_dir: Root directory under which the converted files are
            written.
    """
    for current_dir, _subdirs, filenames in os.walk(input_dir):
        sources = (os.path.join(current_dir, filename) for filename in filenames)
        for source in sources:
            # Mirror the source's location relative to the input root.
            target = os.path.join(output_dir, os.path.relpath(source, input_dir))
            convert_file(source, target)
if __name__ == '__main__':
    # Command-line entry point: two positional paths plus an optional -r flag.
    cli = argparse.ArgumentParser(
        description='Convert text file(s) encoding to UTF-8.'
    )
    cli.add_argument('input_path', help='Path to input file or folder')
    cli.add_argument('output_path', help='Path to output file or folder')
    cli.add_argument(
        '-r',
        '--recursive',
        action='store_true',
        help='Process all files in the folder recursively',
    )
    options = cli.parse_args()

    # With -r both paths are handled as directories; otherwise as single files.
    run = convert_folder if options.recursive else convert_file
    run(options.input_path, options.output_path)