# Remove duplicate lines from every ``.txt`` file in ``directory``, keeping
# the first occurrence of each line and rewriting the file in place.
import hashlib
import os

directory = '.'


def line_hash(line):
    """Return the hexadecimal MD5 digest of *line* encoded as UTF-8."""
    return hashlib.md5(line.encode('utf-8')).hexdigest()


def _write_unique_lines(filepath, temp_file, encoding):
    """Copy the first occurrence of each line of *filepath* to *temp_file*.

    The input is decoded with *encoding*; the output is always written as
    UTF-8. Raises ``UnicodeDecodeError`` if the input cannot be decoded.
    """
    seen = set()
    with open(filepath, 'r', encoding=encoding) as infile, \
            open(temp_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # Key on the content without the line terminator so that a final
            # line lacking a trailing newline still matches an identical
            # earlier line.
            hash_val = line_hash(line.rstrip('\n'))
            if hash_val not in seen:
                seen.add(hash_val)
                outfile.write(line)


def dedupe_file(filepath):
    """Remove duplicate lines from *filepath* in place.

    The file is read as UTF-8; if that fails with ``UnicodeDecodeError`` it
    is re-read as Latin-1. The result is written as UTF-8 to
    ``filepath + ".tmp"`` (overwriting any file already at that path) and
    then moved over the original with ``os.replace``.

    Any exception from reading, writing or replacing is re-raised after the
    temporary file has been removed.
    """
    temp_file = filepath + ".tmp"
    try:
        try:
            _write_unique_lines(filepath, temp_file, 'utf-8')
        except UnicodeDecodeError:
            # Reopening the temp file in 'w' mode truncates the partial
            # output left behind by the failed UTF-8 attempt.
            _write_unique_lines(filepath, temp_file, 'latin-1')
        os.replace(temp_file, filepath)
    except BaseException:
        # Do not leave a stale, partially written temp file behind.
        try:
            os.remove(temp_file)
        except OSError:
            pass
        raise


for filename in os.listdir(directory):
    if filename.endswith('.txt'):
        filepath = os.path.join(directory, filename)
        # Skip directories (or other non-regular entries) whose name happens
        # to end in ".txt"; opening them would raise and abort the whole run.
        if os.path.isfile(filepath):
            dedupe_file(filepath)