import os
import re
# Matches an HTML <img ...> tag, in any letter case, including tags that
# span several lines ([^>] also matches newlines).
_IMG_TAG_RE = re.compile(r'<img\b[^>]*>', re.IGNORECASE)

# Matches an http(s) URL that ends in a common image file extension.
# The extension is matched case-insensitively so ".JPG" is removed too.
_IMG_URL_RE = re.compile(
    r'https?://[^\s]+\.(?:jpg|jpeg|png|gif|bmp|webp)', re.IGNORECASE
)


def clean_image_content(file_path):
    """Remove image tags and image URLs from a single text file, in place.

    The file is read as UTF-8 (undecodable bytes are dropped), every HTML
    ``<img>`` tag and every http(s) URL ending in an image extension is
    removed, each remaining line is stripped of leading/trailing
    whitespace, and lines that end up empty are discarded. The result is
    written back to the same path as UTF-8.

    Args:
        file_path: Path of the text file to rewrite.

    Returns:
        True if the file was read, cleaned and written back; False if any
        step raised (the error is printed, not re-raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        # Drop whole <img> tags first so URLs inside their attributes go
        # away together with the tag.
        content = _IMG_TAG_RE.sub('', content)
        content = _IMG_URL_RE.sub('', content)

        # Trim every line and keep only the ones that still have text.
        stripped_lines = (line.strip() for line in content.split('\n'))
        content = '\n'.join(line for line in stripped_lines if line)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        return True
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job,
        # so one bad file is reported and skipped instead of aborting.
        print(f"处理文件 {file_path} 时出错: {e}")
        return False
def clean_all_txt_files(directory):
    """Clean every ``.txt`` file found under ``directory``, recursively.

    Each matching file is passed to ``clean_image_content``. A line is
    printed for every file that was cleaned successfully, followed by a
    final summary of how many files were seen and how many succeeded.

    Args:
        directory: Root directory to walk.
    """
    total = 0
    cleaned = 0
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # Only plain-text files are candidates for cleaning.
            if not name.endswith('.txt'):
                continue
            total += 1
            path = os.path.join(folder, name)
            # Failures are already reported by clean_image_content itself.
            if not clean_image_content(path):
                continue
            cleaned += 1
            print(f"已处理: {path}")
    print(f"\n处理完成!共 {total} 个文件,成功处理 {cleaned} 个")
if __name__ == '__main__':
    # Script entry point: clean the hard-coded target directory.
    clean_all_txt_files(r'f:\youxi\youxitxt')
clean_images
本文摘要: import os import re def clean_image_content(file_path): """清理单个txt文件中的图片标签和图片链接""" try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f....
-- 展开阅读全文 --

暂无评论,5人围观