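# Hard-won lesson: rename every file to its MD5 hash plus a timestamp so that
# files sharing a name never overwrite each other.
# Once everything is merged, the files can be sorted without worry.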
import os
import hashlib
import shutil
# Folders that are walked recursively for .mp4 files.
source_dirs = [
    "video/vox2/8-12/10s",
    "video/vox2/12-20/10s",
    "video/vox2/20-30/10s",
    "video/vox2/30-60/10s",
    "video/vox2/60-inf",
]
# Single flat folder that receives the renamed copies.
target_dir = "video/vox2/10s"


def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in *chunk_size*-byte pieces so that large videos are
    never loaded into memory in one go.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _unique_destination(dest_dir, file_hash, modification_time, ext):
    """Return a path inside *dest_dir* that does not exist yet.

    The name has the form ``<file_hash>_<modification_time><ext>``. If that
    name is already taken, the timestamp part is increased by 0.001 until a
    free name is found, so an existing file is never overwritten.
    """
    candidate = os.path.join(dest_dir, f"{file_hash}_{modification_time}{ext}")
    while os.path.exists(candidate):
        modification_time += 0.001
        candidate = os.path.join(
            dest_dir, f"{file_hash}_{modification_time}{ext}"
        )
    return candidate


def merge_videos(src_dirs, dest_dir):
    """Copy every .mp4 file found under *src_dirs* into *dest_dir*.

    Each copy is renamed to ``<md5 of content>_<mtime><ext>`` so that files
    with identical names in different source folders cannot clobber each
    other. Missing source folders are reported and skipped; files without an
    ``.mp4`` extension (case-insensitive) are reported and ignored.

    Args:
        src_dirs: Iterable of directory paths to walk recursively.
        dest_dir: Directory receiving the copies; created if necessary.

    Returns:
        The number of files copied.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dest_dir, exist_ok=True)
    copied = 0
    for source_dir in src_dirs:
        if not os.path.exists(source_dir):
            print(f"Warning: Directory does not exist: {source_dir}")
            continue
        print(f"Processing directory: {source_dir}")
        for root, _dirs, files in os.walk(source_dir):
            for filename in files:
                src_file_path = os.path.join(root, filename)
                print(f"Checking file: {src_file_path}")
                if not filename.lower().endswith('.mp4'):
                    print(f"Ignored file: {src_file_path}")
                    continue
                # Keep the extension exactly as spelled in the source name.
                ext = os.path.splitext(filename)[1]
                dst_file_path = _unique_destination(
                    dest_dir,
                    _md5_of_file(src_file_path),
                    os.path.getmtime(src_file_path),
                    ext,
                )
                # copy2 copies the data and also attempts to keep metadata.
                shutil.copy2(src_file_path, dst_file_path)
                print(f'Copied "{filename}" from "{root}" to "{dst_file_path}"')
                copied += 1
    return copied


merge_videos(source_dirs, target_dir)
print("All videos have been merged successfully.")