#!/usr/bin/env python3
"""
Sync group modification times in a directory tree.

New Mode: --collect-suffixes
    Scans the tree and prints a list of possible variant suffixes
    extracted from filenames (thumbnail/websize indicators, etc.).

Normal Mode:
    - Recursively finds files with matching prefix names.
    - Uses EXIF DateTimeOriginal or mtime (fallback).
    - Sets all mtimes in a group to the earliest timestamp.
"""

import argparse
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Set

try:
    from PIL import Image
    from PIL.ExifTags import TAGS
except ImportError:
    # Pillow is only needed to read EXIF data. Keeping the import optional
    # lets --collect-suffixes (which never touches EXIF) run without it;
    # main() still refuses to run normal mode when Pillow is missing.
    Image = None
    TAGS = {}


# ------------------------------------------------------------
# EXIF TIME
# ------------------------------------------------------------

def get_exif_datetime_original(path: Path) -> Optional[datetime]:
    """Try to read EXIF DateTimeOriginal.

    Returns the parsed (naive) datetime, or None when Pillow is not
    installed, the file cannot be opened as an image, it carries no EXIF
    data, or the DateTimeOriginal tag is missing or malformed.
    """
    if Image is None:
        return None
    try:
        with Image.open(path) as img:
            exif = img._getexif()
            if not exif:
                return None
            # Translate numeric tag ids into their names where known.
            exif_data = {
                TAGS.get(tag_id, tag_id): value
                for tag_id, value in exif.items()
            }
            dto = exif_data.get("DateTimeOriginal")
            if not dto:
                return None
            return datetime.strptime(dto, "%Y:%m:%d %H:%M:%S")
    except Exception:
        # Deliberately best-effort: non-image files, image formats without
        # _getexif, and corrupt tags all simply mean "no EXIF date".
        return None


def get_logical_timestamp(path: Path) -> float:
    """Return EXIF datetime if available, else mtime.

    The result is a POSIX timestamp. If the EXIF datetime cannot be
    converted to a timestamp on this platform, the file's mtime is used.
    """
    exif_dt = get_exif_datetime_original(path)
    if exif_dt is not None:
        try:
            return exif_dt.timestamp()
        except (OverflowError, OSError, ValueError):
            # Out-of-range EXIF dates must not abort the whole scan.
            pass
    return os.path.getmtime(path)


# ------------------------------------------------------------
# SUFFIX COLLECTION MODE
# ------------------------------------------------------------

def extract_suffixes(stem: str) -> List[str]:
    """
    Extract variant suffixes from a stem.

    Rules:
    - If the stem has '_' or '-', the part *after* that segment may be
      a variant.
    - We collect suffixes such as:
        IMG_1234_tn      -> _tn
        IMG_1234_ws-800  -> _ws-800
        dsc_0001-thumb   -> -thumb
    - If multiple underscores/dashes exist, collect each trailing segment.

    Every returned suffix includes its leading separator. Separators at
    the very start or very end of the stem are ignored. Suffixes are
    returned longest first (in order of separator position).
    """
    suffixes: List[str] = []
    last_index = len(stem) - 1
    for idx, char in enumerate(stem):
        # Only interior separators yield a non-empty prefix and suffix.
        if char in "_-" and 0 < idx < last_index:
            suffixes.append(stem[idx:])
    return suffixes


def collect_possible_suffixes(root: Path) -> Dict[str, int]:
    """
    Scan directory tree and collect all observed suffixes.

    Returns dict: suffix -> count
    """
    counts: Dict[str, int] = {}
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        for suffix in extract_suffixes(path.stem):
            counts[suffix] = counts.get(suffix, 0) + 1
    return counts


# ------------------------------------------------------------
# NORMAL GROUPING MODE
# ------------------------------------------------------------

def derive_group_key(stem: str, variant_suffixes: List[str]) -> str:
    """Strip known variant suffixes from the stem to find a grouping prefix.

    Markers are matched case-insensitively anywhere in the stem. The stem
    is cut at the earliest match; if nothing matches, or the earliest
    match is at position 0, the stem is returned unchanged.
    """
    s_lower = stem.lower()
    positions = []
    for marker in variant_suffixes:
        idx = s_lower.find(marker.lower())
        if idx != -1:
            positions.append(idx)

    if not positions:
        return stem

    cutoff = min(positions)
    if cutoff <= 0:
        # Cutting here would leave an empty key.
        return stem
    return stem[:cutoff]


def collect_groups(
    root: Path, variant_suffixes: List[str]
) -> Dict[str, List[Tuple[Path, float]]]:
    """Walk tree and group files by derived key.

    Returns a dict mapping each group key to a list of
    (path, logical timestamp) pairs. Grouping is by key only, so files
    with the same key in different subdirectories share a group.
    """
    groups: Dict[str, List[Tuple[Path, float]]] = {}
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        key = derive_group_key(path.stem, variant_suffixes)
        ts = get_logical_timestamp(path)
        groups.setdefault(key, []).append((path, ts))
    return groups


def adjust_group_mtimes(
    groups: Dict[str, List[Tuple[Path, float]]], dry_run: bool = False
) -> None:
    """Set all mtimes in each group to the earliest timestamp.

    Groups are processed in key order and a report line is printed for
    every file. With dry_run=True nothing is modified. Access times are
    preserved.
    """
    for group_key, items in sorted(groups.items(), key=lambda kv: kv[0]):
        earliest_ts = min(ts for _, ts in items)
        earliest_iso = datetime.fromtimestamp(earliest_ts).isoformat(sep=" ")
        print(f"\nGroup '{group_key}': {len(items)} file(s)")
        print(f" Earliest logical timestamp: {earliest_iso}")

        for path, _ in items:
            st = os.stat(path)
            old_iso = datetime.fromtimestamp(st.st_mtime).isoformat(sep=" ")
            if dry_run:
                print(f" [DRY-RUN] {path} mtime {old_iso} -> {earliest_iso}")
            else:
                # Keep atime as-is; only the modification time is synced.
                os.utime(path, (st.st_atime, earliest_ts))
                print(f" Updated {path} mtime {old_iso} -> {earliest_iso}")


# ------------------------------------------------------------
# MAIN
# ------------------------------------------------------------

def main():
    """Parse command-line arguments and run the selected mode."""
    parser = argparse.ArgumentParser(
        description=(
            "Normalize mtimes of related photo/video/audio files by grouping them "
            "via basename prefixes.\n\n"
            "New: --collect-suffixes to scan for possible variant suffixes."
        )
    )
    parser.add_argument("root_dir", help="Root directory to scan recursively")
    parser.add_argument("--suffixes", nargs="*", default=["_tn", "_ws"],
                        help="Known variant suffix markers")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show changes but do not modify file timestamps")
    parser.add_argument("--collect-suffixes", action="store_true",
                        help="Scan and list all observed filename suffix variants")
    args = parser.parse_args()

    root = Path(args.root_dir).expanduser().resolve()
    if not root.is_dir():
        raise SystemExit(f"Not a directory: {root}")

    if args.collect_suffixes:
        print(f"Collecting possible suffixes in: {root}")
        counts = collect_possible_suffixes(root)
        if not counts:
            print("No suffixes detected.")
            return

        print("\nSuffixes found (sorted by frequency):")
        for suf, cnt in sorted(counts.items(), key=lambda kv: -kv[1]):
            print(f" {suf:20} {cnt} files")

        print("\nSuggested suffix list (deduplicated):")
        unique = sorted(counts.keys(), key=str.lower)
        print(" ", " ".join(unique))
        return

    # Normal mode
    if Image is None:
        # Without Pillow every timestamp would silently degrade to mtime;
        # refuse rather than rewrite mtimes from incomplete information.
        raise SystemExit(
            "Pillow is required to read EXIF timestamps (pip install Pillow)"
        )

    print(f"Grouping using suffixes: {args.suffixes}")
    groups = collect_groups(root, args.suffixes)
    total_files = sum(len(v) for v in groups.values())
    print(f"Found {total_files} files across {len(groups)} groups.")
    adjust_group_mtimes(groups, dry_run=args.dry_run)


if __name__ == "__main__":
    main()