221 lines
6.7 KiB
Python
221 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Synchronize modification times across groups of related files in a directory tree.
|
|
|
|
New Mode:
|
|
--collect-suffixes
|
|
Scans the tree and prints a list of possible variant suffixes
|
|
extracted from filenames (thumbnail/websize indicators, etc.).
|
|
|
|
Normal Mode:
|
|
- Recursively groups files whose names share a prefix (the stem with known variant suffixes stripped).
|
|
- Uses EXIF DateTimeOriginal or mtime (fallback).
|
|
- Sets all mtimes in a group to the earliest timestamp.
|
|
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Dict, List, Tuple, Optional, Set
|
|
|
|
from PIL import Image
|
|
from PIL.ExifTags import TAGS
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# EXIF TIME
|
|
# ------------------------------------------------------------
|
|
|
|
def get_exif_datetime_original(path: Path) -> Optional[datetime]:
    """Return the EXIF DateTimeOriginal of *path* as a naive datetime.

    Returns None when the file cannot be opened as an image, has no EXIF
    data, has no DateTimeOriginal tag, or the tag value does not match the
    ``YYYY:MM:DD HH:MM:SS`` layout. Every exception is treated as
    "no EXIF date available" so callers can fall back to another source.
    """
    try:
        with Image.open(path) as image:
            raw_exif = image._getexif()
            if raw_exif:
                # Translate numeric tag ids to names; unknown ids stay numeric.
                named = {}
                for tag_id, value in raw_exif.items():
                    named[TAGS.get(tag_id, tag_id)] = value

                stamp = named.get("DateTimeOriginal")
                if stamp:
                    return datetime.strptime(stamp, "%Y:%m:%d %H:%M:%S")
            return None
    except Exception:
        # Best-effort lookup: unreadable or non-image files simply have no date.
        return None
|
|
|
|
|
|
def get_logical_timestamp(path: Path) -> float:
    """Return the timestamp used to order *path* within its group.

    The EXIF DateTimeOriginal is preferred; when
    get_exif_datetime_original() yields None, the file's mtime is used.
    """
    captured = get_exif_datetime_original(path)
    if captured is None:
        return os.path.getmtime(path)
    return captured.timestamp()
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# SUFFIX COLLECTION MODE
|
|
# ------------------------------------------------------------
|
|
|
|
def extract_suffixes(stem: str) -> List[str]:
    """
    Extract candidate variant suffixes from a filename stem.

    For each separator ('_' first, then '-'), the text from the *last*
    occurrence of that separator to the end of the stem is reported, with
    the separator included:

        IMG_1234_tn      -> ['_tn']
        IMG_1234_ws-800  -> ['_ws-800', '-800']
        dsc_0001-thumb   -> ['_0001-thumb', '-thumb']

    A separator that is the first or last character of the stem is ignored,
    because it would leave either no prefix or no suffix text.

    Returns a list with at most one entry per separator; empty when the stem
    contains no usable separator.
    """
    suffixes: List[str] = []

    for sep in ("_", "-"):
        # rfind: the variant marker is the trailing segment, so split on the
        # last separator.  Searching the stem directly (not a lowercased copy)
        # keeps the index valid for slicing the original string.
        idx = stem.rfind(sep)
        if 0 < idx < len(stem) - 1:
            suffixes.append(stem[idx:])  # include the separator

    return suffixes
|
|
|
|
|
|
def collect_possible_suffixes(root: Path) -> Dict[str, int]:
    """
    Tally the candidate suffixes of every regular file below *root*.

    Each file's stem is passed to extract_suffixes(); directories and other
    non-file entries are skipped.

    Returns dict: suffix -> number of times it was observed
    """
    tally: Dict[str, int] = {}

    regular_files = (entry for entry in root.rglob("*") if entry.is_file())
    for file_path in regular_files:
        for suffix in extract_suffixes(file_path.stem):
            tally[suffix] = tally.get(suffix, 0) + 1

    return tally
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# NORMAL GROUPING MODE
|
|
# ------------------------------------------------------------
|
|
|
|
def derive_group_key(stem: str, variant_suffixes: List[str]) -> str:
    """Strip known variant suffixes from the stem to find a grouping prefix.

    Markers are matched case-insensitively.  The stem is cut at the earliest
    marker position found, so everything from that marker onward is dropped.

    A marker only counts when it leaves a non-empty prefix: empty markers are
    skipped and the search starts at index 1.  This way a marker that happens
    to sit at the very start of the stem (or an empty marker passed on the
    command line) cannot suppress the other markers.

    Returns the stem unchanged when no marker applies.
    """
    lowered = stem.lower()
    cut_points = []

    for marker in variant_suffixes:
        if not marker:
            # "" matches everywhere and carries no information.
            continue
        idx = lowered.find(marker.lower(), 1)
        if idx != -1:
            cut_points.append(idx)

    if not cut_points:
        return stem

    return stem[:min(cut_points)]
|
|
|
|
|
|
def collect_groups(root: Path, variant_suffixes: List[str]) -> Dict[str, List[Tuple[Path, float]]]:
    """Bucket every regular file below *root* by its derived group key.

    The key comes from derive_group_key() applied to the file's stem; each
    bucket entry pairs the file path with its logical timestamp from
    get_logical_timestamp().  Non-file entries are skipped.
    """
    buckets: Dict[str, List[Tuple[Path, float]]] = {}

    for entry in root.rglob("*"):
        if entry.is_file():
            group_name = derive_group_key(entry.stem, variant_suffixes)
            stamp = get_logical_timestamp(entry)
            bucket = buckets.setdefault(group_name, [])
            bucket.append((entry, stamp))

    return buckets
|
|
|
|
|
|
def adjust_group_mtimes(groups: Dict[str, List[Tuple[Path, float]]], dry_run: bool = False) -> None:
    """Rewrite each file's mtime to the earliest timestamp in its group.

    Groups are processed in sorted key order and a report line is printed for
    every file.  With ``dry_run`` set, the intended change is printed but no
    file is touched.  Access times are preserved.
    """
    for name in sorted(groups):
        members = groups[name]
        target_ts = min(stamp for _member, stamp in members)
        target_text = datetime.fromtimestamp(target_ts).isoformat(sep=" ")

        print(f"\nGroup '{name}': {len(members)} file(s)")
        print(f" Earliest logical timestamp: {target_text}")

        for file_path, _stamp in members:
            info = os.stat(file_path)
            before_text = datetime.fromtimestamp(info.st_mtime).isoformat(sep=" ")

            if not dry_run:
                # Keep the existing atime; only the mtime is replaced.
                os.utime(file_path, (info.st_atime, target_ts))
                print(f" Updated {file_path} mtime {before_text} -> {target_text}")
                continue

            print(f" [DRY-RUN] {file_path} mtime {before_text} -> {target_text}")
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
# MAIN
|
|
# ------------------------------------------------------------
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, validates the root directory, and then either lists
    the suffixes observed in the tree (--collect-suffixes) or groups the files
    and aligns their mtimes (default mode, optionally as a dry run).

    Exits via SystemExit when the given root is not a directory.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Normalize mtimes of related photo/video/audio files by grouping them "
            "via basename prefixes.\n\n"
            "New: --collect-suffixes to scan for possible variant suffixes."
        )
    )
    cli.add_argument("root_dir", help="Root directory to scan recursively")
    cli.add_argument(
        "--suffixes",
        nargs="*",
        default=["_tn", "_ws"],
        help="Known variant suffix markers",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show changes but do not modify file timestamps",
    )
    cli.add_argument(
        "--collect-suffixes",
        action="store_true",
        help="Scan and list all observed filename suffix variants",
    )
    options = cli.parse_args()

    root = Path(options.root_dir).expanduser().resolve()
    if not root.is_dir():
        raise SystemExit(f"Not a directory: {root}")

    if not options.collect_suffixes:
        # Normal mode: group files and align their mtimes.
        print(f"Grouping using suffixes: {options.suffixes}")
        groups = collect_groups(root, options.suffixes)

        file_total = sum(map(len, groups.values()))
        print(f"Found {file_total} files across {len(groups)} groups.")

        adjust_group_mtimes(groups, dry_run=options.dry_run)
        return

    # Suffix collection mode: report only, nothing is modified.
    print(f"Collecting possible suffixes in: {root}")
    tally = collect_possible_suffixes(root)

    if not tally:
        print("No suffixes detected.")
        return

    print("\nSuffixes found (sorted by frequency):")
    by_frequency = sorted(tally.items(), key=lambda pair: -pair[1])
    for suffix, seen in by_frequency:
        print(f" {suffix:20} {seen} files")

    print("\nSuggested suffix list (deduplicated):")
    alphabetical = sorted(tally, key=str.lower)
    print(" ", " ".join(alphabetical))
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|