MemorySharing/ImageEditing/ms_group_mtime.py

221 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Sync group modification times in a directory tree.

Suffix-collection mode (--collect-suffixes):
    Scans the tree and prints the candidate variant suffixes extracted
    from filenames (thumbnail/web-size indicators, etc.).

Normal mode:
    - Recursively finds files whose names share a common prefix.
    - Uses EXIF DateTimeOriginal as each file's timestamp, falling back
      to the file's mtime.
    - Sets every mtime in a group to the group's earliest timestamp.
"""
import argparse
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Set
from PIL import Image
from PIL.ExifTags import TAGS
# ------------------------------------------------------------
# EXIF TIME
# ------------------------------------------------------------
def get_exif_datetime_original(path: Path) -> Optional[datetime]:
    """
    Best-effort read of an image's EXIF DateTimeOriginal tag.

    Returns the tag value parsed with the "%Y:%m:%d %H:%M:%S" format as a
    naive datetime. Returns None when the image has no EXIF data, the tag
    is absent or empty, or any exception is raised along the way (opening
    the file, reading EXIF, parsing the value).
    """
    try:
        with Image.open(path) as image:
            # NOTE(review): `_getexif` is underscore-prefixed, i.e. non-public
            # by convention; kept as-is to preserve behavior.
            raw_exif = image._getexif()
            named_tags = {}
            if raw_exif:
                # Translate numeric tag ids to names; unknown ids stay numeric.
                for tag_id, value in raw_exif.items():
                    named_tags[TAGS.get(tag_id, tag_id)] = value
            taken_at = named_tags.get("DateTimeOriginal")
            if taken_at:
                return datetime.strptime(taken_at, "%Y:%m:%d %H:%M:%S")
            return None
    except Exception:
        # Deliberate catch-all: any failure means "no usable EXIF time".
        return None
def get_logical_timestamp(path: Path) -> float:
    """
    Return the timestamp used to compare files within a group.

    Prefers the EXIF DateTimeOriginal value (converted to a POSIX
    timestamp); when that is unavailable, falls back to the file's mtime.
    """
    taken_at = get_exif_datetime_original(path)
    if taken_at is None:
        # No usable EXIF time: use the filesystem modification time.
        return os.path.getmtime(path)
    return taken_at.timestamp()
# ------------------------------------------------------------
# SUFFIX COLLECTION MODE
# ------------------------------------------------------------
def extract_suffixes(stem: str) -> List[str]:
    """
    Extract candidate variant suffixes from a filename stem.

    For each separator ('_' first, then '-'), the text from the LAST
    occurrence of that separator to the end of the stem is reported,
    separator included:
        IMG_1234_tn      -> ['_tn']
        IMG_1234_ws-800  -> ['_ws-800', '-800']
        dsc_0001-thumb   -> ['_0001-thumb', '-thumb']

    A separator whose last occurrence is the first or the last character
    of the stem is ignored, because it leaves either no base name or no
    suffix text.

    Args:
        stem: Filename without its extension.

    Returns:
        Zero, one or two suffix strings, in separator order ('_', '-').
    """
    suffixes = []
    for sep in ("_", "-"):
        # rfind, not find: the variant marker is the trailing segment, so
        # "IMG_1234_tn" must yield "_tn" rather than "_1234_tn".
        idx = stem.rfind(sep)
        # idx == -1: separator absent; idx == 0: nothing before it;
        # idx == len(stem) - 1: nothing after it.
        if 0 < idx < len(stem) - 1:
            suffixes.append(stem[idx:])  # keep the separator
    return suffixes
def collect_possible_suffixes(root: Path) -> Dict[str, int]:
    """
    Tally the variant suffixes observed under a directory tree.

    Walks `root` recursively, skips everything that is not a regular file,
    and counts each suffix that extract_suffixes() reports for a file stem.

    Returns:
        Mapping of suffix -> number of times it was seen.
    """
    tally: Dict[str, int] = {}
    regular_files = (entry for entry in root.rglob("*") if entry.is_file())
    for entry in regular_files:
        for suffix in extract_suffixes(entry.stem):
            if suffix in tally:
                tally[suffix] += 1
            else:
                tally[suffix] = 1
    return tally
# ------------------------------------------------------------
# NORMAL GROUPING MODE
# ------------------------------------------------------------
def derive_group_key(stem: str, variant_suffixes: List[str]) -> str:
    """
    Strip known variant markers from a stem to obtain its grouping prefix.

    Each marker is searched case-insensitively anywhere in the stem; the
    key is the part of the stem that precedes the earliest match.

    Args:
        stem: Filename without its extension.
        variant_suffixes: Markers such as "_tn" or "_ws". Empty strings
            are ignored.

    Returns:
        The prefix before the earliest marker, or the stem unchanged when
        no marker matches or when the earliest match is at index 0 (which
        would otherwise produce an empty key).
    """
    positions = []
    for marker in variant_suffixes:
        if not marker:
            # An empty marker "matches" at index 0 and would disable
            # grouping for every file, so it is skipped.
            continue
        marker_l = marker.lower()
        width = len(marker)
        # Compare windows of the ORIGINAL stem. An index found in
        # stem.lower() can be shifted, because lowercasing may change the
        # string length (e.g. U+0130 lowercases to two code points).
        idx = next(
            (
                i
                for i in range(len(stem) - width + 1)
                if stem[i:i + width].lower() == marker_l
            ),
            -1,
        )
        if idx != -1:
            positions.append(idx)
    if not positions:
        return stem
    cutoff = min(positions)
    if cutoff <= 0:
        return stem
    return stem[:cutoff]
def collect_groups(root: Path, variant_suffixes: List[str]) -> Dict[str, List[Tuple[Path, float]]]:
    """
    Group every regular file under `root` by its derived prefix key.

    The key comes from derive_group_key() applied to the file stem; each
    group entry pairs the file path with its logical timestamp from
    get_logical_timestamp().

    Returns:
        Mapping of group key -> list of (path, timestamp) tuples.
    """
    grouped: Dict[str, List[Tuple[Path, float]]] = {}
    regular_files = (entry for entry in root.rglob("*") if entry.is_file())
    for entry in regular_files:
        group_key = derive_group_key(entry.stem, variant_suffixes)
        stamp = get_logical_timestamp(entry)
        members = grouped.setdefault(group_key, [])
        members.append((entry, stamp))
    return grouped
def adjust_group_mtimes(groups: Dict[str, List[Tuple[Path, float]]], dry_run: bool = False) -> None:
    """
    Align the mtime of every file in a group to the group's earliest timestamp.

    Groups are processed in sorted key order and a report is printed for
    each group and each file. The access time of every file is carried
    over from its current stat result.

    Args:
        groups: Mapping of group key -> list of (path, timestamp) tuples.
        dry_run: When true, only print what would change; no file is touched.
    """
    for group_key in sorted(groups):
        members = groups[group_key]
        earliest_ts = min(stamp for _member, stamp in members)
        earliest_iso = datetime.fromtimestamp(earliest_ts).isoformat(sep=" ")
        print(f"\nGroup '{group_key}': {len(members)} file(s)")
        print(f" Earliest logical timestamp: {earliest_iso}")
        for member, _stamp in members:
            info = os.stat(member)
            old_iso = datetime.fromtimestamp(info.st_mtime).isoformat(sep=" ")
            if not dry_run:
                # Keep atime as it is; only the modification time moves.
                os.utime(member, (info.st_atime, earliest_ts))
                print(f" Updated {member} mtime {old_iso} -> {earliest_iso}")
                continue
            print(f" [DRY-RUN] {member} mtime {old_iso} -> {earliest_iso}")
# ------------------------------------------------------------
# MAIN
# ------------------------------------------------------------
def main():
    """
    Command-line entry point.

    Parses the arguments, validates the root directory, then either lists
    the filename suffixes observed in the tree (--collect-suffixes) or
    groups the files and aligns their mtimes (optionally as a dry run).

    Raises:
        SystemExit: if the given root is not a directory.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Normalize mtimes of related photo/video/audio files by grouping them "
            "via basename prefixes.\n\n"
            "New: --collect-suffixes to scan for possible variant suffixes."
        )
    )
    cli.add_argument("root_dir", help="Root directory to scan recursively")
    cli.add_argument(
        "--suffixes",
        nargs="*",
        default=["_tn", "_ws"],
        help="Known variant suffix markers",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show changes but do not modify file timestamps",
    )
    cli.add_argument(
        "--collect-suffixes",
        action="store_true",
        help="Scan and list all observed filename suffix variants",
    )
    options = cli.parse_args()

    root = Path(options.root_dir).expanduser().resolve()
    if not root.is_dir():
        raise SystemExit(f"Not a directory: {root}")

    if not options.collect_suffixes:
        # Normal mode: group the files and align their mtimes.
        print(f"Grouping using suffixes: {options.suffixes}")
        groups = collect_groups(root, options.suffixes)
        total_files = sum(len(members) for members in groups.values())
        print(f"Found {total_files} files across {len(groups)} groups.")
        adjust_group_mtimes(groups, dry_run=options.dry_run)
        return

    # Suffix-collection mode: report only, nothing is modified.
    print(f"Collecting possible suffixes in: {root}")
    counts = collect_possible_suffixes(root)
    if counts:
        print("\nSuffixes found (sorted by frequency):")
        by_frequency = sorted(counts.items(), key=lambda pair: -pair[1])
        for suf, cnt in by_frequency:
            print(f" {suf:20} {cnt} files")
        print("\nSuggested suffix list (deduplicated):")
        unique = sorted(counts, key=str.lower)
        print(" ", " ".join(unique))
    else:
        print("No suffixes detected.")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()