# MemorySharing/ImageEditing/ms_group_mtime.py

#!/usr/bin/env python3
"""
Sync group modification times in a directory tree.

New Mode:
    --collect-suffixes
      Scans the tree and prints a list of possible variant suffixes
      extracted from filenames (thumbnail/websize indicators, etc.).

Normal Mode:
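    - Recursively scans the tree and groups files whose names share a
      prefix once known variant suffixes are stripped.
    - Uses EXIF DateTimeOriginal as each file's timestamp, falling back
      to its mtime when no EXIF date is available.
    - Sets the mtime of every file in a group to the group's earliest
      timestamp.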

"""

import argparse
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Set

from PIL import Image
from PIL.ExifTags import TAGS


# ------------------------------------------------------------
# EXIF TIME
# ------------------------------------------------------------

def get_exif_datetime_original(path: Path) -> Optional[datetime]:
    """Return the EXIF DateTimeOriginal of the image at *path*, or None.

    None is returned when the image carries no EXIF data, when the
    DateTimeOriginal tag is missing or empty, or when any exception is
    raised while opening the file or parsing the tag value (best-effort
    lookup: errors are swallowed rather than propagated).
    """
    try:
        with Image.open(path) as image:
            raw_exif = image._getexif()
            if raw_exif:
                # Re-key the numeric EXIF tag ids by their symbolic names;
                # ids unknown to TAGS are kept under the numeric id.
                named_tags = {}
                for tag_id, value in raw_exif.items():
                    named_tags[TAGS.get(tag_id, tag_id)] = value

                stamp = named_tags.get("DateTimeOriginal")
                if stamp:
                    return datetime.strptime(stamp, "%Y:%m:%d %H:%M:%S")
            return None
    except Exception:
        return None


def get_logical_timestamp(path: Path) -> float:
    """Return the timestamp used to order *path* within its group.

    The EXIF DateTimeOriginal (converted to a POSIX timestamp) is used
    when get_exif_datetime_original() yields one; otherwise the file's
    modification time is returned.
    """
    taken_at = get_exif_datetime_original(path)
    return os.path.getmtime(path) if taken_at is None else taken_at.timestamp()


# ------------------------------------------------------------
# SUFFIX COLLECTION MODE
# ------------------------------------------------------------

def extract_suffixes(stem: str) -> List[str]:
    """
    Extract candidate variant suffixes from a filename stem.

    Every '_' or '-' that is neither the first nor the last character of
    the stem starts a candidate suffix running to the end of the stem
    (the separator itself is included). Candidates are returned in order
    of their position in the stem, longest first.

    Examples:
        IMG_1234_tn       -> ['_1234_tn', '_tn']
        IMG_1234_ws-800   -> ['_1234_ws-800', '_ws-800', '-800']
        dsc_0001-thumb    -> ['_0001-thumb', '-thumb']
        plain             -> []

    Args:
        stem: Filename without its extension.

    Returns:
        List of trailing segments, one per interior separator.
    """
    last_index = len(stem) - 1

    # Scan every separator position rather than only the first one, so
    # that short trailing markers such as "_tn" are actually discovered.
    # A leading separator would make the whole stem the "suffix" and a
    # trailing one would yield a bare separator, so both are skipped.
    return [
        stem[idx:]
        for idx, char in enumerate(stem)
        if char in "_-" and 0 < idx < last_index
    ]


def collect_possible_suffixes(root: Path) -> Dict[str, int]:
    """
    Tally the candidate suffixes seen on every file below *root*.

    Walks the tree recursively, skips anything that is not a regular
    file, and feeds each file's stem to extract_suffixes().

    Returns dict: suffix -> number of times it was observed
    """
    tally: Dict[str, int] = {}

    regular_files = (entry for entry in root.rglob("*") if entry.is_file())
    for entry in regular_files:
        for suffix in extract_suffixes(entry.stem):
            if suffix in tally:
                tally[suffix] += 1
            else:
                tally[suffix] = 1

    return tally


# ------------------------------------------------------------
# NORMAL GROUPING MODE
# ------------------------------------------------------------

def derive_group_key(stem: str, variant_suffixes: List[str]) -> str:
    """Strip known variant suffixes from the stem to find a grouping prefix.

    Each marker is searched for case-insensitively anywhere after the
    first character of the stem. The stem is cut at the earliest match,
    and everything before that point is the group key.

    Args:
        stem: Filename without its extension.
        variant_suffixes: Markers (e.g. "_tn", "_ws") that introduce a
            variant part of the name. Empty markers are ignored.

    Returns:
        The prefix preceding the earliest marker, or the unchanged stem
        when no marker matches.
    """
    stem_lower = stem.lower()
    positions = []

    for marker in variant_suffixes:
        if not marker:
            # An empty marker matches everywhere and would otherwise
            # disable grouping for every file.
            continue
        # Search from index 1: a match at index 0 would leave an empty
        # prefix, and must not mask a usable match further along.
        idx = stem_lower.find(marker.lower(), 1)
        if idx != -1:
            positions.append(idx)

    if not positions:
        return stem

    return stem[:min(positions)]


def collect_groups(root: Path, variant_suffixes: List[str]) -> Dict[str, List[Tuple[Path, float]]]:
    """Group every regular file below *root* by its derived key.

    The key comes from derive_group_key() applied to the file's stem;
    each group entry pairs the file's path with the timestamp reported
    by get_logical_timestamp().
    """
    grouped: Dict[str, List[Tuple[Path, float]]] = {}

    for entry in root.rglob("*"):
        if entry.is_file():
            group_key = derive_group_key(entry.stem, variant_suffixes)
            record = (entry, get_logical_timestamp(entry))

            if group_key in grouped:
                grouped[group_key].append(record)
            else:
                grouped[group_key] = [record]

    return grouped


def adjust_group_mtimes(groups: Dict[str, List[Tuple[Path, float]]], dry_run: bool = False) -> None:
    """Set all mtimes in each group to the group's earliest timestamp.

    Groups are processed in sorted key order and a report is printed for
    each group and each file. Access times are left as they were.

    Args:
        groups: Mapping of group key -> list of (path, logical timestamp).
        dry_run: When true, only print the changes that would be made;
            no file is touched.
    """
    for group_key in sorted(groups):
        items = groups[group_key]
        if not items:
            # min() of an empty sequence raises ValueError, and there is
            # nothing to synchronise in an empty group anyway.
            continue

        earliest_ts = min(ts for _, ts in items)
        earliest_iso = datetime.fromtimestamp(earliest_ts).isoformat(sep=" ")

        print(f"\nGroup '{group_key}': {len(items)} file(s)")
        print(f"  Earliest logical timestamp: {earliest_iso}")

        for path, _ in items:
            # Re-stat each file: the current mtime is needed for the
            # report and the current atime must be passed back to utime.
            st = os.stat(path)
            old_iso = datetime.fromtimestamp(st.st_mtime).isoformat(sep=" ")

            if dry_run:
                print(f"    [DRY-RUN] {path}  mtime {old_iso} -> {earliest_iso}")
            else:
                os.utime(path, (st.st_atime, earliest_ts))
                print(f"    Updated {path}  mtime {old_iso} -> {earliest_iso}")


# ------------------------------------------------------------
# MAIN
# ------------------------------------------------------------

def main():
    """Command-line entry point.

    Parses the arguments, validates the root directory, then either
    synchronises mtimes per group (default) or, with --collect-suffixes,
    lists the candidate suffixes found in the tree.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Normalize mtimes of related photo/video/audio files by grouping them "
            "via basename prefixes.\n\n"
            "New: --collect-suffixes to scan for possible variant suffixes."
        )
    )
    cli.add_argument("root_dir", help="Root directory to scan recursively")
    cli.add_argument(
        "--suffixes",
        nargs="*",
        default=["_tn", "_ws"],
        help="Known variant suffix markers",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show changes but do not modify file timestamps",
    )
    cli.add_argument(
        "--collect-suffixes",
        action="store_true",
        help="Scan and list all observed filename suffix variants",
    )

    args = cli.parse_args()

    root = Path(args.root_dir).expanduser().resolve()
    if not root.is_dir():
        raise SystemExit(f"Not a directory: {root}")

    # Normal mode: group the files and align their mtimes.
    if not args.collect_suffixes:
        print(f"Grouping using suffixes: {args.suffixes}")
        groups = collect_groups(root, args.suffixes)

        total_files = sum(map(len, groups.values()))
        print(f"Found {total_files} files across {len(groups)} groups.")

        adjust_group_mtimes(groups, dry_run=args.dry_run)
        return

    # Suffix collection mode: report only, no file is modified.
    print(f"Collecting possible suffixes in: {root}")
    counts = collect_possible_suffixes(root)

    if not counts:
        print("No suffixes detected.")
        return

    print("\nSuffixes found (sorted by frequency):")
    by_frequency = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for suffix, seen in by_frequency:
        print(f"  {suffix:20}  {seen} files")

    print("\nSuggested suffix list (deduplicated):")
    print(" ", " ".join(sorted(counts, key=str.lower)))


if __name__ == "__main__":
    main()