"""Entropy-based predictability metrics for symbolic note sequences."""

from __future__ import annotations

import math
from collections import Counter, defaultdict
from dataclasses import dataclass


@dataclass(frozen=True)
class CompositionAnalysis:
    """Immutable summary of how predictable a note sequence is."""

    note_count: int                   # number of notes analyzed
    alphabet_size: int                # assumed symbol-alphabet size
    unigram_entropy_bits: float       # H(X) of the note distribution, in bits
    conditional_entropy_bits: float   # first-order H(X_t | X_{t-1}), in bits
    normalized_entropy: float         # H(X) / log2(alphabet_size)
    predictability: float             # 1 - H(X_t | X_{t-1}) / log2(alphabet_size)
    redundancy: float                 # 1 - normalized_entropy


def shannon_entropy(sequence: tuple[int, ...] | list[int]) -> float:
    """Return the Shannon entropy of *sequence*'s symbol distribution, in bits.

    An empty sequence has zero entropy by convention.
    """
    length = len(sequence)
    if length == 0:
        return 0.0
    entropy = 0.0
    for frequency in Counter(sequence).values():
        probability = frequency / length
        entropy -= probability * math.log2(probability)
    return entropy


def first_order_conditional_entropy(sequence: tuple[int, ...] | list[int]) -> float:
    """Return H(X_t | X_{t-1}) in bits under a first-order Markov model.

    Each adjacent pair in *sequence* counts as one observed transition; the
    per-source entropies are weighted by how often each source symbol occurs
    as the left element of a pair. Sequences shorter than two notes carry no
    transition information and yield 0.0.
    """
    if len(sequence) < 2:
        return 0.0

    # Tally right-symbol counts for every left symbol.
    transitions: dict[int, Counter[int]] = defaultdict(Counter)
    for current, following in zip(sequence, sequence[1:]):
        transitions[current][following] += 1

    pair_count = len(sequence) - 1
    result = 0.0
    for next_counts in transitions.values():
        # Total outgoing transitions from this source doubles as its
        # occurrence count among left elements, so it fixes both the
        # weighting and the per-source distribution.
        outgoing = sum(next_counts.values())
        weight = outgoing / pair_count
        local_entropy = 0.0
        for count in next_counts.values():
            probability = count / outgoing
            local_entropy -= probability * math.log2(probability)
        result += weight * local_entropy
    return result


def analyze_composition(
    sequence: tuple[int, ...] | list[int],
    *,
    alphabet_size: int = 8,
) -> CompositionAnalysis:
    """Compute entropy and predictability metrics for a note sequence.

    Args:
        sequence: Note values; each element is coerced with ``int()``.
        alphabet_size: Assumed number of distinct symbols, used only to set
            the normalization ceiling ``log2(alphabet_size)``.

    Returns:
        A :class:`CompositionAnalysis` with raw and normalized entropies.

    NOTE(review): if *sequence* actually uses more distinct symbols than
    *alphabet_size*, normalized_entropy can exceed 1.0 and redundancy can go
    negative — confirm callers always pass a large-enough alphabet.
    """
    notes = tuple(int(note) for note in sequence)
    unigram_bits = shannon_entropy(notes)
    conditional_bits = first_order_conditional_entropy(notes)

    # log2 is undefined at or below 1 symbol; a one-symbol alphabet has no
    # information capacity, so the ceiling degenerates to zero.
    if alphabet_size > 1:
        entropy_ceiling = math.log2(alphabet_size)
    else:
        entropy_ceiling = 0.0

    if entropy_ceiling:
        normalized = unigram_bits / entropy_ceiling
        predictability = 1.0 - conditional_bits / entropy_ceiling
    else:
        normalized = 0.0
        predictability = 1.0 - 0.0

    return CompositionAnalysis(
        note_count=len(notes),
        alphabet_size=alphabet_size,
        unigram_entropy_bits=unigram_bits,
        conditional_entropy_bits=conditional_bits,
        normalized_entropy=normalized,
        predictability=predictability,
        redundancy=1.0 - normalized,
    )