67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
from __future__ import annotations
|
|
|
|
from collections import Counter, defaultdict
|
|
from dataclasses import dataclass
|
|
import math
|
|
|
|
|
|
@dataclass(frozen=True)
class CompositionAnalysis:
    """Immutable bundle of entropy-based statistics for one note sequence.

    Instances are produced by ``analyze_composition``; the per-field comments
    below describe the values that function stores. Because the dataclass is
    frozen, fields cannot be reassigned after construction.
    """

    # Number of notes in the analysed sequence (``len`` of the input).
    note_count: int
    # Alphabet size the caller supplied; used as the normalisation base.
    alphabet_size: int
    # Shannon entropy of the individual note frequencies, in bits.
    unigram_entropy_bits: float
    # Entropy of the next note given the current one, in bits.
    conditional_entropy_bits: float
    # Unigram entropy divided by log2(alphabet_size); 0.0 when that
    # maximum is zero (alphabet_size <= 1).
    normalized_entropy: float
    # 1.0 minus the conditional entropy normalised by log2(alphabet_size).
    predictability: float
    # 1.0 minus ``normalized_entropy``.
    redundancy: float
|
|
|
|
|
|
def shannon_entropy(sequence: tuple[int, ...] | list[int]) -> float:
    """Return the Shannon entropy of *sequence* in bits.

    Each distinct value is treated as a symbol whose probability is its
    relative frequency in *sequence*.

    Args:
        sequence: The observed symbols. Must support ``len`` and be
            iterable with hashable items.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the distinct symbols. An empty
        sequence yields ``0.0``. The result is never negative zero.
    """
    if not sequence:
        return 0.0

    total = len(sequence)
    probabilities = [count / total for count in Counter(sequence).values()]
    entropy = -sum(p * math.log2(p) for p in probabilities)

    # With a single distinct symbol the sum is 0.0 and its negation is -0.0,
    # which compares equal to zero but prints as "-0.0" and leaks into any
    # value derived from it. ``or`` maps that falsy -0.0 to a clean +0.0.
    return entropy or 0.0
|
|
|
|
|
|
def first_order_conditional_entropy(sequence: tuple[int, ...] | list[int]) -> float:
    """Return the entropy of the next symbol given the current one, in bits.

    Every adjacent pair ``(sequence[i], sequence[i + 1])`` is one transition.
    For each distinct leading symbol, the entropy of its follower
    distribution is computed and weighted by how often that symbol leads a
    transition.

    Args:
        sequence: The observed symbols, in order. Must support ``len`` and
            slicing, with hashable items.

    Returns:
        The weighted sum of per-symbol follower entropies, or ``0.0`` when
        the sequence has fewer than two elements (no transitions).
    """
    pair_total = len(sequence) - 1
    if pair_total < 1:
        return 0.0

    leaders = sequence[:-1]
    leader_counts = Counter(leaders)

    followers_by_leader: dict[int, Counter[int]] = defaultdict(Counter)
    for current, following in zip(leaders, sequence[1:]):
        followers_by_leader[current][following] += 1

    result = 0.0
    for leader, followers in followers_by_leader.items():
        row_total = sum(followers.values())
        row_probs = [seen / row_total for seen in followers.values()]
        row_entropy = -sum(p * math.log2(p) for p in row_probs)
        weight = leader_counts[leader] / pair_total
        result += weight * row_entropy
    return result
|
|
|
|
|
|
def analyze_composition(
    sequence: tuple[int, ...] | list[int],
    *,
    alphabet_size: int = 8,
) -> CompositionAnalysis:
    """Summarise a note sequence with entropy-based statistics.

    Each element of *sequence* is converted with ``int`` before analysis.
    The unigram and first-order conditional entropies are computed, then
    both are normalised by ``log2(alphabet_size)``.

    Args:
        sequence: The notes to analyse, in order.
        alphabet_size: Number of symbols assumed available; sets the maximum
            entropy used for normalisation. Values of 1 or less give a
            maximum of zero, so both normalised ratios are taken as 0.0.

    Returns:
        A ``CompositionAnalysis`` holding the raw entropies in bits and the
        derived ``normalized_entropy``, ``predictability`` and ``redundancy``.
        The ratios are not clamped: a sequence with more distinct notes than
        *alphabet_size* can push ``normalized_entropy`` above 1.
    """
    notes = tuple(map(int, sequence))
    h_unigram = shannon_entropy(notes)
    h_conditional = first_order_conditional_entropy(notes)

    # Maximum possible entropy for the declared alphabet, in bits.
    if alphabet_size > 1:
        ceiling = math.log2(alphabet_size)
    else:
        ceiling = 0.0

    if ceiling:
        entropy_ratio = h_unigram / ceiling
        conditional_ratio = h_conditional / ceiling
    else:
        # No meaningful normalisation base; avoid dividing by zero.
        entropy_ratio = 0.0
        conditional_ratio = 0.0

    return CompositionAnalysis(
        note_count=len(notes),
        alphabet_size=alphabet_size,
        unigram_entropy_bits=h_unigram,
        conditional_entropy_bits=h_conditional,
        normalized_entropy=entropy_ratio,
        predictability=1.0 - conditional_ratio,
        redundancy=1.0 - entropy_ratio,
    )
|