diff --git a/README.md b/README.md index b303898..f53c680 100644 --- a/README.md +++ b/README.md @@ -80,4 +80,4 @@ docs/ project notes, provenance, and architecture documents ## Status -This repository is now past the pure-scaffold stage. It contains the first generic runtime, reporting, serialization, graph, backpropagation, ART1, and Hopfield layers, plus internal mixed-family demos built on the generic orchestration model. The exporter can emit artifacts for more than one internal demo and can now save checkpointable snapshot artifacts for later resume, while the thesis-derived Python implementation remains the historical reference for the first complete hybrid system. +This repository is now past the pure-scaffold stage. It contains the first generic runtime, reporting, serialization, graph, Adaline/Madaline, backpropagation, ART1, and Hopfield layers, plus internal mixed-family demos built on the generic orchestration model. The exporter can emit artifacts for more than one internal demo and can now save checkpointable snapshot artifacts for later resume, while the thesis-derived Python implementation remains the historical reference for the first complete hybrid system. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index a801c7a..596501e 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -67,8 +67,10 @@ The result is still emitted as a standard `StepTrace`, so all downstream reporti ## Architecture Families -At present, Synaptopus contains three reusable architecture families: +At present, Synaptopus contains five reusable architecture families: +- Adaline +- Madaline - multilayer feedforward backpropagation - ART1 category learning - Hopfield-style recurrent dynamics diff --git a/docs/HISTORY.md b/docs/HISTORY.md index 02ff10e..10bf70e 100644 --- a/docs/HISTORY.md +++ b/docs/HISTORY.md @@ -2,7 +2,7 @@ ## Provenance -Synaptopus grows out of a much earlier line of work: a 1989 master's thesis project at The University of Texas at Arlington by Wesley Royce Elsberry on hybrid artificial neural network modelling. +Synaptopus grows out of a much earlier line of work: a 1988-1989 master's thesis project at The University of Texas at Arlington by Wesley Royce Elsberry on hybrid artificial neural network modelling. That original system combined multiple architecture families in a single loop: diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 4208abc..d76d495 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -21,6 +21,7 @@ The repository already contains: - information-theoretic sequence analysis - generic reporting helpers - graph schema and trace serialization +- Adaline and a small Madaline-style extension - multilayer backpropagation - ART1 - Hopfield-style dynamics and generic Hopfield matrix preparation @@ -45,6 +46,7 @@ This is the first point at which Synaptopus is more than a scaffold. - Add more robust trace viewers and summarized execution statistics - Build a TypeScript mirror of the graph schema and trace model - Prototype a browser-based workbench that can visualize execution traces and graph structure +- Add a distinct recurrent backpropagation family rather than overloading the current feedforward reference BP ## Longer Term diff --git a/src/synaptopus/__init__.py b/src/synaptopus/__init__.py index e3a0960..6671551 100644 --- a/src/synaptopus/__init__.py +++ b/src/synaptopus/__init__.py @@ -1,5 +1,6 @@ """Synaptopus: a multi-architecture artificial neural systems lab.""" +from .adaline import AdalineNetwork, AdalineResult, MadalineNetwork, MadalineResult, bipolar_sign from .analysis import SequenceAnalysis, analyze_sequence, first_order_conditional_entropy, shannon_entropy from .artifacts import ( ARTIFACT_SCHEMA_VERSION, @@ -80,6 +81,8 @@ from .types import RunReport __all__ = [ "AcceptancePolicy", + "AdalineNetwork", + "AdalineResult", "ARTIFACT_SCHEMA_VERSION", "ART1Category", "ART1Network", @@ -110,6 +113,8 @@ __all__ = [ "HopfieldNetworkState", "HopfieldParams", "HopfieldRunResult", + "MadalineNetwork", + "MadalineResult", "PolicyDecision", "RunReport", "SequenceAnalysis", @@ -122,6 +127,7 @@ __all__ = [ "apply_grid_inhibition", "build_parity_pressure_demo", "build_xor_novelty_demo", + "bipolar_sign", "available_demo_names", "categorizer_node", "clear_diagonal", diff --git a/src/synaptopus/adaline.py b/src/synaptopus/adaline.py new file mode 100644 index 0000000..4f7ade7 --- /dev/null +++ b/src/synaptopus/adaline.py @@ -0,0 +1,243 @@ +from __future__ import annotations + +from dataclasses import dataclass +import json +import random +from typing import Iterable + + +def bipolar_sign(value: float, *, threshold: float = 0.0) -> int: + return 1 if value >= threshold else -1 + + +@dataclass(frozen=True) +class AdalineResult: + activation: float + output: float + error: float + + +class AdalineNetwork: + def __init__( + self, + *, + input_size: int, + learning_rate: float = 0.1, + threshold: float = 0.0, + weights: Iterable[float] | None = None, + bias: float = 0.0, + ) -> None: + if input_size <= 0: + raise ValueError("input_size must be positive") + if learning_rate <= 0.0: + raise ValueError("learning_rate must be positive") + self.input_size = input_size + self.learning_rate = learning_rate + self.threshold = threshold + self.weights = ( + [float(value) for value in weights] + if weights is not None + else [0.0 for _ in range(input_size)] + ) + if len(self.weights) != input_size: + raise ValueError("weights length must match input_size") + self.bias = float(bias) + + @classmethod + def random( + cls, + *, + input_size: int, + learning_rate: float = 0.1, + threshold: float = 0.0, + rng: random.Random | None = None, + weight_scale: float = 0.25, + ) -> "AdalineNetwork": + generator = rng or random.Random() + return cls( + input_size=input_size, + learning_rate=learning_rate, + threshold=threshold, + weights=[generator.uniform(-weight_scale, weight_scale) for _ in range(input_size)], + bias=generator.uniform(-weight_scale, weight_scale), + ) + + def activation(self, inputs: Iterable[float]) -> float: + values = tuple(float(value) for value in inputs) + if len(values) != self.input_size: + raise ValueError(f"expected {self.input_size} inputs, got {len(values)}") + return sum(weight * value for weight, value in zip(self.weights, values)) + self.bias + + def predict(self, inputs: Iterable[float], *, thresholded: bool = False) -> AdalineResult: + activation = self.activation(inputs) + output = ( + float(bipolar_sign(activation, threshold=self.threshold)) + if thresholded + else activation + ) + return AdalineResult( + activation=activation, + output=output, + error=0.0, + ) + + def classify(self, inputs: Iterable[float]) -> int: + return bipolar_sign(self.activation(inputs), threshold=self.threshold) + + def train_step(self, inputs: Iterable[float], target: float) -> AdalineResult: + values = tuple(float(value) for value in inputs) + if len(values) != self.input_size: + raise ValueError(f"expected {self.input_size} inputs, got {len(values)}") + activation = self.activation(values) + error = float(target) - activation + for index, value in enumerate(values): + self.weights[index] += self.learning_rate * error * value + self.bias += self.learning_rate * error + updated = self.predict(values) + return AdalineResult( + activation=updated.activation, + output=updated.output, + error=error, + ) + + def to_dict(self) -> dict[str, object]: + return { + "input_size": self.input_size, + "learning_rate": self.learning_rate, + "threshold": self.threshold, + "weights": list(self.weights), + "bias": self.bias, + } + + @classmethod + def from_dict(cls, data: dict[str, object]) -> "AdalineNetwork": + return cls( + input_size=int(data["input_size"]), + learning_rate=float(data["learning_rate"]), + threshold=float(data["threshold"]), + weights=[float(value) for value in data["weights"]], # type: ignore[index] + bias=float(data["bias"]), + ) + + def save_json(self, path: str) -> None: + with open(path, "w", encoding="utf-8") as handle: + json.dump(self.to_dict(), handle, indent=2) + + @classmethod + def load_json(cls, path: str) -> "AdalineNetwork": + with open(path, "r", encoding="utf-8") as handle: + return cls.from_dict(json.load(handle)) + + +@dataclass(frozen=True) +class MadalineResult: + hidden_results: tuple[AdalineResult, ...] + hidden_outputs: tuple[int, ...] + output_result: AdalineResult + output: int + + +class MadalineNetwork: + """ + A small bipolar MADALINE-style classifier. + + This implementation is intentionally modest: one hidden layer of Adaline + units with thresholded bipolar outputs, followed by one output Adaline. + The training rule is deliberately conservative rather than a claim of + exhaustive historical fidelity: hidden units are adapted toward the target, + then the output Adaline is trained over the hidden bipolar responses. + """ + + def __init__( + self, + *, + hidden_units: tuple[AdalineNetwork, ...], + output_unit: AdalineNetwork, + ) -> None: + if not hidden_units: + raise ValueError("hidden_units must not be empty") + if output_unit.input_size != len(hidden_units): + raise ValueError("output_unit input size must match the hidden unit count") + self.hidden_units = list(hidden_units) + self.output_unit = output_unit + + @classmethod + def random( + cls, + *, + input_size: int, + hidden_unit_count: int, + learning_rate: float = 0.1, + rng: random.Random | None = None, + weight_scale: float = 0.25, + ) -> "MadalineNetwork": + generator = rng or random.Random() + hidden_units = tuple( + AdalineNetwork.random( + input_size=input_size, + learning_rate=learning_rate, + rng=generator, + weight_scale=weight_scale, + ) + for _ in range(hidden_unit_count) + ) + output_unit = AdalineNetwork.random( + input_size=hidden_unit_count, + learning_rate=learning_rate, + rng=generator, + weight_scale=weight_scale, + ) + return cls(hidden_units=hidden_units, output_unit=output_unit) + + def predict(self, inputs: Iterable[float]) -> MadalineResult: + values = tuple(float(value) for value in inputs) + hidden_results = tuple( + unit.predict(values, thresholded=True) + for unit in self.hidden_units + ) + hidden_outputs = tuple(int(result.output) for result in hidden_results) + linear_output = self.output_unit.predict(hidden_outputs) + return MadalineResult( + hidden_results=hidden_results, + hidden_outputs=hidden_outputs, + output_result=linear_output, + output=bipolar_sign(linear_output.activation, threshold=self.output_unit.threshold), + ) + + def train_step(self, inputs: Iterable[float], target: int) -> MadalineResult: + if target not in (-1, 1): + raise ValueError("target must be bipolar: -1 or 1") + values = tuple(float(value) for value in inputs) + for hidden_unit in self.hidden_units: + hidden_unit.train_step(values, float(target)) + hidden_result = self.predict(values) + self.output_unit.train_step(hidden_result.hidden_outputs, float(target)) + return self.predict(values) + + def classify(self, inputs: Iterable[float]) -> int: + return self.predict(inputs).output + + def to_dict(self) -> dict[str, object]: + return { + "hidden_units": [unit.to_dict() for unit in self.hidden_units], + "output_unit": self.output_unit.to_dict(), + } + + @classmethod + def from_dict(cls, data: dict[str, object]) -> "MadalineNetwork": + return cls( + hidden_units=tuple( + AdalineNetwork.from_dict(unit_data) + for unit_data in data["hidden_units"] # type: ignore[index] + ), + output_unit=AdalineNetwork.from_dict(data["output_unit"]), # type: ignore[arg-type] + ) + + def save_json(self, path: str) -> None: + with open(path, "w", encoding="utf-8") as handle: + json.dump(self.to_dict(), handle, indent=2) + + @classmethod + def load_json(cls, path: str) -> "MadalineNetwork": + with open(path, "r", encoding="utf-8") as handle: + return cls.from_dict(json.load(handle)) diff --git a/src/synaptopus/backprop.py b/src/synaptopus/backprop.py index 3a57b58..d647b07 100644 --- a/src/synaptopus/backprop.py +++ b/src/synaptopus/backprop.py @@ -7,8 +7,8 @@ import random from typing import Iterable -def sigmoid(x: float) -> float: - clamped = max(min(-x, 80.0), -80.0) +def sigmoid(x: float, *, slope: float = 1.0) -> float: + clamped = max(min(-(slope * x), 80.0), -80.0) return 1.0 / (1.0 + math.exp(clamped)) @@ -33,6 +33,7 @@ class BackpropNetwork: layer_sizes: tuple[int, ...], learning_rate: float, momentum: float, + activation_slope: float, weights: list[list[list[float]]], biases: list[list[float]], ) -> None: @@ -40,6 +41,8 @@ class BackpropNetwork: raise ValueError("layer_sizes must include at least input and output layers") if any(size <= 0 for size in layer_sizes): raise ValueError("all layer sizes must be positive") + if activation_slope <= 0.0: + raise ValueError("activation_slope must be positive") if len(weights) != len(layer_sizes) - 1: raise ValueError("weights must connect each adjacent layer") if len(biases) != len(layer_sizes) - 1: @@ -48,6 +51,7 @@ class BackpropNetwork: self.layer_sizes = layer_sizes self.learning_rate = learning_rate self.momentum = momentum + self.activation_slope = activation_slope self.weights = weights self.biases = biases self.last_weight_updates = [ @@ -80,6 +84,7 @@ class BackpropNetwork: output_size: int, learning_rate: float = 0.5, momentum: float = 0.1, + activation_slope: float = 1.0, rng: random.Random | None = None, ) -> "BackpropNetwork": generator = rng or random.Random() @@ -98,6 +103,7 @@ class BackpropNetwork: layer_sizes=layer_sizes, learning_rate=learning_rate, momentum=momentum, + activation_slope=activation_slope, weights=weights, biases=biases, ) @@ -138,7 +144,9 @@ class BackpropNetwork: for activation, target in zip(output_activations, target_values): error = target - activation losses.append(0.5 * error * error) - output_deltas.append(error * activation * (1.0 - activation)) + output_deltas.append( + error * self.activation_slope * activation * (1.0 - activation) + ) deltas[-1] = output_deltas for layer_index in range(len(deltas) - 2, -1, -1): @@ -150,7 +158,9 @@ class BackpropNetwork: downstream = 0.0 for next_neuron_index, next_delta in enumerate(next_deltas): downstream += next_delta * next_weights[next_neuron_index][neuron_index] - current_deltas.append(activation * (1.0 - activation) * downstream) + current_deltas.append( + self.activation_slope * activation * (1.0 - activation) * downstream + ) deltas[layer_index] = current_deltas for layer_index, (layer_weights, layer_biases) in enumerate(zip(self.weights, self.biases)): @@ -202,7 +212,7 @@ class BackpropNetwork: next_values: list[float] = [] for neuron_weights, bias in zip(layer_weights, layer_biases): total = sum(weight * value for weight, value in zip(neuron_weights, current)) + bias - next_values.append(sigmoid(total)) + next_values.append(sigmoid(total, slope=self.activation_slope)) activations.append(next_values) current = next_values return activations @@ -212,6 +222,7 @@ class BackpropNetwork: "layer_sizes": list(self.layer_sizes), "learning_rate": self.learning_rate, "momentum": self.momentum, + "activation_slope": self.activation_slope, "weights": self.weights, "biases": self.biases, "last_weight_updates": self.last_weight_updates, @@ -224,6 +235,7 @@ class BackpropNetwork: layer_sizes=tuple(int(value) for value in data["layer_sizes"]), # type: ignore[index] learning_rate=float(data["learning_rate"]), momentum=float(data["momentum"]), + activation_slope=float(data.get("activation_slope", 1.0)), weights=[ [ [float(weight) for weight in neuron] diff --git a/tests/test_adaline.py b/tests/test_adaline.py new file mode 100644 index 0000000..e8f8583 --- /dev/null +++ b/tests/test_adaline.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import random + +from synaptopus.adaline import AdalineNetwork, MadalineNetwork + + +def test_adaline_learns_bipolar_and() -> None: + network = AdalineNetwork.random( + input_size=2, + learning_rate=0.2, + rng=random.Random(5), + ) + samples = ( + ((-1.0, -1.0), -1.0), + ((-1.0, 1.0), -1.0), + ((1.0, -1.0), -1.0), + ((1.0, 1.0), 1.0), + ) + + for _ in range(30): + for inputs, target in samples: + network.train_step(inputs, target) + + predictions = { + inputs: network.classify(inputs) + for inputs, _ in samples + } + + assert predictions[(-1.0, -1.0)] == -1 + assert predictions[(-1.0, 1.0)] == -1 + assert predictions[(1.0, -1.0)] == -1 + assert predictions[(1.0, 1.0)] == 1 + + +def test_adaline_round_trips_through_dict() -> None: + network = AdalineNetwork.random( + input_size=3, + learning_rate=0.15, + rng=random.Random(8), + ) + + restored = AdalineNetwork.from_dict(network.to_dict()) + + assert restored.input_size == network.input_size + assert restored.weights == network.weights + assert restored.bias == network.bias + + +def test_madaline_learns_bipolar_or() -> None: + network = MadalineNetwork.random( + input_size=2, + hidden_unit_count=2, + learning_rate=0.15, + rng=random.Random(9), + ) + samples = ( + ((-1.0, -1.0), -1), + ((-1.0, 1.0), 1), + ((1.0, -1.0), 1), + ((1.0, 1.0), 1), + ) + + for _ in range(60): + for inputs, target in samples: + network.train_step(inputs, target) + + predictions = { + inputs: network.classify(inputs) + for inputs, _ in samples + } + + assert predictions[(-1.0, -1.0)] == -1 + assert predictions[(-1.0, 1.0)] == 1 + assert predictions[(1.0, -1.0)] == 1 + assert predictions[(1.0, 1.0)] == 1 diff --git a/tests/test_backprop.py b/tests/test_backprop.py index 56cf010..79a3889 100644 --- a/tests/test_backprop.py +++ b/tests/test_backprop.py @@ -59,11 +59,13 @@ def test_backprop_round_trips_through_dict() -> None: input_size=2, hidden_layers=(3, 2), output_size=1, + activation_slope=1.5, rng=random.Random(3), ) restored = BackpropNetwork.from_dict(network.to_dict()) assert restored.layer_sizes == network.layer_sizes + assert restored.activation_slope == network.activation_slope assert restored.weights == network.weights assert restored.biases == network.biases