feat: add project structure and basic NNUE model
- Create python directory with data/, model/ subdirectories - Implement LinearEval(61072->1) model - Add config, constants, feature_extractor - Add tests with 4 passing test cases
This commit is contained in:
19
python/README.md
Normal file
19
python/README.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Chess NNUE Distillation
|
||||
|
||||
Train a single linear layer on Stockfish's NNUE features.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
cd python
|
||||
source .venv/bin/activate
|
||||
pip install torch --index-url https://download.pytorch.org/whl/cu121
|
||||
pip install numpy python-chess tqdm matplotlib h5py joblib pytest
|
||||
python train_full.py
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
- Input: 61,072 features (352 HalfKAv2_hm + 60,720 FullThreats)
|
||||
- Output: 1 scalar (centipawns)
|
||||
- Optimizer: Adam (lr=1e-3, wd=1e-4)
|
||||
5
python/python/__init__.py
Normal file
5
python/python/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Chess NNUE Training Package"""
|
||||
|
||||
from .data import generate_data
|
||||
from .model import nnue_linear
|
||||
from .stockfish_wrapper import NNUEEvaluator
|
||||
20
python/python/config.py
Normal file
20
python/python/config.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""Training Configuration"""
|
||||
|
||||
import os
|
||||
|
||||
# Hardware
|
||||
BATCH_SIZE = 16_384
|
||||
NUM_WORKERS = 0
|
||||
|
||||
# Optimizer
|
||||
LEARNING_RATE = 1e-3
|
||||
WEIGHT_DECAY = 1e-4
|
||||
GRADIENT_CLIP = 5.0
|
||||
|
||||
# Training
|
||||
EPOCHS = 100
|
||||
EARLY_STOPPING_PATIENCE = 50
|
||||
|
||||
# Paths
|
||||
DATA_DIR = "data"
|
||||
MODEL_DIR = "models"
|
||||
6
python/python/constants.py
Normal file
6
python/python/constants.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Stockfish NNUE Feature Constants"""
|
||||
|
||||
# Total feature count: 352 + 60,720 = 61,072
|
||||
HALF_KA_V2_HM = 352
|
||||
FULL_THREATS = 60_720
|
||||
TOTAL_FEATURES = HALF_KA_V2_HM + FULL_THREATS
|
||||
1
python/python/data/__init__.py
Normal file
1
python/python/data/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Data processing and generation"""
|
||||
46
python/python/data/generate_data.py
Normal file
46
python/python/data/generate_data.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Generate training data from PGN files"""
|
||||
|
||||
import chess
|
||||
import chess.pgn
|
||||
import io
|
||||
from typing import List, Tuple
|
||||
from python.constants import TOTAL_FEATURES
|
||||
|
||||
|
||||
def parse_pgn(pgn_string: str) -> "Iterator[str]":
    """
    Extract FENs from a PGN string.

    Yields:
        FEN strings: the starting position of the game, then the position
        after each mainline move, in order. Yields nothing if the input
        contains no parseable game.
    """
    # chess.pgn has no read_string(); parse via a file-like object instead.
    game = chess.pgn.read_game(io.StringIO(pgn_string))
    if game is None:
        return

    # Yield the opening position.
    board = game.board()
    yield board.fen()

    # Replay the mainline, accumulating moves on ONE board.  The original
    # copied the *start* position and pushed only the current move each
    # iteration, so every yielded FEN after the first move was wrong.
    for move in game.mainline_moves():
        board.push(move)
        yield board.fen()
def generate_data_from_pgn(pgn_text: str) -> Tuple[List[float], List[float]]:
    """
    Build (features, evaluation) training pairs from PGN text.

    Placeholder implementation: every extracted position gets an all-zero
    feature vector and an evaluation of 0.0 until real extraction lands.
    """
    features_list = []
    evals_list = []

    for _fen in list(parse_pgn(pgn_text)):
        # TODO: Extract features
        features_list.append([0.0] * TOTAL_FEATURES)
        # TODO: Get evaluation from Stockfish
        evals_list.append(0.0)

    return features_list, evals_list
11
python/python/data/preprocessing.py
Normal file
11
python/python/data/preprocessing.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Data preprocessing and cleaning"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def normalize_features(features: np.ndarray) -> np.ndarray:
    """Standardize each feature column to zero mean and unit variance."""
    col_mean = features.mean(axis=0)
    col_std = features.std(axis=0)
    # Constant columns would divide by zero; give them unit scale instead.
    col_std = np.where(col_std == 0, 1, col_std)
    return (features - col_mean) / col_std
29
python/python/evaluate.py
Normal file
29
python/python/evaluate.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Evaluate model performance"""
|
||||
|
||||
import time
|
||||
import torch
|
||||
import numpy as np
|
||||
from python.model.nnue_linear import LinearEval
|
||||
|
||||
|
||||
def benchmark(model: "LinearEval", samples: int = 1000) -> dict:
    """
    Benchmark single-position inference speed.

    Args:
        model: Model to time; its input width is read from model.linear.
        samples: Number of single-position forward passes to time.

    Returns:
        dict with keys "samples", "time_seconds", "ms_per_sample".
    """
    model.eval()
    # Infer the input width from the model instead of hard-coding 61072.
    input_dim = model.linear.in_features
    x = torch.randn(1, input_dim)

    start = time.time()
    with torch.no_grad():
        # One batch-of-1 forward per sample.  The original ran `samples`
        # forward passes over a (samples, dim) batch — samples**2 position
        # evaluations — making ms_per_sample off by a factor of `samples`.
        for _ in range(samples):
            _ = model(x)
    end = time.time()

    return {
        "samples": samples,
        "time_seconds": end - start,
        "ms_per_sample": (end - start) / samples * 1000,
    }
1
python/python/model/__init__.py
Normal file
1
python/python/model/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""NNUE Model definitions"""
|
||||
26
python/python/model/feature_extractor.py
Normal file
26
python/python/model/feature_extractor.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Extract NNUE features from FEN strings"""
|
||||
|
||||
from chess import board as chess_board
|
||||
from python.constants import HALF_KA_V2_HM, FULL_THREATS, TOTAL_FEATURES
|
||||
|
||||
|
||||
def fen_to_features(fen: str) -> list:
    """
    Convert a FEN string to a 61,072-element feature vector.

    Features:
        - HalfKAv2_hm: 352 features (piece-square + king buckets)
        - FullThreats: 60,720 features (attack relationships)

    Returns:
        list: Feature vector of length 61,072 (all zeros until the
        extractors below are implemented).
    """
    # Local import: the module-level `from chess import board` is not a
    # valid python-chess import — the board class is chess.Board.
    import chess

    features = [0.0] * TOTAL_FEATURES

    b = chess.Board(fen)
    # python-chess exposes the side to move as Board.turn (chess.WHITE is
    # truthy); there is no Board.active() method.
    perspective = 0 if b.turn == chess.WHITE else 1  # 0 for white, 1 for black

    # TODO: Implement HalfKAv2_hm (352 features) using `perspective`
    # TODO: Implement FullThreats (60,720 features)

    return features
26
python/python/model/nnue_linear.py
Normal file
26
python/python/model/nnue_linear.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Single linear layer NNUE model"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from python.constants import TOTAL_FEATURES
|
||||
|
||||
|
||||
class LinearEval(nn.Module):
    """
    Linear(61,072 -> 1) - Single dense layer, no activation.
    Outputs centipawn evaluation.

    Weights and bias are zero-initialized so an untrained model scores
    every position as exactly 0 centipawns.
    """

    def __init__(self, input_dim=None):
        """
        Args:
            input_dim: Feature count; defaults to TOTAL_FEATURES (61,072).
                Resolved lazily inside __init__ so the class itself can be
                defined even when the constant is not in scope.
        """
        super().__init__()
        if input_dim is None:
            input_dim = TOTAL_FEATURES
        self.linear = nn.Linear(input_dim, 1)
        self.linear.weight.data.zero_()
        self.linear.bias.data.zero_()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the (batch, 1) centipawn evaluation for a (batch, input_dim) input."""
        return self.linear(x)

    def zero_input_eval(self) -> float:
        """
        Evaluate the model on an all-zeros input (should return 0).

        Renamed from `eval`: the original method shadowed nn.Module.eval(),
        so `model.eval()` no longer switched the module to inference mode
        and returned a float instead of the module.  It also hard-coded
        TOTAL_FEATURES, crashing for models built with a custom input_dim;
        the actual layer width is used here instead.
        """
        x = torch.zeros(1, self.linear.in_features)
        return float(self.forward(x)[0, 0])
27
python/python/stockfish_wrapper.py
Normal file
27
python/python/stockfish_wrapper.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Stockfish NNUE evaluation interface"""
|
||||
|
||||
import subprocess
|
||||
import chess
|
||||
import chess.engine
|
||||
from python.constants import HALF_KA_V2_HM
|
||||
|
||||
|
||||
class NNUEEvaluator:
    """Wrapper for Stockfish with NNUE evaluation"""

    def __init__(self, stockfish_path: str = "/usr/bin/stockfish"):
        """Start a UCI engine process at the given executable path."""
        self.engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
        self.supports_nnue = False

    def evaluate(self, fen: str) -> float:
        """
        Get NNUE evaluation in centipawns.
        Returns: positive for white advantage, negative for black
        """
        # The original called engine.play() — which searches for a MOVE —
        # and read a non-existent `result.info.score.relative().centi()`
        # API (it also assigned engine.configure()'s None return to `info`).
        # engine.analyse() is the python-chess call for a static evaluation.
        board = chess.Board(fen)
        info = self.engine.analyse(board, chess.engine.Limit(depth=1))
        # Score from White's perspective, matching the docstring; mate
        # scores are mapped to a large centipawn value so the return is
        # always numeric.
        return float(info["score"].white().score(mate_score=100_000))

    def close(self):
        """Shut down the engine process."""
        self.engine.quit()
77
python/python/train.py
Normal file
77
python/python/train.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Training loop for NNUE linear model"""
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
from python.model.nnue_linear import LinearEval
|
||||
from python.model.feature_extractor import fen_to_features
|
||||
from python.config import BATCH_SIZE, LEARNING_RATE, WEIGHT_DECAY, GRADIENT_CLIP, EPOCHS
|
||||
|
||||
|
||||
def train(features: np.ndarray, labels: np.ndarray) -> LinearEval:
    """
    Train the linear model with Adam + cosine LR schedule and early stopping.

    Args:
        features: (N, 61072) numpy array
        labels: (N,) numpy array

    Returns:
        Trained model with the best-epoch weights restored.
    """
    # Convert to tensors.  Labels are reshaped to (N, 1) so the MSE target
    # matches the model's (batch, 1) output — with a (B,) target,
    # mse_loss would broadcast (B, 1) vs (B,) into a (B, B) matrix and
    # compute a meaningless loss.
    X = torch.from_numpy(features).float()
    y = torch.from_numpy(labels).float().view(-1, 1)

    # Create dataset and dataloader
    dataset = TensorDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    # Initialize model, optimizer, and LR schedule
    model = LinearEval()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # Guard: an empty dataset would divide by len(dataloader) == 0 below.
    if len(dataloader) == 0:
        return model

    best_loss = float("inf")
    patience_counter = 0
    best_model_state = None
    patience_limit = 50  # keep in sync with config.EARLY_STOPPING_PATIENCE

    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0.0

        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            preds = model(batch_X)
            loss = torch.nn.functional.mse_loss(preds, batch_y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), GRADIENT_CLIP)
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        scheduler.step()

        # Early stopping bookkeeping
        if avg_loss < best_loss:
            best_loss = avg_loss
            # state_dict() returns references to the live parameter tensors;
            # the original's shallow dict.copy() kept mutating as training
            # continued.  Clone each tensor to snapshot the best weights.
            best_model_state = {
                k: v.detach().clone() for k, v in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {avg_loss:.6f}")

        if patience_counter >= patience_limit:
            print("Early stopping triggered")
            break

    # Restore the best-performing weights
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model
39
python/python/train_full.py
Normal file
39
python/python/train_full.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Main entry point for training"""
|
||||
|
||||
import numpy as np
|
||||
from python.model.nnue_linear import LinearEval
|
||||
from python.data.generate_data import generate_data_from_pgn
|
||||
from python.data.preprocessing import normalize_features
|
||||
from python.train import train
|
||||
|
||||
|
||||
def main():
    """End-to-end training pipeline: generate data -> normalize -> train -> smoke test."""
    # Local import so main() also works when this module is imported: the
    # original imported torch only under the __main__ guard, so calling
    # main() from anywhere else raised NameError on torch below.
    import torch

    # Generate data (placeholder - replace with real PGN loading).
    # generate_data_from_pgn() expects PGN text; the original passed a bare
    # FEN string, which no PGN parser can read.
    print("Generating data...")
    features, evals = generate_data_from_pgn('[Event "?"]\n\n1. e4 e5 *')

    # Normalize
    print("Normalizing features...")
    features = np.array(features, dtype=np.float32)
    evals = np.array(evals, dtype=np.float32)
    features = normalize_features(features)

    # Train
    print("Training...")
    model = train(features, evals)

    # Smoke-test the trained model on one random input
    print("Testing...")
    x = torch.randn(1, 61072)
    with torch.no_grad():
        pred = model(x)
    print(f"Sample prediction: {pred.item():.4f}")


if __name__ == "__main__":
    main()
||||
45
python/tests/test_nnue.py
Normal file
45
python/tests/test_nnue.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""Tests for NNUE implementation"""
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import numpy as np
|
||||
from python.model.nnue_linear import LinearEval
|
||||
from python.constants import TOTAL_FEATURES
|
||||
|
||||
|
||||
class TestLinearEval:
    """Unit tests covering the linear NNUE model."""

    def test_model_initialization(self):
        """The linear layer maps TOTAL_FEATURES inputs to one output."""
        net = LinearEval()
        assert net.linear.in_features == TOTAL_FEATURES
        assert net.linear.out_features == 1

    def test_model_output_shape(self):
        """A batch of 10 inputs yields a (10, 1) output."""
        net = LinearEval()
        batch = torch.randn(10, TOTAL_FEATURES)
        out = net(batch)
        assert out.shape == (10, 1)

    def test_model_zero_output(self):
        """Zero-initialized weights score an all-zero input as exactly 0."""
        net = LinearEval()
        zeros = torch.zeros(1, TOTAL_FEATURES)
        with torch.no_grad():
            out = net(zeros)
        assert out.item() == 0.0

    def test_gradient_flow(self):
        """Backpropagation reaches the input tensor."""
        net = LinearEval()
        batch = torch.randn(10, TOTAL_FEATURES, requires_grad=True)
        out = net(batch)
        out.sum().backward()
        assert batch.grad is not None


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
Reference in New Issue
Block a user