# chess-engine/python/python/model/feature_extractor.py

"""Extract NNUE features from FEN strings"""

import chess
from chess import Board as chess_board
from python.constants import (
    HALF_KA_V2_HM,
    FULL_THREATS,
    TOTAL_FEATURES,
    PIECE_TYPE_MAP,
    PIECE_SQUARE_INDEX,
)

# Lookup table from square index (0-55) to king bucket (0-7).
# The 56 entries are split into 8 equal runs of 7 consecutive squares:
# squares 0-6 -> bucket 0, squares 7-13 -> bucket 1, ..., squares 49-55 -> bucket 7.
KING_BUCKETS = [bucket for bucket in range(8) for _ in range(7)]


def fen_to_features(fen: str) -> list:
    """
    Convert a FEN string into the NNUE input feature vector.

    Only the HalfKAv2_hm part of the vector is populated; the FullThreats
    part is left at 0.0 (see the TODO at the end of the function).

    Layout of the returned vector:
    - [0, 336): piece-square features, index = oriented_square * 6 + piece_type,
      set only for oriented squares 0-55.
    - [336, 352): king-bucket features, index = 336 + bucket * 2 + colour_flag
      (8 buckets * 2 flags = 16 features).
    - [352, TOTAL_FEATURES): FullThreats, not implemented yet.

    Args:
        fen: Chess position in Forsyth-Edwards Notation.

    Returns:
        list: Feature vector of length TOTAL_FEATURES (61,072 per the module
        constants) whose entries are 0.0 or 1.0.

    Raises:
        ValueError: Propagated from ``chess.Board`` when the FEN is invalid.
    """
    squares_per_side = 56  # oriented squares covered by the piece-square block
    piece_kinds = 6  # assumes PIECE_TYPE_MAP yields 0-5 - TODO confirm in python.constants
    king_block_start = squares_per_side * piece_kinds  # 336
    flags_per_bucket = 2  # one slot per king colour inside each bucket

    features = [0.0] * TOTAL_FEATURES

    b = chess_board(fen)
    # python-chess defines chess.WHITE as True, so this is 1 when White is to
    # move and 0 when Black is to move.
    perspective = int(b.turn)

    # XOR mask that turns a board square into its oriented square. Earlier
    # revisions also XOR-ed with (56 * perspective) twice in a row; the two
    # applications cancel, so the mask alone is the effective transform.
    orient_mask = PIECE_SQUARE_INDEX[perspective][0]

    # Read the board once, in ascending square order. The order matters for
    # the king-bucket pass below, where the first king seen claims a bucket.
    occupied = []
    for sq in chess.SQUARES:
        piece = b.piece_at(sq)
        if piece is not None:
            occupied.append((sq, piece))

    # Piece-square encoding (336 features) using oriented squares.
    for sq, piece in occupied:
        piece_type = PIECE_TYPE_MAP.get(piece.unicode_symbol())
        if piece_type is None:
            continue  # piece kind not covered by the feature set
        oriented_sq = sq ^ orient_mask
        # Oriented squares 56-63 have no slot in this block and are skipped.
        if oriented_sq < squares_per_side:
            features[oriented_sq * piece_kinds + piece_type] = 1.0

    # King bucket encoding (16 features).
    # bucket -> colour flag (1 for a white king, 0 for a black king).
    king_flags = {}
    for sq, piece in occupied:
        if piece.piece_type != chess.KING:
            continue
        # The bucket is the oriented king square modulo 8, which keeps it in 0-7.
        bucket_idx = (sq ^ orient_mask) % 8
        # If both kings map to the same bucket, the king on the lower square
        # index (seen first) keeps it.
        king_flags.setdefault(bucket_idx, 1 if piece.color == chess.WHITE else 0)

    for bucket_idx, colour_flag in king_flags.items():
        # Stride of 2 keeps the indices inside [336, 352). The previous stride
        # of 8 produced indices up to 393, i.e. inside the FullThreats range.
        features[king_block_start + bucket_idx * flags_per_bucket + colour_flag] = 1.0

    # TODO: FullThreats (60,720 features encoding attack relationships) is
    # skipped for now - it requires the exact Stockfish formula:
    #   Index = lut1[attacker][attacked][from<to] + offsets[from] + lut2[from][to]
    # This requires careful study of the Stockfish NNUE source code.
    return features