# foreignthon-core/src/foreignthon/transpiler.py

from __future__ import annotations

import io
import tokenize
from pathlib import Path

from .pack import load_pack


def transpile(source: str, lang_code: str) -> str:
    """
    Transpile foreign-language Python source to standard Python.

    The source is tokenized and only NAME tokens present in the language
    pack are replaced. Everything else (strings, comments, whitespace,
    backslash continuations, line endings) is copied from ``source``
    verbatim, so the output keeps the same line numbering as the input.

    Args:
        source: Program text written with foreign-language names.
        lang_code: Language identifier handed to ``load_pack``.

    Returns:
        ``source`` with every mapped NAME token swapped for its
        replacement from the pack.

    Raises:
        KeyError: If the pack lacks one of the "keywords", "builtins",
            "exceptions" or "stdlib" sections.
        Exception: Whatever ``tokenize.generate_tokens`` raises for text
            it cannot tokenize (e.g. ``tokenize.TokenError``).
    """
    pack = load_pack(lang_code)

    # Build a single flat lookup: foreign token -> English token.
    # Sections are merged in order, so a later section wins on a clash.
    mapping: dict[str, str] = {}
    for section in ("keywords", "builtins", "exceptions", "stdlib"):
        mapping.update(pack[section])

    # Offset in `source` at which each physical line starts. The lines are
    # read through StringIO (the same way the tokenizer reads them) so the
    # row numbers reported by tokenize index straight into this list.
    line_starts = [0]
    for line in io.StringIO(source).readlines():
        line_starts.append(line_starts[-1] + len(line))

    pieces: list[str] = []
    cursor = 0  # offset of the first source character not yet emitted

    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        # Only swap NAME tokens — leaves strings, comments, ops untouched
        if tok.type != tokenize.NAME or tok.string not in mapping:
            continue

        start_row, start_col = tok.start
        end_row, end_col = tok.end
        start = line_starts[start_row - 1] + start_col
        end = line_starts[end_row - 1] + end_col

        # Copy the untouched text before this name exactly as written,
        # rather than rebuilding it from token positions: that keeps
        # newlines single, and preserves tabs and "\" continuations.
        pieces.append(source[cursor:start])
        pieces.append(mapping[tok.string])
        cursor = end

    # Tail after the last replaced name (or the whole source if none).
    pieces.append(source[cursor:])
    return "".join(pieces)


def transpile_file(path: Path) -> str:
    """
    Read the file at ``path`` and return its transpiled Python source.

    The language code is taken from the file extension (.es.py -> es)
    and may be overridden by a first-line ``# foreignthon: <lang>``
    comment in the file itself.
    """
    # The extension is validated before the file is read.
    extension_lang = _detect_lang(path)
    text = path.read_text(encoding="utf-8")

    # Allow shebang-style override: # foreignthon: fr
    return transpile(text, _check_shebang(text, extension_lang))


def run_transpiled(original_path: Path, transpiled: str) -> None:
    """
    Run ``transpiled`` as a ``__main__`` script attributed to the
    original file, so tracebacks name that file, not a temp file.
    """
    import linecache

    script_name = str(original_path.resolve())

    # Seed linecache with the original file's text so that traceback
    # rendering shows the foreign-language lines the user wrote.
    original_text = original_path.read_text(encoding="utf-8")
    source_lines = original_text.splitlines(keepends=True)
    cache_entry = (len(source_lines), None, source_lines, script_name)
    linecache.cache[script_name] = cache_entry

    # Compiling under the original filename is what puts it in tracebacks.
    compiled = compile(transpiled, script_name, "exec")

    namespace = {"__file__": script_name, "__name__": "__main__"}
    exec(compiled, namespace)


def _detect_lang(path: Path) -> str:
    """Extract lang code from extension, e.g. script.es.py -> es."""
    suffixes = path.suffixes  # e.g. ['.es', '.py']
    if len(suffixes) >= 2 and suffixes[-1] == ".py":
        return suffixes[-2].lstrip(".")
    raise ValueError(
        f"Cannot detect language from filename '{path.name}'. "
        "Expected format: script.<lang>.py (e.g. script.es.py)"
    )


def _check_shebang(source: str, default: str) -> str:
    """Check first line for # foreignthon: <lang> override."""
    first_line = source.splitlines()[0] if source else ""
    if first_line.startswith("# foreignthon:"):
        return first_line.split(":", 1)[1].strip()
    return default