109 lines
3.2 KiB
Python
109 lines
3.2 KiB
Python
from __future__ import annotations
|
|
|
|
import io
|
|
import tokenize
|
|
from pathlib import Path
|
|
|
|
from .pack import load_pack
|
|
|
|
|
|
def transpile(source: str, lang_code: str) -> str:
    """
    Transpile foreign-language Python source to standard Python.

    The source is tokenized and only NAME tokens that appear in the
    language pack are replaced. All text between replaced names is copied
    verbatim from ``source``, so strings, comments, indentation (tabs
    included), backslash continuations and line numbers are preserved.

    Args:
        source: Program text written with localized names.
        lang_code: Language pack identifier handed to ``load_pack``.

    Returns:
        ``source`` with every mapped name replaced by its English
        counterpart; the number of lines is unchanged.

    Raises:
        Tokenizer errors (e.g. ``tokenize.TokenError`` for an unterminated
        bracket or string) propagate unchanged.
    """
    pack = load_pack(lang_code)

    # Build a single flat lookup: foreign token -> English token.
    # Later sections win on key collisions, in this order.
    mapping: dict[str, str] = {}
    for section in ("keywords", "builtins", "exceptions", "stdlib"):
        mapping.update(pack[section])

    # Absolute character offset at which each physical line begins.
    # Lines are split exactly as the tokenizer's readline sees them
    # (on "\n" only), so token (row, col) pairs map to offsets in `source`.
    line_starts = [0]
    for line in io.StringIO(source):
        line_starts.append(line_starts[-1] + len(line))

    pieces: list[str] = []
    cursor = 0  # offset in `source` up to which text has been emitted

    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        # Only swap NAME tokens; strings, comments and operators are untouched.
        if tok.type != tokenize.NAME:
            continue
        replacement = mapping.get(tok.string)
        if replacement is None:
            continue

        start = line_starts[tok.start[0] - 1] + tok.start[1]
        end = line_starts[tok.end[0] - 1] + tok.end[1]

        # Copy the untouched text before the name verbatim instead of
        # re-synthesizing whitespace from token positions: rebuilding it
        # duplicates newlines, drops backslash continuations and turns
        # tabs into spaces.
        pieces.append(source[cursor:start])
        pieces.append(replacement)
        cursor = end

    pieces.append(source[cursor:])
    return "".join(pieces)
|
|
|
|
|
|
def transpile_file(path: Path) -> str:
    """
    Read a localized script and return its standard-Python translation.

    The language code is taken from the file name (``script.es.py`` -> ``es``)
    and may be overridden by a ``# foreignthon: <lang>`` marker on the first
    line of the file.
    """
    # Resolve the extension-based language first so a badly named file is
    # rejected before any I/O happens.
    extension_lang = _detect_lang(path)
    text = path.read_text(encoding="utf-8")

    # A first-line marker, when present, takes precedence over the extension.
    return transpile(text, _check_shebang(text, extension_lang))
|
|
|
|
|
|
def run_transpiled(original_path: Path, transpiled: str) -> None:
    """
    Run ``transpiled`` as a ``__main__`` script attributed to ``original_path``.

    The code object is compiled under the resolved path of the original
    file, and that file's lines are placed in ``linecache`` under the same
    key, so tracebacks name the original file rather than a temp file.
    """
    import linecache

    script = str(original_path.resolve())

    # Seed linecache with the original (untranspiled) lines for this path.
    source_lines = original_path.read_text(encoding="utf-8").splitlines(
        keepends=True
    )
    linecache.cache[script] = (len(source_lines), None, source_lines, script)

    namespace = {"__file__": script, "__name__": "__main__"}

    # Compiling under the original filename is what puts it in tracebacks.
    exec(compile(transpiled, script, "exec"), namespace)
|
|
|
|
|
|
def _detect_lang(path: Path) -> str:
|
|
"""Extract lang code from extension, e.g. script.es.py -> es."""
|
|
suffixes = path.suffixes # e.g. ['.es', '.py']
|
|
if len(suffixes) >= 2 and suffixes[-1] == ".py":
|
|
return suffixes[-2].lstrip(".")
|
|
raise ValueError(
|
|
f"Cannot detect language from filename '{path.name}'. "
|
|
"Expected format: script.<lang>.py (e.g. script.es.py)"
|
|
)
|
|
|
|
|
|
def _check_shebang(source: str, default: str) -> str:
|
|
"""Check first line for # foreignthon: <lang> override."""
|
|
first_line = source.splitlines()[0] if source else ""
|
|
if first_line.startswith("# foreignthon:"):
|
|
return first_line.split(":", 1)[1].strip()
|
|
return default
|