added @@ for keyword swapping
This commit is contained in:
@@ -29,11 +29,13 @@ def run(file: Path, lang: str | None, keep: bool):
|
||||
source = f"# foreignthon: {lang}\n" + source
|
||||
file.write_text(source, encoding="utf-8")
|
||||
|
||||
transpiled = transpile_file(file)
|
||||
|
||||
# Activate error hook BEFORE transpiling so even transpile
|
||||
# errors get shown in the foreign language
|
||||
detected_lang = lang or _lang_from_file(file)
|
||||
activate(detected_lang)
|
||||
|
||||
transpiled = transpile_file(file)
|
||||
|
||||
if keep:
|
||||
out_path = file.with_suffix("").with_suffix(".compiled.py")
|
||||
out_path.write_text(transpiled, encoding="utf-8")
|
||||
|
||||
@@ -1,12 +1,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import re
|
||||
import tokenize
|
||||
from pathlib import Path
|
||||
|
||||
from .pack import load_pack
|
||||
|
||||
|
||||
def _apply_postfix_syntax(source: str, mapping: dict) -> str:
|
||||
"""
|
||||
Pre-tokenizer pass: handle postfix @@ keyword syntax.
|
||||
x > 0 @@ஆனால்: → ஆனால் x > 0:
|
||||
Indentation is preserved by separating it before rewriting.
|
||||
"""
|
||||
if "@@" not in source:
|
||||
return source
|
||||
|
||||
kw_pattern = "|".join(re.escape(k) for k in sorted(mapping, key=len, reverse=True))
|
||||
postfix_re = re.compile(rf"(.+?)@@({kw_pattern})")
|
||||
|
||||
lines = source.splitlines(keepends=True)
|
||||
result = []
|
||||
|
||||
for line in lines:
|
||||
if "@@" not in line:
|
||||
result.append(line)
|
||||
continue
|
||||
|
||||
# Separate indentation from content so we never lose it
|
||||
stripped = line.lstrip()
|
||||
indent = line[: len(line) - len(stripped)]
|
||||
ending = "\n" if stripped.endswith("\n") else ""
|
||||
content = stripped.rstrip("\n")
|
||||
|
||||
def _replace(m: re.Match) -> str:
|
||||
expr = m.group(1).strip()
|
||||
kw = m.group(2)
|
||||
return f"{kw} {expr}"
|
||||
|
||||
rewritten = indent + postfix_re.sub(_replace, content) + ending
|
||||
result.append(rewritten)
|
||||
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def transpile(source: str, lang_code: str) -> str:
|
||||
"""
|
||||
Transpile foreign-language Python source to standard Python.
|
||||
@@ -14,22 +52,21 @@ def transpile(source: str, lang_code: str) -> str:
|
||||
"""
|
||||
pack = load_pack(lang_code)
|
||||
|
||||
# Build a single flat lookup: foreign token -> English token
|
||||
mapping: dict[str, str] = {}
|
||||
mapping.update(pack["keywords"])
|
||||
mapping.update(pack["builtins"])
|
||||
mapping.update(pack["exceptions"])
|
||||
mapping.update(pack["stdlib"])
|
||||
|
||||
source = _apply_postfix_syntax(source, mapping)
|
||||
|
||||
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline)
|
||||
result: list[str] = []
|
||||
|
||||
prev_end = (1, 0)
|
||||
|
||||
for tok in tokens_in:
|
||||
tok_type, tok_string, tok_start, tok_end, _ = tok
|
||||
|
||||
# Preserve original whitespace/indentation between tokens
|
||||
start_row, start_col = tok_start
|
||||
end_row, end_col = prev_end
|
||||
|
||||
@@ -39,7 +76,6 @@ def transpile(source: str, lang_code: str) -> str:
|
||||
result.append("\n" * (start_row - end_row))
|
||||
result.append(" " * start_col)
|
||||
|
||||
# Only swap NAME tokens — leaves strings, comments, ops untouched
|
||||
if tok_type == tokenize.NAME and tok_string in mapping:
|
||||
result.append(mapping[tok_string])
|
||||
else:
|
||||
@@ -51,47 +87,26 @@ def transpile(source: str, lang_code: str) -> str:
|
||||
|
||||
|
||||
def transpile_file(path: Path) -> str:
    """
    Read a foreign-language source file and return it as standard Python.

    The language code is taken from the file extension (.es.py -> es)
    and then passed through ``_check_shebang`` together with the file's
    text, so a leading ``# foreignthon: <lang>`` line can override it.
    """
    extension_lang = _detect_lang(path)
    text = path.read_text(encoding="utf-8")

    # Shebang-style override, e.g. "# foreignthon: fr" on the first line
    effective_lang = _check_shebang(text, extension_lang)
    return transpile(text, effective_lang)
|
||||
|
||||
|
||||
def run_transpiled(original_path: Path, transpiled: str) -> None:
|
||||
"""
|
||||
Execute transpiled source while making tracebacks point
|
||||
to the original .es.py file, not a temp file.
|
||||
"""
|
||||
import linecache
|
||||
|
||||
filename = str(original_path.resolve())
|
||||
|
||||
# Register original source lines so traceback displays them correctly
|
||||
original_lines = original_path.read_text(encoding="utf-8").splitlines(keepends=True)
|
||||
linecache.cache[filename] = (
|
||||
len(original_lines),
|
||||
None,
|
||||
original_lines,
|
||||
filename,
|
||||
)
|
||||
linecache.cache[filename] = (len(original_lines), None, original_lines, filename)
|
||||
|
||||
# Compile with original filename — this is what sets it in the traceback
|
||||
code = compile(transpiled, filename, "exec")
|
||||
|
||||
glob = {"__file__": filename, "__name__": "__main__"}
|
||||
exec(code, glob)
|
||||
|
||||
|
||||
def _detect_lang(path: Path) -> str:
|
||||
"""Extract lang code from extension, e.g. script.es.py -> es."""
|
||||
suffixes = path.suffixes # e.g. ['.es', '.py']
|
||||
suffixes = path.suffixes
|
||||
if len(suffixes) >= 2 and suffixes[-1] == ".py":
|
||||
return suffixes[-2].lstrip(".")
|
||||
raise ValueError(
|
||||
@@ -101,7 +116,6 @@ def _detect_lang(path: Path) -> str:
|
||||
|
||||
|
||||
def _check_shebang(source: str, default: str) -> str:
|
||||
"""Check first line for # foreignthon: <lang> override."""
|
||||
first_line = source.splitlines()[0] if source else ""
|
||||
if first_line.startswith("# foreignthon:"):
|
||||
return first_line.split(":", 1)[1].strip()
|
||||
|
||||
@@ -102,3 +102,35 @@ def test_shebang_override():
|
||||
|
||||
def test_shebang_default_when_absent():
    """Without a ``# foreignthon:`` first line the given default is returned."""
    source_without_override = "si x:\n pasar"
    chosen = _check_shebang(source_without_override, "es")
    assert chosen == "es"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Postfix @@ syntax
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_postfix_if():
    """A postfix ``@@si`` is turned into ``if`` and the marker is consumed."""
    program = "x = 5\nx > 0 @@si:\n imprimir(x)"
    transpiled = es(program)
    assert "if" in transpiled
    assert "@@" not in transpiled
|
||||
|
||||
def test_postfix_preserves_indentation():
    """A postfix ``@@si`` nested in a function body must keep its indentation.

    The fixture uses real 4/8-space nesting so that the result can only
    parse if the rewrite left every line's leading whitespace intact.
    """
    src = (
        "definir comprobar(x):\n"
        "    x > 0 @@si:\n"
        "        imprimir(x)\n"
        "    sino:\n"
        "        pasar\n"
    )
    out = es(src)
    ast.parse(out)  # fails if indentation is broken
|
||||
|
||||
def test_prefix_still_works_alongside_postfix():
    """Prefix ``si ...:`` and postfix ``... @@si:`` can be mixed in one source."""
    prefix_block = "si x > 0:\n imprimir(x)\n"
    postfix_block = "y < 0 @@si:\n imprimir(y)\n"
    transpiled = es(prefix_block + postfix_block)
    assert transpiled.count("if") == 2
    assert "@@" not in transpiled
|
||||
|
||||
Reference in New Issue
Block a user