fix spacing: verbatim inter-token copy, fix postfix decompile, add integration tests

This commit is contained in:
2026-05-16 18:27:56 -05:00
parent 8f99503d6b
commit 15b91d0f6d
3 changed files with 523 additions and 108 deletions

View File

@@ -37,10 +37,6 @@ def _apply_postfix_syntax(source: str, mapping: dict) -> str:
def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set) -> str: def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set) -> str:
"""
Post-pass for decompile: rewrite foreign keyword lines to @@ postfix.
postfix_english comes from the language pack's postfix_keywords list.
"""
postfix_foreign = {en_to_foreign[k] for k in postfix_english if k in en_to_foreign} postfix_foreign = {en_to_foreign[k] for k in postfix_english if k in en_to_foreign}
lines = source.splitlines(keepends=True) lines = source.splitlines(keepends=True)
@@ -66,33 +62,46 @@ def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set
return "".join(result) return "".join(result)
def transpile(source: str, lang_code: str) -> str: def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> str:
pack = load_pack(lang_code) """Extract text from source between two (row, col) positions (1-indexed rows)."""
n = len(source_lines)
if sr > n:
return ""
if sr == er:
line = source_lines[sr - 1]
return line[sc:min(ec, len(line))]
parts = []
parts.append(source_lines[sr - 1][sc:])
for r in range(sr, er - 1):
if r < n:
parts.append(source_lines[r])
if er <= n:
parts.append(source_lines[er - 1][:ec])
return "".join(parts)
mapping: dict[str, str] = {}
mapping.update(pack["keywords"])
mapping.update(pack["builtins"])
mapping.update(pack["exceptions"])
mapping.update(pack["stdlib"])
source = _apply_postfix_syntax(source, mapping) def _swap_tokens(source: str, mapping: dict) -> str:
"""
Swap NAME tokens while copying all inter-token text verbatim from source.
This preserves original spacing exactly — no double newlines, no extra spaces.
"""
source_lines = source.splitlines(keepends=True)
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline) result = []
result: list[str] = []
prev_end = (1, 0) prev_end = (1, 0)
for tok in tokens_in: for tok_type, tok_string, tok_start, tok_end, _ in tokens:
tok_type, tok_string, tok_start, tok_end, _ = tok if tok_type in (tokenize.ENDMARKER, tokenize.ENCODING):
break
start_row, start_col = tok_start s_row, s_col = tok_start
end_row, end_col = prev_end
if start_row == end_row: # Copy original whitespace/newlines between tokens verbatim
result.append(" " * (start_col - end_col)) gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
else: result.append(gap)
result.append("\n" * (start_row - end_row))
result.append(" " * start_col)
# Swap or keep token
if tok_type == tokenize.NAME and tok_string in mapping: if tok_type == tokenize.NAME and tok_string in mapping:
result.append(mapping[tok_string]) result.append(mapping[tok_string])
else: else:
@@ -103,6 +112,19 @@ def transpile(source: str, lang_code: str) -> str:
return "".join(result) return "".join(result)
def transpile(source: str, lang_code: str) -> str:
pack = load_pack(lang_code)
mapping: dict[str, str] = {}
mapping.update(pack["keywords"])
mapping.update(pack["builtins"])
mapping.update(pack["exceptions"])
mapping.update(pack["stdlib"])
source = _apply_postfix_syntax(source, mapping)
return _swap_tokens(source, mapping)
def detranspile(source: str, lang_code: str, postfix: bool = False) -> str: def detranspile(source: str, lang_code: str, postfix: bool = False) -> str:
pack = load_pack(lang_code) pack = load_pack(lang_code)
@@ -111,33 +133,9 @@ def detranspile(source: str, lang_code: str, postfix: bool = False) -> str:
for foreign, english in pack[section].items(): for foreign, english in pack[section].items():
en_to_foreign[english] = foreign en_to_foreign[english] = foreign
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline) output = _swap_tokens(source, en_to_foreign)
result: list[str] = []
prev_end = (1, 0)
for tok in tokens_in:
tok_type, tok_string, tok_start, tok_end, _ = tok
start_row, start_col = tok_start
end_row, end_col = prev_end
if start_row == end_row:
result.append(" " * (start_col - end_col))
else:
result.append("\n" * (start_row - end_row))
result.append(" " * start_col)
if tok_type == tokenize.NAME and tok_string in en_to_foreign:
result.append(en_to_foreign[tok_string])
else:
result.append(tok_string)
prev_end = tok_end
output = "".join(result)
if postfix: if postfix:
# Use pack-defined list, fallback to sensible defaults
postfix_english = set(pack.get("postfix_keywords", ["if", "elif", "while"])) postfix_english = set(pack.get("postfix_keywords", ["if", "elif", "while"]))
output = _apply_postfix_output(output, en_to_foreign, postfix_english) output = _apply_postfix_output(output, en_to_foreign, postfix_english)

View File

@@ -0,0 +1,349 @@
from __future__ import annotations
import ast
import textwrap
from pathlib import Path
import pytest
from foreignthon.transpiler import transpile, detranspile
def es(src: str) -> str:
    """Dedent and strip *src*, append one newline, and transpile it with the "es" pack."""
    normalized = textwrap.dedent(src).strip() + "\n"
    return transpile(normalized, "es")
def de_es(src: str, postfix: bool = False) -> str:
    """Dedent and strip *src*, append one newline, and detranspile it with the "es" pack.

    *postfix* is forwarded unchanged to ``detranspile``.
    """
    normalized = textwrap.dedent(src).strip() + "\n"
    return detranspile(normalized, "es", postfix=postfix)
def valid(src: str) -> bool:
    """Return True when *src* parses as Python, False when parsing raises SyntaxError."""
    try:
        ast.parse(src)
    except SyntaxError:
        return False
    else:
        return True
def runs(src: str) -> dict:
"""Execute transpiled source and return its globals."""
code = compile(src, "<test>", "exec")
glob = {}
exec(code, glob)
return glob
# ---------------------------------------------------------------------------
# Complex class with methods, properties, exceptions
# ---------------------------------------------------------------------------
def test_class_with_methods():
    """A Spanish class with three methods transpiles, parses, and keeps state across calls."""
    program = """
        clase Contador:
            def __init__(self, inicio=0):
                self.valor = inicio
            def incrementar(self):
                self.valor += 1
                retornar self.valor
            def reiniciar(self):
                self.valor = 0
        c = Contador(10)
        c.incrementar()
        c.incrementar()
    """
    python_src = es(program)
    assert valid(python_src)
    counter = runs(python_src)["c"]
    # Constructed with 10, then incremented twice.
    assert counter.valor == 12
# ---------------------------------------------------------------------------
# Exception handling with custom exception
# ---------------------------------------------------------------------------
def test_exception_handling():
    """raise / try / except / finally all translate; the non-error path yields 5.0."""
    program = """
        clase MiError(Excepcion):
            pasar
        def dividir(a, b):
            si b == 0:
                lanzar ErrorDeDivisionCero("no dividas por cero")
            retornar a / b
        intentar:
            resultado = dividir(10, 2)
        excepto ErrorDeDivisionCero como e:
            resultado = -1
        finalmente:
            hecho = Verda
    """
    python_src = es(program)
    assert valid(python_src)
    env = runs(python_src)
    assert env["resultado"] == 5.0
    assert env["hecho"] is True
# ---------------------------------------------------------------------------
# Generator with yield
# ---------------------------------------------------------------------------
def test_generator():
    """A generator function using the translated `yield` produces the first five squares."""
    program = """
        def cuadrados(n):
            para i en dist(n):
                generar i * i
        resultado = lista(cuadrados(5))
    """
    python_src = es(program)
    assert valid(python_src)
    env = runs(python_src)
    assert env["resultado"] == [0, 1, 4, 9, 16]
# ---------------------------------------------------------------------------
# Lambda and higher order functions
# ---------------------------------------------------------------------------
def test_lambda_and_builtins():
    """Lambdas combined with translated and untranslated builtins give the expected values.

    Every name the program binds is asserted, including ``dobles`` (the
    ``map`` result), which was previously computed but never checked.
    """
    src = """
        nums = [3, 1, 4, 1, 5, 9, 2, 6]
        pares = lista(filtrar(lambda x: x % 2 == 0, nums))
        dobles = lista(map(lambda x: x * 2, nums))
        total = sum(nums)
        mayor = max(nums)
        menor = min(nums)
    """
    out = es(src)
    assert valid(out)
    g = runs(out)
    assert g["pares"] == [4, 2, 6]
    assert g["dobles"] == [6, 2, 8, 2, 10, 18, 4, 12]
    assert g["total"] == 31
    assert g["mayor"] == 9
    assert g["menor"] == 1
# ---------------------------------------------------------------------------
# Nested functions and closures
# ---------------------------------------------------------------------------
def test_nested_functions():
    """A closure returned from a nested function survives transpilation."""
    program = """
        def hacer_multiplicador(n):
            def multiplicar(x):
                retornar x * n
            retornar multiplicar
        doble = hacer_multiplicador(2)
        triple = hacer_multiplicador(3)
        resultado = doble(5) + triple(4)
    """
    python_src = es(program)
    assert valid(python_src)
    env = runs(python_src)
    # 2 * 5 + 3 * 4
    assert env["resultado"] == 22
# ---------------------------------------------------------------------------
# While loop with break and continue
# ---------------------------------------------------------------------------
def test_while_break_continue():
    """`mientras` with `continuar` and `parar` collects the odd numbers below ten."""
    program = """
        resultado = []
        i = 0
        mientras i < 20:
            i += 1
            si i % 2 == 0:
                continuar
            si i > 9:
                parar
            resultado.append(i)
    """
    python_src = es(program)
    assert valid(python_src)
    env = runs(python_src)
    assert env["resultado"] == [1, 3, 5, 7, 9]
# ---------------------------------------------------------------------------
# List/dict/set comprehensions
# ---------------------------------------------------------------------------
def test_comprehensions():
    """List, set, and dict comprehensions written with `para`/`en`/`si` evaluate correctly."""
    program = """
        cuadrados = [x*x para x en dist(6)]
        pares = {x para x en dist(10) si x % 2 == 0}
        cubo_dict = {x: x**3 para x en dist(5)}
    """
    python_src = es(program)
    assert valid(python_src)
    env = runs(python_src)
    assert env["cuadrados"] == [0, 1, 4, 9, 16, 25]
    assert env["pares"] == {0, 2, 4, 6, 8}
    assert env["cubo_dict"] == {0: 0, 1: 1, 2: 8, 3: 27, 4: 64}
# ---------------------------------------------------------------------------
# @@ postfix syntax — mixed with prefix
# ---------------------------------------------------------------------------
def test_postfix_mixed_with_prefix():
    """Postfix `@@si` / `@@osi` lines and a prefix `sino` can share one if-chain."""
    program = """
        def clasificar(n):
            n > 0 @@si:
                retornar "positivo"
            n < 0 @@osi:
                retornar "negativo"
            sino:
                retornar "cero"
        resultados = [clasificar(x) para x en [-2, 0, 3]]
    """
    python_src = es(program)
    assert valid(python_src)
    # The postfix marker must be fully rewritten away.
    assert "@@" not in python_src
    env = runs(python_src)
    assert env["resultados"] == ["negativo", "cero", "positivo"]
# ---------------------------------------------------------------------------
# @@ postfix in while and nested ifs
# ---------------------------------------------------------------------------
def test_postfix_while_nested():
    """A postfix `@@mientras` loop containing a postfix `@@si` sums the even numbers to ten."""
    program = """
        acum = 0
        i = 1
        i <= 10 @@mientras:
            i % 2 == 0 @@si:
                acum += i
            i += 1
    """
    python_src = es(program)
    assert valid(python_src)
    assert "@@" not in python_src
    env = runs(python_src)
    assert env["acum"] == 30  # 2+4+6+8+10
# ---------------------------------------------------------------------------
# Strings and comments never touched
# ---------------------------------------------------------------------------
def test_strings_with_keyword_names():
    """Keyword-looking words inside string literals must come through untranslated.

    All four literals in the program are checked, including the f-string,
    which was previously present in the input but never asserted.
    """
    src = """
        msg = "si para mientras def class"
        comentario = 'si esto no se traduce'
        fstr = f"valor si={42}"
        lista_kw = ["si", "para", "mientras"]
    """
    out = es(src)
    untouched = (
        '"si para mientras def class"',
        "'si esto no se traduce'",
        'f"valor si={42}"',
        '["si", "para", "mientras"]',
    )
    for literal in untouched:
        assert literal in out
def test_comment_lines_untouched():
    """Both a full-line comment and a trailing comment are copied through verbatim."""
    program = """
        # si para mientras escribir dist
        x = 1 # si esto es un comentario
        y = 2
    """
    python_src = es(program)
    for comment in (
        "# si para mientras escribir dist",
        "# si esto es un comentario",
    ):
        assert comment in python_src
# ---------------------------------------------------------------------------
# Spacing — no double blank lines, no spaces around parens
# ---------------------------------------------------------------------------
def test_no_double_blank_lines():
    """Single blank lines between definitions stay single after transpiling.

    The input separates the three functions with exactly one blank line
    each. The output must not grow any of them into a double blank line,
    and must not drop them either.
    """
    src = """
        def foo():
            pasar

        def bar():
            pasar

        def baz():
            pasar
    """
    out = es(src)
    assert "\n\n\n" not in out
    # Both single blank-line separators are still present.
    assert out.count("\n\n") == 2
def test_no_spaces_around_parens():
    """Nested calls keep their tight parenthesisation after the names are swapped."""
    python_src = es("escribir(dist(10))\n")
    assert "print(range(10))" in python_src
# ---------------------------------------------------------------------------
# Decompile — round trip
# ---------------------------------------------------------------------------
def test_decompile_postfix():
    """Decompiling an if/elif/else chain yields `si` and `osi` and never emits `@@`.

    The keywords are checked as whole whitespace-delimited words. A plain
    substring test for "si" would pass on the "si" inside "osi" even if the
    `if` keyword had not been translated at all.
    """
    src = textwrap.dedent("""
        def check(x):
            if x > 0:
                print(x)
            elif x == 0:
                print(0)
            else:
                pass
    """).strip() + "\n"
    out = de_es(src, postfix=False)
    words = out.split()
    assert "si" in words and "osi" in words
    assert "@@" not in out
    out_pf = de_es(src, postfix=True)
    # postfix_keywords is [] for es so no @@ expected
    assert "@@" not in out_pf
def test_decompile_roundtrip_fixed():
    """English -> Spanish -> English produces an AST identical to the starting program."""
    english = textwrap.dedent("""
        def sumar(a, b):
            return a + b
        for i in range(5):
            print(sumar(i, 1))
    """).strip() + "\n"
    # Decompile to the foreign form; at least one known Spanish name must appear.
    spanish = de_es(english)
    assert any(
        word in spanish for word in ("para", "dist", "escribir", "imprimir")
    )
    # The foreign text is not Python itself, but transpiling it back must
    # give valid Python that matches the original structurally.
    restored = es(spanish)
    assert valid(restored)
    assert ast.dump(ast.parse(english)) == ast.dump(ast.parse(restored))
def test_decompile_exceptions_fixed():
    """try/except and an exception class name decompile to Spanish and transpile back cleanly."""
    english = textwrap.dedent("""
        try:
            x = 1 / 0
        except ZeroDivisionError:
            x = 0
    """).strip() + "\n"
    spanish = de_es(english)
    for expected in ("intentar", "excepto", "ErrorDeDivisionCero"):
        assert expected in spanish
    # The decompiled text is foreign; it must transpile back to valid Python.
    restored = es(spanish)
    assert valid(restored)

View File

@@ -5,52 +5,60 @@ from pathlib import Path
import pytest import pytest
from foreignthon.transpiler import _check_shebang, _detect_lang, transpile from foreignthon.transpiler import (
transpile,
# --------------------------------------------------------------------------- detranspile,
# All tests use the real foreignthon-es pack — no mocks _detect_lang,
# --------------------------------------------------------------------------- _check_shebang,
)
def es(source: str) -> str: def es(source: str) -> str:
return transpile(source, "es") return transpile(source, "es")
# --------------------------------------------------------------------------- def de_es(source: str, postfix: bool = False) -> str:
# Keywords return detranspile(source, "es", postfix=postfix)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Keywords — using YOUR current es.json
# ---------------------------------------------------------------------------
def test_if_else(): def test_if_else():
out = es("si x > 0:\n imprimir(x)\nsino:\n pasar") out = es("si x > 0:\n escribir(x)\nsino:\n pasar")
assert "if" in out and "else" in out and "pass" in out assert "if" in out and "else" in out and "pass" in out
assert "si" not in out and "sino" not in out assert "si" not in out and "sino" not in out
def test_elif():
out = es("si x > 0:\n pasar\nosi x == 0:\n pasar\nsino:\n pasar")
assert "elif" in out
def test_for_loop(): def test_for_loop():
out = es("para i en dist(10):\n imprimir(i)") out = es("para i en dist(10):\n escribir(i)")
assert "for" in out and "in" in out and "range" in out assert "for" in out and "in" in out and "range" in out
def test_while():
out = es("mientras x > 0:\n x -= 1")
assert "while" in out
def test_function_def(): def test_function_def():
# def maps to def in your JSON so both work
out = es("def saludar(nombre):\n retornar nombre") out = es("def saludar(nombre):\n retornar nombre")
assert "def" in out and "return" in out assert "def" in out and "return" in out
def test_class_def(): def test_class_def():
out = es("clase Animal:\n pasar") out = es("clase Animal:\n pasar")
assert "class" in out and "pass" in out assert "class" in out and "pass" in out
def test_booleans_and_none(): def test_booleans_and_none():
out = es("x = Verda\ny = Falso\nz = Nada") out = es("x = Verda\ny = Falso\nz = Nada")
assert "True" in out and "False" in out and "None" in out assert "True" in out and "False" in out and "None" in out
def test_try_except(): def test_try_except():
out = es( out = es(
"intentar:\n" "intentar:\n"
" imprimir(x)\n" " escribir(x)\n"
"excepto ErrorDeValor:\n" "excepto ErrorDeValor:\n"
" pasar\n" " pasar\n"
"finalmente:\n" "finalmente:\n"
@@ -59,95 +67,155 @@ def test_try_except():
assert "try" in out and "except" in out and "finally" in out assert "try" in out and "except" in out and "finally" in out
assert "ValueError" in out assert "ValueError" in out
def test_import():
    """`importar mate` becomes an import of `math`."""
    python_src = es("importar mate")
    assert all(word in python_src for word in ("import", "math"))
def test_from_import():
    """`de mate importar pi` becomes a from-import of `math`."""
    python_src = es("de mate importar pi")
    assert all(word in python_src for word in ("from", "math", "import"))
# ---------------------------------------------------------------------------
# Builtins
# ---------------------------------------------------------------------------
def test_print_escribir():
    """`escribir` is translated to `print`."""
    assert "print" in es("escribir('hola')")
def test_print_imprimir():
    """`imprimir` is translated to `print` as well."""
    assert "print" in es("imprimir('hola')")
def test_range_dist():
    """`dist` is translated to `range`."""
    assert "range" in es("dist(10)")
def test_len_lon():
    """`lon` is translated to `len`."""
    assert "len" in es("lon(lista)")
def test_int_ent():
    """`ent` is translated to `int`."""
    assert "int" in es("ent('5')")
def test_str_texto():
    """`texto` is translated to `str`."""
    assert "str" in es("texto(5)")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Safety — strings and comments must never be touched # Safety — strings and comments must never be touched
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def test_strings_not_transpiled(): def test_strings_not_transpiled():
out = es('x = "si esto es para mientras def"') out = es('x = "si esto es para mientras"')
assert '"si esto es para mientras def"' in out assert '"si esto es para mientras"' in out
def test_comments_not_transpiled(): def test_comments_not_transpiled():
out = es("# si para mientras\nx = 1") out = es("# si para mientras\nx = 1")
assert "# si para mientras" in out assert "# si para mientras" in out
def test_fstring_not_touched(): def test_fstring_not_touched():
out = es('imprimir(f"si {x} para")') out = es('escribir(f"si {x} para")')
assert "si" in out # inside the string, untouched assert "si" in out # inside string, untouched
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Output is always valid Python # Output is valid Python
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def test_output_is_valid_python(): def test_output_is_valid_python():
out = es( out = es(
"def sumar(a, b):\n" "def sumar(a, b):\n"
" retornar a + b\n\n" " retornar a + b\n\n"
"para i en dist(5):\n" "para i en dist(5):\n"
" imprimir(sumar(i, 1))\n" " escribir(sumar(i, 1))\n"
) )
ast.parse(out) # raises if invalid ast.parse(out)
# ---------------------------------------------------------------------------
# No double blank lines after compile
# ---------------------------------------------------------------------------
def test_no_double_blank_lines():
    """One blank line between two definitions must not turn into two."""
    program = "def foo():\n pasar\n\ndef bar():\n pasar\n"
    assert "\n\n\n" not in es(program)
# ---------------------------------------------------------------------------
# Postfix @@ syntax
# ---------------------------------------------------------------------------
def test_postfix_if():
    """A postfix `@@si` line is rewritten to an `if` with no marker left behind."""
    python_src = es("x = 5\nx > 0 @@si:\n escribir(x)")
    assert "if" in python_src
    assert "@@" not in python_src
def test_postfix_preserves_indentation():
    """A postfix `@@si` nested inside a function keeps its block structure parseable."""
    program = "".join(
        [
            "def comprobar(x):\n",
            "    x > 0 @@si:\n",
            "        escribir(x)\n",
            "    sino:\n",
            "        pasar\n",
        ]
    )
    # ast.parse raises if the rewritten indentation is broken.
    ast.parse(es(program))
def test_prefix_and_postfix_mixed():
    """A prefix `si` and a postfix `@@si` in one program both become `if`."""
    program = "".join(
        [
            "si x > 0:\n",
            " escribir(x)\n",
            "y < 0 @@si:\n",
            " escribir(y)\n",
        ]
    )
    python_src = es(program)
    assert python_src.count("if") == 2
    assert "@@" not in python_src
# ---------------------------------------------------------------------------
# Decompile
# ---------------------------------------------------------------------------
def test_decompile_keywords():
    """`if` and `pass` decompile to `si` and `pasar`."""
    spanish = de_es("if x > 0:\n pass")
    assert "si" in spanish
    assert "pasar" in spanish
def test_decompile_builtins():
    """Both builtins in the input are decompiled, not just `print`.

    The `len(...)` line was previously fed in but never checked. Its
    Spanish alias is not hard-coded here; the test only requires that the
    English name no longer starts the second output line.
    """
    out = de_es("print('hello')\nlen([1,2,3])")
    assert "escribir" in out or "imprimir" in out
    assert not out.splitlines()[1].startswith("len(")
def test_decompile_roundtrip():
    """Spanish -> Python -> Spanish -> Python ends at the same program.

    The previous final assertion ended in ``or True`` and so could never
    fail. This version checks that decompiling actually changed the text
    and that transpiling the result again gives a structurally identical
    program.
    """
    original = "para i en dist(5):\n escribir(i)\n"
    compiled = es(original)
    ast.parse(compiled)  # raises if the compiled text is not valid Python
    back = de_es(compiled)
    # Decompiling must translate something, not echo the input.
    assert back != compiled
    # Transpiling the decompiled text again must reproduce the same AST.
    assert ast.dump(ast.parse(es(back))) == ast.dump(ast.parse(compiled))
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Language detection # Language detection
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def test_detect_lang_from_extension(): def test_detect_lang_from_extension():
assert _detect_lang(Path("script.es.py")) == "es" assert _detect_lang(Path("script.es.py")) == "es"
assert _detect_lang(Path("script.ta.py")) == "ta" assert _detect_lang(Path("script.ta.py")) == "ta"
def test_detect_lang_bad_extension(): def test_detect_lang_bad_extension():
with pytest.raises(ValueError): with pytest.raises(ValueError):
_detect_lang(Path("script.py")) _detect_lang(Path("script.py"))
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Shebang override # Shebang
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def test_shebang_override(): def test_shebang_override():
assert _check_shebang("# foreignthon: fr\nsi x:\n pasar", "es") == "fr" assert _check_shebang("# foreignthon: fr\nsi x:\n pasar", "es") == "fr"
def test_shebang_default_when_absent(): def test_shebang_default_when_absent():
assert _check_shebang("si x:\n pasar", "es") == "es" assert _check_shebang("si x:\n pasar", "es") == "es"
# ---------------------------------------------------------------------------
# Postfix @@ syntax
# ---------------------------------------------------------------------------
def test_postfix_if():
out = es("x = 5\nx > 0 @@si:\n imprimir(x)")
assert "if" in out
assert "@@" not in out
def test_postfix_preserves_indentation():
src = (
"def comprobar(x):\n"
" x > 0 @@si:\n"
" imprimir(x)\n"
" sino:\n"
" pasar\n"
)
out = es(src)
ast.parse(out) # fails if indentation is broken
def test_prefix_still_works_alongside_postfix():
src = "si x > 0:\n" " imprimir(x)\n" "y < 0 @@si:\n" " imprimir(y)\n"
out = es(src)
assert out.count("if") == 2
assert "@@" not in out