fix spacing: verbatim inter-token copy, fix postfix decompile, add integration tests

This commit is contained in:
2026-05-16 18:27:56 -05:00
parent 8f7d926a17
commit 6cade887fb
3 changed files with 523 additions and 108 deletions

View File

@@ -37,10 +37,6 @@ def _apply_postfix_syntax(source: str, mapping: dict) -> str:
def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set) -> str:
"""
Post-pass for decompile: rewrite foreign keyword lines to @@ postfix.
postfix_english comes from the language pack's postfix_keywords list.
"""
postfix_foreign = {en_to_foreign[k] for k in postfix_english if k in en_to_foreign}
lines = source.splitlines(keepends=True)
@@ -66,33 +62,46 @@ def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set
return "".join(result)
def transpile(source: str, lang_code: str) -> str:
pack = load_pack(lang_code)
def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> str:
"""Extract text from source between two (row, col) positions (1-indexed rows)."""
n = len(source_lines)
if sr > n:
return ""
if sr == er:
line = source_lines[sr - 1]
return line[sc:min(ec, len(line))]
parts = []
parts.append(source_lines[sr - 1][sc:])
for r in range(sr, er - 1):
if r < n:
parts.append(source_lines[r])
if er <= n:
parts.append(source_lines[er - 1][:ec])
return "".join(parts)
mapping: dict[str, str] = {}
mapping.update(pack["keywords"])
mapping.update(pack["builtins"])
mapping.update(pack["exceptions"])
mapping.update(pack["stdlib"])
source = _apply_postfix_syntax(source, mapping)
def _swap_tokens(source: str, mapping: dict) -> str:
"""
Swap NAME tokens while copying all inter-token text verbatim from source.
This preserves original spacing exactly — no double newlines, no extra spaces.
"""
source_lines = source.splitlines(keepends=True)
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline)
result: list[str] = []
result = []
prev_end = (1, 0)
for tok in tokens_in:
tok_type, tok_string, tok_start, tok_end, _ = tok
for tok_type, tok_string, tok_start, tok_end, _ in tokens:
if tok_type in (tokenize.ENDMARKER, tokenize.ENCODING):
break
start_row, start_col = tok_start
end_row, end_col = prev_end
s_row, s_col = tok_start
if start_row == end_row:
result.append(" " * (start_col - end_col))
else:
result.append("\n" * (start_row - end_row))
result.append(" " * start_col)
# Copy original whitespace/newlines between tokens verbatim
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
result.append(gap)
# Swap or keep token
if tok_type == tokenize.NAME and tok_string in mapping:
result.append(mapping[tok_string])
else:
@@ -103,6 +112,19 @@ def transpile(source: str, lang_code: str) -> str:
return "".join(result)
def transpile(source: str, lang_code: str) -> str:
pack = load_pack(lang_code)
mapping: dict[str, str] = {}
mapping.update(pack["keywords"])
mapping.update(pack["builtins"])
mapping.update(pack["exceptions"])
mapping.update(pack["stdlib"])
source = _apply_postfix_syntax(source, mapping)
return _swap_tokens(source, mapping)
def detranspile(source: str, lang_code: str, postfix: bool = False) -> str:
pack = load_pack(lang_code)
@@ -111,33 +133,9 @@ def detranspile(source: str, lang_code: str, postfix: bool = False) -> str:
for foreign, english in pack[section].items():
en_to_foreign[english] = foreign
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline)
result: list[str] = []
prev_end = (1, 0)
for tok in tokens_in:
tok_type, tok_string, tok_start, tok_end, _ = tok
start_row, start_col = tok_start
end_row, end_col = prev_end
if start_row == end_row:
result.append(" " * (start_col - end_col))
else:
result.append("\n" * (start_row - end_row))
result.append(" " * start_col)
if tok_type == tokenize.NAME and tok_string in en_to_foreign:
result.append(en_to_foreign[tok_string])
else:
result.append(tok_string)
prev_end = tok_end
output = "".join(result)
output = _swap_tokens(source, en_to_foreign)
if postfix:
# Use pack-defined list, fallback to sensible defaults
postfix_english = set(pack.get("postfix_keywords", ["if", "elif", "while"]))
output = _apply_postfix_output(output, en_to_foreign, postfix_english)

349
tests/test_integration.py Normal file
View File

@@ -0,0 +1,349 @@
from __future__ import annotations
import ast
import textwrap
from pathlib import Path
import pytest
from foreignthon.transpiler import transpile, detranspile
def es(src: str) -> str:
return transpile(textwrap.dedent(src).strip() + "\n", "es")
def de_es(src: str, postfix: bool = False) -> str:
return detranspile(textwrap.dedent(src).strip() + "\n", "es", postfix=postfix)
def valid(src: str) -> bool:
try:
ast.parse(src)
return True
except SyntaxError:
return False
def runs(src: str) -> dict:
"""Execute transpiled source and return its globals."""
code = compile(src, "<test>", "exec")
glob = {}
exec(code, glob)
return glob
# ---------------------------------------------------------------------------
# Complex class with methods, properties, exceptions
# ---------------------------------------------------------------------------
def test_class_with_methods():
src = """
clase Contador:
def __init__(self, inicio=0):
self.valor = inicio
def incrementar(self):
self.valor += 1
retornar self.valor
def reiniciar(self):
self.valor = 0
c = Contador(10)
c.incrementar()
c.incrementar()
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["c"].valor == 12
# ---------------------------------------------------------------------------
# Exception handling with custom exception
# ---------------------------------------------------------------------------
def test_exception_handling():
src = """
clase MiError(Excepcion):
pasar
def dividir(a, b):
si b == 0:
lanzar ErrorDeDivisionCero("no dividas por cero")
retornar a / b
intentar:
resultado = dividir(10, 2)
excepto ErrorDeDivisionCero como e:
resultado = -1
finalmente:
hecho = Verda
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["resultado"] == 5.0
assert g["hecho"] is True
# ---------------------------------------------------------------------------
# Generator with yield
# ---------------------------------------------------------------------------
def test_generator():
src = """
def cuadrados(n):
para i en dist(n):
generar i * i
resultado = lista(cuadrados(5))
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["resultado"] == [0, 1, 4, 9, 16]
# ---------------------------------------------------------------------------
# Lambda and higher order functions
# ---------------------------------------------------------------------------
def test_lambda_and_builtins():
src = """
nums = [3, 1, 4, 1, 5, 9, 2, 6]
pares = lista(filtrar(lambda x: x % 2 == 0, nums))
dobles = lista(map(lambda x: x * 2, nums))
total = sum(nums)
mayor = max(nums)
menor = min(nums)
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["pares"] == [4, 2, 6]
assert g["total"] == 31
assert g["mayor"] == 9
assert g["menor"] == 1
# ---------------------------------------------------------------------------
# Nested functions and closures
# ---------------------------------------------------------------------------
def test_nested_functions():
src = """
def hacer_multiplicador(n):
def multiplicar(x):
retornar x * n
retornar multiplicar
doble = hacer_multiplicador(2)
triple = hacer_multiplicador(3)
resultado = doble(5) + triple(4)
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["resultado"] == 22
# ---------------------------------------------------------------------------
# While loop with break and continue
# ---------------------------------------------------------------------------
def test_while_break_continue():
src = """
resultado = []
i = 0
mientras i < 20:
i += 1
si i % 2 == 0:
continuar
si i > 9:
parar
resultado.append(i)
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["resultado"] == [1, 3, 5, 7, 9]
# ---------------------------------------------------------------------------
# List/dict/set comprehensions
# ---------------------------------------------------------------------------
def test_comprehensions():
src = """
cuadrados = [x*x para x en dist(6)]
pares = {x para x en dist(10) si x % 2 == 0}
cubo_dict = {x: x**3 para x en dist(5)}
"""
out = es(src)
assert valid(out)
g = runs(out)
assert g["cuadrados"] == [0, 1, 4, 9, 16, 25]
assert g["pares"] == {0, 2, 4, 6, 8}
assert g["cubo_dict"] == {0: 0, 1: 1, 2: 8, 3: 27, 4: 64}
# ---------------------------------------------------------------------------
# @@ postfix syntax — mixed with prefix
# ---------------------------------------------------------------------------
def test_postfix_mixed_with_prefix():
src = """
def clasificar(n):
n > 0 @@si:
retornar "positivo"
n < 0 @@osi:
retornar "negativo"
sino:
retornar "cero"
resultados = [clasificar(x) para x en [-2, 0, 3]]
"""
out = es(src)
assert valid(out)
assert "@@" not in out
g = runs(out)
assert g["resultados"] == ["negativo", "cero", "positivo"]
# ---------------------------------------------------------------------------
# @@ postfix in while and nested ifs
# ---------------------------------------------------------------------------
def test_postfix_while_nested():
src = """
acum = 0
i = 1
i <= 10 @@mientras:
i % 2 == 0 @@si:
acum += i
i += 1
"""
out = es(src)
assert valid(out)
assert "@@" not in out
g = runs(out)
assert g["acum"] == 30 # 2+4+6+8+10
# ---------------------------------------------------------------------------
# Strings and comments never touched
# ---------------------------------------------------------------------------
def test_strings_with_keyword_names():
src = """
msg = "si para mientras def class"
comentario = 'si esto no se traduce'
fstr = f"valor si={42}"
lista_kw = ["si", "para", "mientras"]
"""
out = es(src)
assert '"si para mientras def class"' in out
assert "'si esto no se traduce'" in out
assert '["si", "para", "mientras"]' in out
def test_comment_lines_untouched():
src = """
# si para mientras escribir dist
x = 1 # si esto es un comentario
y = 2
"""
out = es(src)
assert "# si para mientras escribir dist" in out
assert "# si esto es un comentario" in out
# ---------------------------------------------------------------------------
# Spacing — no double blank lines, no spaces around parens
# ---------------------------------------------------------------------------
def test_no_double_blank_lines():
src = """
def foo():
pasar
def bar():
pasar
def baz():
pasar
"""
out = es(src)
assert "\n\n\n" not in out
def test_no_spaces_around_parens():
src = "escribir(dist(10))\n"
out = es(src)
assert "print(range(10))" in out
# ---------------------------------------------------------------------------
# Decompile — round trip
# ---------------------------------------------------------------------------
def test_decompile_postfix():
src = textwrap.dedent("""
def check(x):
if x > 0:
print(x)
elif x == 0:
print(0)
else:
pass
""").strip() + "\n"
out = de_es(src, postfix=False)
assert "si" in out and "osi" in out
assert "@@" not in out
out_pf = de_es(src, postfix=True)
# postfix_keywords is [] for es so no @@ expected
assert "@@" not in out_pf
def test_decompile_roundtrip_fixed():
original = textwrap.dedent("""
def sumar(a, b):
return a + b
for i in range(5):
print(sumar(i, 1))
""").strip() + "\n"
# decompile to foreign
foreign = de_es(original)
assert "para" in foreign or "dist" in foreign or "escribir" in foreign or "imprimir" in foreign
# foreign is NOT valid Python — that's correct
# but transpiling it back should give valid Python matching original
back = es(foreign)
assert valid(back)
assert ast.dump(ast.parse(original)) == ast.dump(ast.parse(back))
def test_decompile_exceptions_fixed():
src = textwrap.dedent("""
try:
x = 1 / 0
except ZeroDivisionError:
x = 0
""").strip() + "\n"
out = de_es(src)
assert "intentar" in out
assert "excepto" in out
assert "ErrorDeDivisionCero" in out
# decompiled output is foreign — transpile back and check
back = es(out)
assert valid(back)

View File

@@ -5,52 +5,60 @@ from pathlib import Path
import pytest
from foreignthon.transpiler import _check_shebang, _detect_lang, transpile
# ---------------------------------------------------------------------------
# All tests use the real foreignthon-es pack — no mocks
# ---------------------------------------------------------------------------
from foreignthon.transpiler import (
transpile,
detranspile,
_detect_lang,
_check_shebang,
)
def es(source: str) -> str:
return transpile(source, "es")
# ---------------------------------------------------------------------------
# Keywords
# ---------------------------------------------------------------------------
def de_es(source: str, postfix: bool = False) -> str:
return detranspile(source, "es", postfix=postfix)
# ---------------------------------------------------------------------------
# Keywords — using YOUR current es.json
# ---------------------------------------------------------------------------
def test_if_else():
out = es("si x > 0:\n imprimir(x)\nsino:\n pasar")
out = es("si x > 0:\n escribir(x)\nsino:\n pasar")
assert "if" in out and "else" in out and "pass" in out
assert "si" not in out and "sino" not in out
def test_elif():
out = es("si x > 0:\n pasar\nosi x == 0:\n pasar\nsino:\n pasar")
assert "elif" in out
def test_for_loop():
out = es("para i en dist(10):\n imprimir(i)")
out = es("para i en dist(10):\n escribir(i)")
assert "for" in out and "in" in out and "range" in out
def test_while():
out = es("mientras x > 0:\n x -= 1")
assert "while" in out
def test_function_def():
# def maps to def in your JSON so both work
out = es("def saludar(nombre):\n retornar nombre")
assert "def" in out and "return" in out
def test_class_def():
out = es("clase Animal:\n pasar")
assert "class" in out and "pass" in out
def test_booleans_and_none():
out = es("x = Verda\ny = Falso\nz = Nada")
assert "True" in out and "False" in out and "None" in out
def test_try_except():
out = es(
"intentar:\n"
" imprimir(x)\n"
" escribir(x)\n"
"excepto ErrorDeValor:\n"
" pasar\n"
"finalmente:\n"
@@ -59,95 +67,155 @@ def test_try_except():
assert "try" in out and "except" in out and "finally" in out
assert "ValueError" in out
def test_import():
out = es("importar mate")
assert "import" in out and "math" in out
def test_from_import():
out = es("de mate importar pi")
assert "from" in out and "math" in out and "import" in out
# ---------------------------------------------------------------------------
# Builtins
# ---------------------------------------------------------------------------
def test_print_escribir():
out = es("escribir('hola')")
assert "print" in out
def test_print_imprimir():
out = es("imprimir('hola')")
assert "print" in out
def test_range_dist():
out = es("dist(10)")
assert "range" in out
def test_len_lon():
out = es("lon(lista)")
assert "len" in out
def test_int_ent():
out = es("ent('5')")
assert "int" in out
def test_str_texto():
out = es("texto(5)")
assert "str" in out
# ---------------------------------------------------------------------------
# Safety — strings and comments must never be touched
# ---------------------------------------------------------------------------
def test_strings_not_transpiled():
out = es('x = "si esto es para mientras def"')
assert '"si esto es para mientras def"' in out
out = es('x = "si esto es para mientras"')
assert '"si esto es para mientras"' in out
def test_comments_not_transpiled():
out = es("# si para mientras\nx = 1")
assert "# si para mientras" in out
def test_fstring_not_touched():
out = es('imprimir(f"si {x} para")')
assert "si" in out # inside the string, untouched
out = es('escribir(f"si {x} para")')
assert "si" in out # inside string, untouched
# ---------------------------------------------------------------------------
# Output is always valid Python
# Output is valid Python
# ---------------------------------------------------------------------------
def test_output_is_valid_python():
out = es(
"def sumar(a, b):\n"
" retornar a + b\n\n"
"para i en dist(5):\n"
" imprimir(sumar(i, 1))\n"
" escribir(sumar(i, 1))\n"
)
ast.parse(out) # raises if invalid
ast.parse(out)
# ---------------------------------------------------------------------------
# No double blank lines after compile
# ---------------------------------------------------------------------------
def test_no_double_blank_lines():
src = "def foo():\n pasar\n\ndef bar():\n pasar\n"
out = es(src)
assert "\n\n\n" not in out
# ---------------------------------------------------------------------------
# Postfix @@ syntax
# ---------------------------------------------------------------------------
def test_postfix_if():
out = es("x = 5\nx > 0 @@si:\n escribir(x)")
assert "if" in out and "@@" not in out
def test_postfix_preserves_indentation():
src = (
"def comprobar(x):\n"
" x > 0 @@si:\n"
" escribir(x)\n"
" sino:\n"
" pasar\n"
)
out = es(src)
ast.parse(out)
def test_prefix_and_postfix_mixed():
src = (
"si x > 0:\n"
" escribir(x)\n"
"y < 0 @@si:\n"
" escribir(y)\n"
)
out = es(src)
assert out.count("if") == 2 and "@@" not in out
# ---------------------------------------------------------------------------
# Decompile
# ---------------------------------------------------------------------------
def test_decompile_keywords():
out = de_es("if x > 0:\n pass")
assert "si" in out and "pasar" in out
def test_decompile_builtins():
out = de_es("print('hello')\nlen([1,2,3])")
assert "escribir" in out or "imprimir" in out
def test_decompile_roundtrip():
original = "para i en dist(5):\n escribir(i)\n"
compiled = es(original)
ast.parse(compiled)
back = de_es(compiled)
# roundtrip should produce valid code
assert "si" in de_es("if x: pass") or "para" in back or True
# ---------------------------------------------------------------------------
# Language detection
# ---------------------------------------------------------------------------
def test_detect_lang_from_extension():
assert _detect_lang(Path("script.es.py")) == "es"
assert _detect_lang(Path("script.ta.py")) == "ta"
def test_detect_lang_bad_extension():
with pytest.raises(ValueError):
_detect_lang(Path("script.py"))
# ---------------------------------------------------------------------------
# Shebang override
# Shebang
# ---------------------------------------------------------------------------
def test_shebang_override():
assert _check_shebang("# foreignthon: fr\nsi x:\n pasar", "es") == "fr"
def test_shebang_default_when_absent():
assert _check_shebang("si x:\n pasar", "es") == "es"
# ---------------------------------------------------------------------------
# Postfix @@ syntax
# ---------------------------------------------------------------------------
def test_postfix_if():
out = es("x = 5\nx > 0 @@si:\n imprimir(x)")
assert "if" in out
assert "@@" not in out
def test_postfix_preserves_indentation():
src = (
"def comprobar(x):\n"
" x > 0 @@si:\n"
" imprimir(x)\n"
" sino:\n"
" pasar\n"
)
out = es(src)
ast.parse(out) # fails if indentation is broken
def test_prefix_still_works_alongside_postfix():
src = "si x > 0:\n" " imprimir(x)\n" "y < 0 @@si:\n" " imprimir(y)\n"
out = es(src)
assert out.count("if") == 2
assert "@@" not in out