fix spacing: verbatim inter-token copy, fix postfix decompile, add integration tests
This commit is contained in:
@@ -37,10 +37,6 @@ def _apply_postfix_syntax(source: str, mapping: dict) -> str:
|
||||
|
||||
|
||||
def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set) -> str:
|
||||
"""
|
||||
Post-pass for decompile: rewrite foreign keyword lines to @@ postfix.
|
||||
postfix_english comes from the language pack's postfix_keywords list.
|
||||
"""
|
||||
postfix_foreign = {en_to_foreign[k] for k in postfix_english if k in en_to_foreign}
|
||||
|
||||
lines = source.splitlines(keepends=True)
|
||||
@@ -66,33 +62,46 @@ def _apply_postfix_output(source: str, en_to_foreign: dict, postfix_english: set
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def transpile(source: str, lang_code: str) -> str:
|
||||
pack = load_pack(lang_code)
|
||||
def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> str:
|
||||
"""Extract text from source between two (row, col) positions (1-indexed rows)."""
|
||||
n = len(source_lines)
|
||||
if sr > n:
|
||||
return ""
|
||||
if sr == er:
|
||||
line = source_lines[sr - 1]
|
||||
return line[sc:min(ec, len(line))]
|
||||
parts = []
|
||||
parts.append(source_lines[sr - 1][sc:])
|
||||
for r in range(sr, er - 1):
|
||||
if r < n:
|
||||
parts.append(source_lines[r])
|
||||
if er <= n:
|
||||
parts.append(source_lines[er - 1][:ec])
|
||||
return "".join(parts)
|
||||
|
||||
mapping: dict[str, str] = {}
|
||||
mapping.update(pack["keywords"])
|
||||
mapping.update(pack["builtins"])
|
||||
mapping.update(pack["exceptions"])
|
||||
mapping.update(pack["stdlib"])
|
||||
|
||||
source = _apply_postfix_syntax(source, mapping)
|
||||
def _swap_tokens(source: str, mapping: dict) -> str:
|
||||
"""
|
||||
Swap NAME tokens while copying all inter-token text verbatim from source.
|
||||
This preserves original spacing exactly — no double newlines, no extra spaces.
|
||||
"""
|
||||
source_lines = source.splitlines(keepends=True)
|
||||
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
|
||||
|
||||
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline)
|
||||
result: list[str] = []
|
||||
result = []
|
||||
prev_end = (1, 0)
|
||||
|
||||
for tok in tokens_in:
|
||||
tok_type, tok_string, tok_start, tok_end, _ = tok
|
||||
for tok_type, tok_string, tok_start, tok_end, _ in tokens:
|
||||
if tok_type in (tokenize.ENDMARKER, tokenize.ENCODING):
|
||||
break
|
||||
|
||||
start_row, start_col = tok_start
|
||||
end_row, end_col = prev_end
|
||||
s_row, s_col = tok_start
|
||||
|
||||
if start_row == end_row:
|
||||
result.append(" " * (start_col - end_col))
|
||||
else:
|
||||
result.append("\n" * (start_row - end_row))
|
||||
result.append(" " * start_col)
|
||||
# Copy original whitespace/newlines between tokens verbatim
|
||||
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
|
||||
result.append(gap)
|
||||
|
||||
# Swap or keep token
|
||||
if tok_type == tokenize.NAME and tok_string in mapping:
|
||||
result.append(mapping[tok_string])
|
||||
else:
|
||||
@@ -103,6 +112,19 @@ def transpile(source: str, lang_code: str) -> str:
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def transpile(source: str, lang_code: str) -> str:
    """Translate *source* using the language pack identified by *lang_code*.

    The pack's "keywords", "builtins", "exceptions" and "stdlib" sections
    are merged, in that order, into a single token mapping. The source is
    first passed through ``_apply_postfix_syntax`` and the result is then
    handed to ``_swap_tokens`` with the same mapping.

    Args:
        source: Program text to translate.
        lang_code: Identifier passed to ``load_pack`` to select the pack.

    Returns:
        The text produced by ``_swap_tokens``.
    """
    pack = load_pack(lang_code)

    # Sections are merged in a fixed order; on a key clash the later
    # section overrides the earlier one.
    mapping: dict[str, str] = {}
    for section in ("keywords", "builtins", "exceptions", "stdlib"):
        mapping.update(pack[section])

    rewritten = _apply_postfix_syntax(source, mapping)
    return _swap_tokens(rewritten, mapping)
|
||||
|
||||
|
||||
def detranspile(source: str, lang_code: str, postfix: bool = False) -> str:
|
||||
pack = load_pack(lang_code)
|
||||
|
||||
@@ -111,33 +133,9 @@ def detranspile(source: str, lang_code: str, postfix: bool = False) -> str:
|
||||
for foreign, english in pack[section].items():
|
||||
en_to_foreign[english] = foreign
|
||||
|
||||
tokens_in = tokenize.generate_tokens(io.StringIO(source).readline)
|
||||
result: list[str] = []
|
||||
prev_end = (1, 0)
|
||||
|
||||
for tok in tokens_in:
|
||||
tok_type, tok_string, tok_start, tok_end, _ = tok
|
||||
|
||||
start_row, start_col = tok_start
|
||||
end_row, end_col = prev_end
|
||||
|
||||
if start_row == end_row:
|
||||
result.append(" " * (start_col - end_col))
|
||||
else:
|
||||
result.append("\n" * (start_row - end_row))
|
||||
result.append(" " * start_col)
|
||||
|
||||
if tok_type == tokenize.NAME and tok_string in en_to_foreign:
|
||||
result.append(en_to_foreign[tok_string])
|
||||
else:
|
||||
result.append(tok_string)
|
||||
|
||||
prev_end = tok_end
|
||||
|
||||
output = "".join(result)
|
||||
output = _swap_tokens(source, en_to_foreign)
|
||||
|
||||
if postfix:
|
||||
# Use pack-defined list, fallback to sensible defaults
|
||||
postfix_english = set(pack.get("postfix_keywords", ["if", "elif", "while"]))
|
||||
output = _apply_postfix_output(output, en_to_foreign, postfix_english)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user