4 Commits
v0.5.3 ... main

Author SHA1 Message Date
21107a1d43 Merge pull request 'fix/tokenizer-bug' (#1) from fix/tokenizer-bug into main
All checks were successful
CI / test (push) Successful in 11s
Reviewed-on: #1
2026-05-21 15:32:46 +00:00
d4b931ec2c bumped version
All checks were successful
CI / test (pull_request) Successful in 12s
Publish Core / verify (push) Successful in 10s
Publish Core / publish (push) Successful in 14s
2026-05-21 15:31:29 +00:00
784b3729e4 replit agent fixes for languages like tamil 2026-05-21 15:30:10 +00:00
e50b2754ae added readme
All checks were successful
CI / test (push) Successful in 12s
2026-05-20 17:29:55 -05:00
3 changed files with 157 additions and 4 deletions

View File

@@ -1 +1,56 @@
# foreignthon
# ForeignThon
Write Python in any human language.
ForeignThon transpiles `.es.py`, `.ta.py` and more into standard Python — keywords, builtins, exceptions, all translated. Errors come back in your language too.
```python
# main.es.py
def saludar(nombre):
    retornar f"Hola, {nombre}!"
para i en dist(3):
    escribir(saludar(f"mundo {i}"))
```
```bash
fpy run main.es.py
# Hola, mundo 0!
# Hola, mundo 1!
# Hola, mundo 2!
```
## Install
```bash
pip install foreignthon # Core
pip install foreignthon-es # Spanish
pip install foreignthon-ta # Tamil
```
## Quick start
```bash
fpy new myproject --lang es
cd myproject
fpy run src/main.es.py
```
## Commands
| Command | Description |
|---|---|
| `fpy new <name> --lang <code>` | Scaffold a new project |
| `fpy run <file>` | Transpile and run |
| `fpy compile <file>` | Transpile to `.compiled.py` |
| `fpy decompile <file> --lang <code>` | Convert Python back to a language |
| `fpy check <file>` | Validate without running |
## Documentation
→ [foreignthon.keshavanand.net](https://foreignthon.keshavanand.net)
## License
GPL v3

View File

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "foreignthon"
version = "0.5.3"
version = "0.5.4"
description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python."
license = { text = "GPL v3" }
requires-python = ">=3.9"

View File

@@ -3,6 +3,8 @@ from __future__ import annotations
import io
import re
import tokenize
import unicodedata
from functools import lru_cache
from pathlib import Path
from .pack import load_pack
@@ -79,7 +81,103 @@ def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> s
return "".join(parts)
@lru_cache(maxsize=2048)
def _is_safe_token(s: str) -> bool:
"""Return True if the tokenizer produces `s` as a single NAME token.
Some Unicode scripts (e.g. Tamil) contain combining characters that the
tokenize module's regex treats as token boundaries, even though the full
string passes str.isidentifier(). Keys that fail this check need a
pre-pass string replacement before tokenization.
"""
try:
toks = [
t for t in tokenize.generate_tokens(io.StringIO(s + "\n").readline)
if t.type not in (
tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
)
]
return len(toks) == 1 and toks[0].type == tokenize.NAME and toks[0].string == s
except tokenize.TokenError:
return False
def _is_id_char(c: str) -> bool:
"""True if `c` can be part of an identifier (letter, mark, digit, or underscore)."""
cat = unicodedata.category(c)
return cat.startswith(("L", "M", "N")) or c == "_"
def _code_region_replace(source: str, unsafe_mapping: dict) -> str:
    """Replace unsafe keys in source only in code regions (not string literals or comments).

    The tokenizer is used to find protected spans: STRING and COMMENT tokens,
    plus whole f-strings on interpreters that tokenize them as
    FSTRING_START ... FSTRING_END (Python 3.12+) instead of a single STRING.
    Identifier-boundary checks prevent partial matches inside longer words.

    Args:
        source: Full source text to rewrite.
        unsafe_mapping: Maps each key to its replacement text. Longer keys
            win over shorter ones at the same position. Empty keys are
            ignored because they would match without consuming input.

    Returns:
        The rewritten source. Text inside protected spans is copied verbatim.
    """
    # Bucket keys by first character (longest first inside each bucket) so
    # each position only tries keys that can possibly match there.
    keys_by_first: dict[str, list[str]] = {}
    for key in sorted(unsafe_mapping, key=len, reverse=True):
        if key:  # an empty key would "match" forever without advancing
            keys_by_first.setdefault(key[0], []).append(key)
    if not keys_by_first:
        return source

    # Absolute offset of each line start. Lines are split exactly as the
    # readline handed to tokenize splits them (on "\n" only); str.splitlines
    # also breaks on form feed, U+2028, etc., which would shift every
    # (row, col) -> offset conversion after such a character.
    line_starts = [0]
    for line in io.StringIO(source):
        line_starts.append(line_starts[-1] + len(line))

    # Token types that open/close an f-string (or template string) where the
    # running Python defines them; both sets are empty on older versions.
    openers = {
        getattr(tokenize, name)
        for name in ("FSTRING_START", "TSTRING_START")
        if hasattr(tokenize, name)
    }
    closers = {
        getattr(tokenize, name)
        for name in ("FSTRING_END", "TSTRING_END")
        if hasattr(tokenize, name)
    }

    protected: list[tuple[int, int]] = []
    depth = 0        # nesting level of currently open f-strings
    outer_start = 0  # offset where the outermost open f-string began
    try:
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if tok.type in openers:
                if depth == 0:
                    outer_start = line_starts[tok.start[0] - 1] + tok.start[1]
                depth += 1
            elif tok.type in closers:
                if depth:
                    depth -= 1
                    if depth == 0:
                        protected.append(
                            (outer_start, line_starts[tok.end[0] - 1] + tok.end[1])
                        )
            elif depth == 0 and tok.type in (tokenize.STRING, tokenize.COMMENT):
                # Strings/comments nested in an f-string are already covered
                # by the enclosing span, so only top-level ones are recorded.
                protected.append((
                    line_starts[tok.start[0] - 1] + tok.start[1],
                    line_starts[tok.end[0] - 1] + tok.end[1],
                ))
    except tokenize.TokenError:
        # Best effort: keep the spans found before the tokenizer gave up.
        pass

    result: list[str] = []
    pos = 0
    span_idx = 0
    n = len(source)
    n_spans = len(protected)
    while pos < n:
        if span_idx < n_spans and pos >= protected[span_idx][0]:
            # Inside a protected span: copy it through untouched.
            end = protected[span_idx][1]
            if end > pos:
                result.append(source[pos:end])
                pos = end
            span_idx += 1
            continue

        # A match may not run into the next protected span.
        code_end = protected[span_idx][0] if span_idx < n_spans else n
        for key in keys_by_first.get(source[pos], ()):
            end = pos + len(key)
            if end > code_end or not source.startswith(key, pos):
                continue
            if pos and _is_id_char(source[pos - 1]):
                continue  # key is the tail of a longer identifier
            if end < n and _is_id_char(source[end]):
                continue  # key is the head of a longer identifier
            result.append(unsafe_mapping[key])
            pos = end
            break
        else:
            result.append(source[pos])
            pos += 1
    return "".join(result)
def _swap_tokens(source: str, mapping: dict) -> str:
safe_mapping: dict[str, str] = {}
unsafe_mapping: dict[str, str] = {}
for k, v in mapping.items():
(safe_mapping if _is_safe_token(k) else unsafe_mapping)[k] = v
if unsafe_mapping:
source = _code_region_replace(source, unsafe_mapping)
source_lines = source.splitlines(keepends=True)
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
@@ -94,8 +192,8 @@ def _swap_tokens(source: str, mapping: dict) -> str:
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
result.append(gap)
if tok_type == tokenize.NAME and tok_string in mapping:
result.append(mapping[tok_string])
if tok_type == tokenize.NAME and tok_string in safe_mapping:
result.append(safe_mapping[tok_string])
else:
result.append(tok_string)