10 Commits
v0.5.0 ... main

Author SHA1 Message Date
21107a1d43 Merge pull request 'fix/tokenizer-bug' (#1) from fix/tokenizer-bug into main
All checks were successful
CI / test (push) Successful in 11s
Reviewed-on: #1
2026-05-21 15:32:46 +00:00
d4b931ec2c bumped version
All checks were successful
CI / test (pull_request) Successful in 12s
Publish Core / verify (push) Successful in 10s
Publish Core / publish (push) Successful in 14s
2026-05-21 15:31:29 +00:00
784b3729e4 replit agent fixes for languages like tamil 2026-05-21 15:30:10 +00:00
e50b2754ae added readme
All checks were successful
CI / test (push) Successful in 12s
2026-05-20 17:29:55 -05:00
982b578c15 fixed token name errror
All checks were successful
CI / test (push) Successful in 10s
Publish Core / verify (push) Successful in 9s
Publish Core / publish (push) Successful in 14s
2026-05-19 16:06:53 -05:00
24f4cece7e debug workflow
Some checks failed
CI / test (push) Successful in 10s
Publish Core / verify (push) Successful in 9s
Publish Core / publish (push) Failing after 9s
2026-05-19 16:04:04 -05:00
863ad43716 bump versions
Some checks failed
CI / test (push) Successful in 11s
Publish Core / verify (push) Successful in 10s
Publish Core / publish (push) Failing after 11s
2026-05-19 15:58:35 -05:00
d727e3d51e added test and a test json
All checks were successful
CI / test (push) Successful in 11s
2026-05-19 15:56:42 -05:00
d78a8235b5 fixed 2 issues with ruff
Some checks failed
CI / test (push) Failing after 11s
2026-05-19 15:45:00 -05:00
e47aa0bb77 added workflows again
Some checks failed
CI / test (push) Failing after 15s
2026-05-19 15:43:01 -05:00
9 changed files with 566 additions and 5 deletions

29
.gitea/workflows/ci.yml Normal file
View File

@@ -0,0 +1,29 @@
# Lint and test every push to main and every pull request targeting main.
name: CI

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is never parsed as a float.
          python-version: "3.11"

      - name: Install Core with Dev Dependencies
        run: |
          python -m pip install --upgrade pip
          # ".[dev]" is quoted: unquoted, the brackets are a shell glob that
          # could expand to a matching dotfile in the working directory.
          python -m pip install -e ".[dev]"

      - name: Run Quality Checks (Linter)
        run: ruff check src/

      - name: Execute Test Suite
        run: pytest tests/ -v

View File

@@ -0,0 +1,77 @@
# Build, publish to PyPI, and attach the artifacts to a Gitea release
# whenever a version tag is pushed.
name: Publish Core

on:
  push:
    tags:
      - "v*"  # Fires directly on v0.1.0, v0.2.0 etc.

jobs:
  # Enforce that tests MUST pass before release can execute
  verify:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install and Verify
        run: |
          # Quoted so the shell never treats [dev] as a glob pattern.
          python -m pip install -e ".[dev]"
          pytest tests/ -v

  publish:
    needs: verify  # Blocks execution if verify job fails
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install Release Tools
        run: pip install build twine

      - name: Build Wheel and Source Distribution
        run: python -m build .

      - name: Publish Package to PyPI
        env:
          TWINE_USERNAME: __token__
          # Inherits your clean Organization level secret
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: twine upload --skip-existing dist/*

      - name: Build Gitea Release with Assets
        env:
          # Pulls your clean Organization level Gitea Token
          GIT_RELEASE_TOKEN: ${{ secrets.GIT_RELEASE_TOKEN }}
          # Context values are passed through the environment instead of being
          # spliced into the script text.
          RELEASES_URL: ${{ github.server_url }}/api/v1/repos/${{ github.repository }}/releases
        run: |
          TAG="${GITHUB_REF#refs/tags/}"
          AUTH="Authorization: token $GIT_RELEASE_TOKEN"

          # Delete existing release block if present.
          # No --fail on the lookup: a 404 simply means there is nothing to delete.
          EXISTING=$(curl -s -H "$AUTH" "$RELEASES_URL/tags/$TAG")
          EXISTING_ID=$(echo "$EXISTING" | python -c "import sys,json; d=json.load(sys.stdin); print(d.get('id',''))" 2>/dev/null || echo "")
          if [ -n "$EXISTING_ID" ]; then
            curl -fsS -X DELETE -H "$AUTH" "$RELEASES_URL/$EXISTING_ID"
          fi

          # Create fresh production release container.
          # -f makes an HTTP error fail the step instead of being silently ignored.
          RELEASE=$(curl -fsS -X POST \
            -H "$AUTH" \
            -H "Content-Type: application/json" \
            -d "{
              \"tag_name\": \"$TAG\",
              \"name\": \"foreignthon $TAG\",
              \"body\": \"Release version $TAG of foreignthon core compiler engine.\",
              \"draft\": false,
              \"prerelease\": false
            }" "$RELEASES_URL")
          RELEASE_ID=$(echo "$RELEASE" | python -c "import sys,json; print(json.load(sys.stdin)['id'])")

          # Upload wheels directly into Gitea Assets tab
          for FILE in dist/*; do
            curl -fsS -X POST -H "$AUTH" -F "attachment=@$FILE" "$RELEASES_URL/$RELEASE_ID/assets"
          done

View File

@@ -1 +1,56 @@
# foreignthon # ForeignThon
Write Python in any human language.
ForeignThon transpiles `.es.py`, `.ta.py` and more into standard Python — keywords, builtins, exceptions, all translated. Errors come back in your language too.
```python
# main.es.py
def saludar(nombre):
retornar f"Hola, {nombre}!"
para i en dist(3):
escribir(saludar(f"mundo {i}"))
```
```bash
fpy run main.es.py
# Hola, mundo 0!
# Hola, mundo 1!
# Hola, mundo 2!
```
## Install
```bash
pip install foreignthon # Core
pip install foreignthon-es # Spanish
pip install foreignthon-ta # Tamil
```
## Quick start
```bash
fpy new myproject --lang es
cd myproject
fpy run src/main.es.py
```
## Commands
| Command | Description |
|---|---|
| `fpy new <name> --lang <code>` | Scaffold a new project |
| `fpy run <file>` | Transpile and run |
| `fpy compile <file>` | Transpile to `.compiled.py` |
| `fpy decompile <file> --lang <code>` | Convert Python back to a language |
| `fpy check <file>` | Validate without running |
## Documentation
→ [foreignthon.keshavanand.net](https://foreignthon.keshavanand.net)
## License
GPL v3

View File

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "foreignthon" name = "foreignthon"
version = "0.5.2" version = "0.5.4"
description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python." description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python."
license = { text = "GPL v3" } license = { text = "GPL v3" }
requires-python = ">=3.9" requires-python = ">=3.9"

View File

@@ -1,4 +1,4 @@
from importlib.metadata import version, PackageNotFoundError from importlib.metadata import PackageNotFoundError, version
try: try:
__version__ = version("foreignthon") __version__ = version("foreignthon")

View File

@@ -292,6 +292,7 @@ def check(files: tuple):
def langs(): def langs():
"""List all installed language packs with their versions and authors.""" """List all installed language packs with their versions and authors."""
import json import json
from .pack import _discover_packs from .pack import _discover_packs
packs = _discover_packs() packs = _discover_packs()

View File

@@ -3,6 +3,8 @@ from __future__ import annotations
import io import io
import re import re
import tokenize import tokenize
import unicodedata
from functools import lru_cache
from pathlib import Path from pathlib import Path
from .pack import load_pack from .pack import load_pack
@@ -79,7 +81,103 @@ def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> s
return "".join(parts) return "".join(parts)
@lru_cache(maxsize=2048)
def _is_safe_token(s: str) -> bool:
"""Return True if the tokenizer produces `s` as a single NAME token.
Some Unicode scripts (e.g. Tamil) contain combining characters that the
tokenize module's regex treats as token boundaries, even though the full
string passes str.isidentifier(). Keys that fail this check need a
pre-pass string replacement before tokenization.
"""
try:
toks = [
t for t in tokenize.generate_tokens(io.StringIO(s + "\n").readline)
if t.type not in (
tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
)
]
return len(toks) == 1 and toks[0].type == tokenize.NAME and toks[0].string == s
except tokenize.TokenError:
return False
def _is_id_char(c: str) -> bool:
"""True if `c` can be part of an identifier (letter, mark, digit, or underscore)."""
cat = unicodedata.category(c)
return cat.startswith(("L", "M", "N")) or c == "_"
def _code_region_replace(source: str, unsafe_mapping: dict) -> str:
"""Replace unsafe keys in source only in code regions (not string literals or comments).
The tokenizer correctly identifies STRING/COMMENT boundaries even when NAME
tokens are mangled by combining characters, so we use it to find protected spans.
Identifier-boundary checks prevent partial matches inside longer words.
"""
lines = source.splitlines(keepends=True)
cumlen = [0]
for line in lines:
cumlen.append(cumlen[-1] + len(line))
protected: list[tuple[int, int]] = []
try:
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
if tok.type in (tokenize.STRING, tokenize.COMMENT):
sr, sc = tok.start
er, ec = tok.end
protected.append((cumlen[sr - 1] + sc, cumlen[er - 1] + ec))
except tokenize.TokenError:
pass
sorted_keys = sorted(unsafe_mapping, key=len, reverse=True)
result: list[str] = []
pos = 0
span_idx = 0
n = len(source)
while pos < n:
if span_idx < len(protected) and pos >= protected[span_idx][0]:
end = protected[span_idx][1]
result.append(source[pos:end])
pos = end
span_idx += 1
continue
code_end = protected[span_idx][0] if span_idx < len(protected) else n
matched = False
for k in sorted_keys:
klen = len(k)
if pos + klen > code_end:
continue
if source[pos:pos + klen] != k:
continue
before_ok = pos == 0 or not _is_id_char(source[pos - 1])
after_ok = (pos + klen >= n) or not _is_id_char(source[pos + klen])
if before_ok and after_ok:
result.append(unsafe_mapping[k])
pos += klen
matched = True
break
if not matched:
result.append(source[pos])
pos += 1
return "".join(result)
def _swap_tokens(source: str, mapping: dict) -> str: def _swap_tokens(source: str, mapping: dict) -> str:
safe_mapping: dict[str, str] = {}
unsafe_mapping: dict[str, str] = {}
for k, v in mapping.items():
(safe_mapping if _is_safe_token(k) else unsafe_mapping)[k] = v
if unsafe_mapping:
source = _code_region_replace(source, unsafe_mapping)
source_lines = source.splitlines(keepends=True) source_lines = source.splitlines(keepends=True)
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline)) tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
@@ -94,8 +192,8 @@ def _swap_tokens(source: str, mapping: dict) -> str:
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col) gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
result.append(gap) result.append(gap)
if tok_type == tokenize.NAME and tok_string in mapping: if tok_type == tokenize.NAME and tok_string in safe_mapping:
result.append(mapping[tok_string]) result.append(safe_mapping[tok_string])
else: else:
result.append(tok_string) result.append(tok_string)

154
tests/test_engine.py Normal file
View File

@@ -0,0 +1,154 @@
from __future__ import annotations
import ast
import json
import textwrap
from pathlib import Path
import pytest
from foreignthon.transpiler import _check_shebang, _detect_lang, detranspile, transpile
# ---------------------------------------------------------------------------
# Setup: Load the local test JSON fixture
# ---------------------------------------------------------------------------
TEST_PACK_PATH = Path(__file__).parent / "test_pack.json"
TEST_PACK = json.loads(TEST_PACK_PATH.read_text(encoding="utf-8"))
def core_transpile(src: str) -> str:
    """Dedent and strip `src`, then transpile it using the local fixture pack."""
    # The language code stays "es" so it agrees with the JSON's meta code,
    # while the pack contents come from the locally loaded TEST_PACK.
    normalized = textwrap.dedent(src).strip() + "\n"
    return transpile(normalized, "es", pack=TEST_PACK)
def core_detranspile(src: str, postfix: bool = False) -> str:
    """Dedent and strip `src`, then detranspile it using the local fixture pack.

    `postfix` is forwarded unchanged to `detranspile`.
    """
    normalized = textwrap.dedent(src).strip() + "\n"
    return detranspile(normalized, "es", postfix=postfix, pack=TEST_PACK)
def valid(src: str) -> bool:
    """Report whether `src` parses as Python source without a SyntaxError."""
    try:
        ast.parse(src)
    except SyntaxError:
        return False
    else:
        return True
# ---------------------------------------------------------------------------
# 1. Core Mechanics: Translation & AST Validity
# ---------------------------------------------------------------------------
def test_engine_basic_translation():
# Ensures the engine reads the dictionary and swaps the words
src = """
para i en dist(5):
escribir(i)
"""
out = core_transpile(src)
assert "for" in out and "in" in out and "range" in out and "print" in out
assert valid(out)
# ---------------------------------------------------------------------------
# 2. Core Mechanics: Safety Boundaries (Strings & Comments)
# ---------------------------------------------------------------------------
def test_engine_preserves_strings():
    # Keywords that appear inside a string literal must come through untranslated.
    literal = '"si para mientras def clase"'
    transpiled = core_transpile("mensaje = " + literal)
    assert literal in transpiled
def test_engine_preserves_comments():
    # Keywords that appear inside a comment must come through untranslated.
    comment = "# si para mientras"
    transpiled = core_transpile(comment + "\nx = 1")
    assert comment in transpiled
def test_engine_preserves_fstrings():
    # 'si' sits inside the f-string text, so it is expected to survive.
    program = 'escribir(f"valor si={42}")'
    transpiled = core_transpile(program)
    assert "si" in transpiled
# ---------------------------------------------------------------------------
# 3. Core Mechanics: Postfix Syntax (@@)
# ---------------------------------------------------------------------------
def test_engine_postfix_reversal():
# Tests the engine's ability to move the keyword to the front
src = """
x = 5
x > 0 @@si:
escribir(x)
"""
out = core_transpile(src)
assert "if x > 0:" in out
assert "@@" not in out
assert valid(out)
def test_engine_mixed_prefix_postfix():
src = """
si x > 0:
escribir(x)
y < 0 @@si:
escribir(y)
"""
out = core_transpile(src)
assert out.count("if") == 2
assert "@@" not in out
# ---------------------------------------------------------------------------
# 4. Core Mechanics: Decompilation (Round Trip)
# ---------------------------------------------------------------------------
def test_engine_detranspile():
# Standard Python should turn back into foreignthon syntax
src = """
if x > 0:
pass
"""
out = core_detranspile(src)
assert "si" in out and "pasar" in out
def test_engine_roundtrip():
# foreignthon -> Python -> foreignthon
original = "para i en dist(5):\n escribir(i)\n"
compiled = core_transpile(original)
assert valid(compiled)
back = core_detranspile(compiled)
assert "para" in back and "dist" in back
# Accept either valid translation for 'print'
assert "escribir" in back or "imprimir" in back
# ---------------------------------------------------------------------------
# 5. Core Utilities: Detection & Shebangs
# ---------------------------------------------------------------------------
def test_detect_lang_from_extension():
    # The language code is expected to be read from the ".<code>.py" suffix.
    expected_by_filename = {
        "script.es.py": "es",
        "script.ta.py": "ta",
    }
    for filename, code in expected_by_filename.items():
        assert _detect_lang(Path(filename)) == code
def test_detect_lang_bad_extension():
    # A plain ".py" file carries no language code, so detection must raise.
    plain_python = Path("script.py")
    with pytest.raises(ValueError):
        _detect_lang(plain_python)
def test_shebang_override():
assert _check_shebang("# foreignthon: fr\nsi x:\n pasar", "es") == "fr"
def test_shebang_default_when_absent():
assert _check_shebang("si x:\n pasar", "es") == "es"

147
tests/test_pack.json Normal file
View File

@@ -0,0 +1,147 @@
{
"meta": {
"name": "Spanish",
"native_name": "Español",
"code": "es"
},
"keywords": {
"si": "if",
"sino": "else",
"osi": "elif",
"para": "for",
"mientras": "while",
"def": "def",
"clase": "class",
"importar": "import",
"de": "from",
"como": "as",
"retornar": "return",
"parar": "break",
"continuar": "continue",
"pasar": "pass",
"intentar": "try",
"excepto": "except",
"finalmente": "finally",
"lanzar": "raise",
"con": "with",
"en": "in",
"es": "is",
"y": "and",
"o": "or",
"no": "not",
"elim": "del",
"global": "global",
"nolocal": "nonlocal",
"afirmar": "assert",
"generar": "yield",
"esperar": "await",
"asinc": "async",
"lambda": "lambda",
"Verda": "True",
"Falso": "False",
"Nada": "None"
},
"builtins": {
"escribir": "print",
"imprimir": "print",
"entrada": "input",
"lon": "len",
"dist": "range",
"tipo": "type",
"ent": "int",
"dec": "float",
"texto": "str",
"lista": "list",
"dicc": "dict",
"conj": "set",
"tupla": "tuple",
"bool": "bool",
"abrir": "open",
"enumerar": "enumerate",
"map": "map",
"filtrar": "filter",
"ordenado": "sorted",
"invertido": "reversed",
"sum": "sum",
"min": "min",
"max": "max",
"abs": "abs",
"redondear": "round",
"rnd": "round",
"todos": "all",
"alguno": "any",
"esinstancia": "isinstance",
"teneatri": "hasattr",
"obtatri": "getattr",
"estabatri": "setattr",
"repr": "repr",
"formatear": "format",
"vars": "vars",
"sigue": "next",
"id": "id",
"car": "chr",
"hex": "hex",
"bin": "bin",
"oct": "oct"
},
"exceptions": {
"Excepcion": "Exception",
"ExcepcionBase": "BaseException",
"ErrorDeValor": "ValueError",
"ErrorDeTipo": "TypeError",
"ErrorDeClave": "KeyError",
"ErrorDeIndice": "IndexError",
"ErrorDeAtributo": "AttributeError",
"ErrorDeNombre": "NameError",
"ErrorDeImportacion": "ImportError",
"ErrorDelSistema": "OSError",
"ArchivoNoEncontrado": "FileNotFoundError",
"ErrorDeEjecucion": "RuntimeError",
"DetenerIteracion": "StopIteration",
"SalidaDelSistema": "SystemExit",
"InterrupcionDeTeclado": "KeyboardInterrupt",
"ErrorNoImplementado": "NotImplementedError",
"ErrorDeDivisionCero": "ZeroDivisionError",
"ErrorDeRecursion": "RecursionError",
"ErrorDeSintaxis": "SyntaxError",
"ErrorDeAfirmacion": "AssertionError",
"ErrorDeDesbordamiento": "OverflowError",
"ErrorDeMemoria": "MemoryError",
"ErrorDePermiso": "PermissionError",
"ErrorDeTiempoAgotado": "TimeoutError"
},
"error_messages": {
"SyntaxError": "Error de sintaxis",
"ValueError": "Error de valor",
"TypeError": "Error de tipo",
"KeyError": "Error de clave",
"IndexError": "Error de índice",
"AttributeError": "Error de atributo",
"NameError": "Error de nombre",
"ImportError": "Error de importación",
"FileNotFoundError": "Archivo no encontrado",
"ZeroDivisionError": "Error división por cero",
"RecursionError": "Error de recursión",
"RuntimeError": "Error de ejecución",
"MemoryError": "Error de memoria",
"OverflowError": "Error de desbordamiento",
"AssertionError": "Error de afirmación",
"NotImplementedError": "Error no implementado",
"StopIteration": "Detener iteración",
"KeyboardInterrupt": "Interrupción de teclado",
"PermissionError": "Error de permiso",
"TimeoutError": "Error de tiempo agotado"
},
"stdlib": {
"mate": "math",
"sis": "sys",
"fechahora": "datetime",
"tiempo": "time",
"aleatorio": "random",
"aleatoria": "random",
"colecciones": "collections",
"ruta": "pathlib",
"er": "re"
},
"postfix_keywords": []
}