Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d4b931ec2c | |||
| 784b3729e4 | |||
| e50b2754ae |
57
README.md
57
README.md
@@ -1 +1,56 @@
|
||||
# foreignthon
|
||||
# ForeignThon
|
||||
|
||||
Write Python in any human language.
|
||||
|
||||
ForeignThon transpiles `.es.py`, `.ta.py` and more into standard Python — keywords, builtins, exceptions, all translated. Errors come back in your language too.
|
||||
|
||||
```python
|
||||
# main.es.py
|
||||
def saludar(nombre):
|
||||
retornar f"Hola, {nombre}!"
|
||||
|
||||
para i en dist(3):
|
||||
escribir(saludar(f"mundo {i}"))
|
||||
```
|
||||
|
||||
```bash
|
||||
fpy run main.es.py
|
||||
# Hola, mundo 0!
|
||||
# Hola, mundo 1!
|
||||
# Hola, mundo 2!
|
||||
```
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
pip install foreignthon # Core
|
||||
pip install foreignthon-es # Spanish
|
||||
pip install foreignthon-ta # Tamil
|
||||
```
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
fpy new myproject --lang es
|
||||
cd myproject
|
||||
fpy run src/main.es.py
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Description |
|
||||
|---|---|
|
||||
| `fpy new <name> --lang <code>` | Scaffold a new project |
|
||||
| `fpy run <file>` | Transpile and run |
|
||||
| `fpy compile <file>` | Transpile to `.compiled.py` |
|
||||
| `fpy decompile <file> --lang <code>` | Convert Python back to a language |
|
||||
| `fpy check <file>` | Validate without running |
|
||||
|
||||
## Documentation
|
||||
|
||||
→ [foreignthon.keshavanand.net](https://foreignthon.keshavanand.net)
|
||||
|
||||
## License
|
||||
|
||||
GPL v3
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "foreignthon"
|
||||
version = "0.5.3"
|
||||
version = "0.5.4"
|
||||
description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python."
|
||||
license = { text = "GPL v3" }
|
||||
requires-python = ">=3.9"
|
||||
|
||||
@@ -3,6 +3,8 @@ from __future__ import annotations
|
||||
import io
|
||||
import re
|
||||
import tokenize
|
||||
import unicodedata
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
from .pack import load_pack
|
||||
@@ -79,7 +81,103 @@ def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> s
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
@lru_cache(maxsize=2048)
def _is_safe_token(s: str) -> bool:
    """Return True if the tokenizer produces `s` as a single NAME token.

    Some Unicode scripts (e.g. Tamil) contain combining characters that the
    tokenize module's regex treats as token boundaries, even though the full
    string passes str.isidentifier(). Keys that fail this check need a
    pre-pass string replacement before tokenization.

    Args:
        s: Candidate keyword/identifier text (typically a language-pack key).

    Returns:
        True only when tokenizing ``s`` yields exactly one significant token,
        that token is a NAME, and its text equals ``s``. Any string the
        tokenizer rejects (including one that triggers an indentation error)
        is reported as unsafe rather than raising.
    """
    # Structural tokens the tokenizer always emits around the payload; they
    # say nothing about whether `s` itself survived as one NAME.
    structural = (
        tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
        tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
    )
    try:
        toks = [
            t for t in tokenize.generate_tokens(io.StringIO(s + "\n").readline)
            if t.type not in structural
        ]
    except (tokenize.TokenError, SyntaxError):
        # tokenize raises TokenError for incomplete input, but it can also
        # raise IndentationError (a SyntaxError subclass) for inconsistent
        # dedents. Either way the string is not a single clean NAME.
        return False
    return len(toks) == 1 and toks[0].type == tokenize.NAME and toks[0].string == s
|
||||
|
||||
|
||||
def _is_id_char(c: str) -> bool:
    """Tell whether the single character `c` may appear inside an identifier.

    A character qualifies when it is an underscore or when its Unicode
    general category is a Letter (L*), Mark (M*), or Number (N*).
    """
    if c == "_":
        return True
    # The first letter of the two-letter general category is the major class.
    major_class = unicodedata.category(c)[0]
    return major_class in ("L", "M", "N")
|
||||
|
||||
|
||||
def _code_region_replace(source: str, unsafe_mapping: dict) -> str:
    """Replace unsafe keys in source only in code regions (not string literals or comments).

    The tokenizer correctly identifies STRING/COMMENT boundaries even when NAME
    tokens are mangled by combining characters, so we use it to find protected spans.
    Identifier-boundary checks prevent partial matches inside longer words.

    Args:
        source: Full source text to rewrite.
        unsafe_mapping: Maps each key that cannot be matched as a single NAME
            token to its replacement text. Empty keys are ignored.

    Returns:
        `source` with every whole-word occurrence of a key that lies outside
        string literals and comments replaced by its mapped value. When
        several keys match at the same position, the longest one wins.
    """
    if not source or not unsafe_mapping:
        return source

    # Token positions are (row, col) relative to the lines the tokenizer read
    # through StringIO.readline, which splits on "\n" only. str.splitlines()
    # also splits on \f, \v, \x1c-\x1e, \x85, \u2028 and \u2029, so using it
    # here would shift every offset after the first such character.
    line_starts = [0]
    for line in io.StringIO(source).readlines():
        line_starts.append(line_starts[-1] + len(line))

    def to_offset(position: tuple[int, int]) -> int:
        row, col = position
        return line_starts[row - 1] + col

    # Newer tokenizers split f-strings (and template strings) into
    # START / MIDDLE / END tokens instead of one STRING token. Protect the
    # whole literal from its START to the matching END so the result is the
    # same as when the literal arrives as a single STRING token.
    literal_starts = {
        getattr(tokenize, name)
        for name in ("FSTRING_START", "TSTRING_START")
        if hasattr(tokenize, name)
    }
    literal_ends = {
        getattr(tokenize, name)
        for name in ("FSTRING_END", "TSTRING_END")
        if hasattr(tokenize, name)
    }

    protected: list[tuple[int, int]] = []
    depth = 0  # nesting level of split string literals
    outer_start = 0
    try:
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if tok.type in literal_starts:
                if depth == 0:
                    outer_start = to_offset(tok.start)
                depth += 1
            elif tok.type in literal_ends and depth > 0:
                depth -= 1
                if depth == 0:
                    protected.append((outer_start, to_offset(tok.end)))
            elif depth == 0 and tok.type in (tokenize.STRING, tokenize.COMMENT):
                protected.append((to_offset(tok.start), to_offset(tok.end)))
    except tokenize.TokenError:
        # Best effort: keep the spans found before tokenization gave up.
        pass

    # Longest keys first so a longer key wins over any key that is its prefix.
    # Empty keys are dropped: they would match without consuming input and the
    # scan below would never advance. Bucketing by first character preserves
    # the longest-first order while skipping keys that cannot match at `pos`.
    keys_by_first: dict[str, list[str]] = {}
    for key in sorted((k for k in unsafe_mapping if k), key=len, reverse=True):
        keys_by_first.setdefault(key[0], []).append(key)

    result: list[str] = []
    pos = 0
    span_idx = 0
    n = len(source)

    while pos < n:
        if span_idx < len(protected) and pos >= protected[span_idx][0]:
            # Inside a protected span: copy it through untouched.
            end = protected[span_idx][1]
            result.append(source[pos:end])
            pos = max(pos, end)  # never move backwards
            span_idx += 1
            continue

        # A match must end before the next protected span begins.
        code_end = protected[span_idx][0] if span_idx < len(protected) else n

        for key in keys_by_first.get(source[pos], ()):
            stop = pos + len(key)
            if stop > code_end or not source.startswith(key, pos):
                continue
            # Reject matches embedded in a longer identifier-like word.
            if pos > 0 and _is_id_char(source[pos - 1]):
                continue
            if stop < n and _is_id_char(source[stop]):
                continue
            result.append(unsafe_mapping[key])
            pos = stop
            break
        else:
            result.append(source[pos])
            pos += 1

    return "".join(result)
|
||||
|
||||
|
||||
def _swap_tokens(source: str, mapping: dict) -> str:
|
||||
safe_mapping: dict[str, str] = {}
|
||||
unsafe_mapping: dict[str, str] = {}
|
||||
for k, v in mapping.items():
|
||||
(safe_mapping if _is_safe_token(k) else unsafe_mapping)[k] = v
|
||||
|
||||
if unsafe_mapping:
|
||||
source = _code_region_replace(source, unsafe_mapping)
|
||||
|
||||
source_lines = source.splitlines(keepends=True)
|
||||
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
|
||||
|
||||
@@ -94,8 +192,8 @@ def _swap_tokens(source: str, mapping: dict) -> str:
|
||||
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
|
||||
result.append(gap)
|
||||
|
||||
if tok_type == tokenize.NAME and tok_string in mapping:
|
||||
result.append(mapping[tok_string])
|
||||
if tok_type == tokenize.NAME and tok_string in safe_mapping:
|
||||
result.append(safe_mapping[tok_string])
|
||||
else:
|
||||
result.append(tok_string)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user