Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d4b931ec2c | |||
| 784b3729e4 | |||
| e50b2754ae |
57
README.md
57
README.md
@@ -1 +1,56 @@
|
|||||||
# foreignthon
|
# ForeignThon
|
||||||
|
|
||||||
|
Write Python in any human language.
|
||||||
|
|
||||||
|
ForeignThon transpiles `.es.py`, `.ta.py` and more into standard Python — keywords, builtins, exceptions, all translated. Errors come back in your language too.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# main.es.py
|
||||||
|
def saludar(nombre):
|
||||||
|
retornar f"Hola, {nombre}!"
|
||||||
|
|
||||||
|
para i en dist(3):
|
||||||
|
escribir(saludar(f"mundo {i}"))
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
fpy run main.es.py
|
||||||
|
# Hola, mundo 0!
|
||||||
|
# Hola, mundo 1!
|
||||||
|
# Hola, mundo 2!
|
||||||
|
```
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install foreignthon # Core
|
||||||
|
pip install foreignthon-es # Spanish
|
||||||
|
pip install foreignthon-ta # Tamil
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
fpy new myproject --lang es
|
||||||
|
cd myproject
|
||||||
|
fpy run src/main.es.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
| Command | Description |
|
||||||
|
|---|---|
|
||||||
|
| `fpy new <name> --lang <code>` | Scaffold a new project |
|
||||||
|
| `fpy run <file>` | Transpile and run |
|
||||||
|
| `fpy compile <file>` | Transpile to `.compiled.py` |
|
||||||
|
| `fpy decompile <file> --lang <code>` | Convert Python back to a language |
|
||||||
|
| `fpy check <file>` | Validate without running |
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
→ [foreignthon.keshavanand.net](https://foreignthon.keshavanand.net)
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
GPL v3
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "foreignthon"
|
name = "foreignthon"
|
||||||
version = "0.5.3"
|
version = "0.5.4"
|
||||||
description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python."
|
description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python."
|
||||||
license = { text = "GPL v3" }
|
license = { text = "GPL v3" }
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ from __future__ import annotations
|
|||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
import tokenize
|
import tokenize
|
||||||
|
import unicodedata
|
||||||
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .pack import load_pack
|
from .pack import load_pack
|
||||||
@@ -79,7 +81,103 @@ def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> s
|
|||||||
return "".join(parts)
|
return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=2048)
|
||||||
|
def _is_safe_token(s: str) -> bool:
|
||||||
|
"""Return True if the tokenizer produces `s` as a single NAME token.
|
||||||
|
|
||||||
|
Some Unicode scripts (e.g. Tamil) contain combining characters that the
|
||||||
|
tokenize module's regex treats as token boundaries, even though the full
|
||||||
|
string passes str.isidentifier(). Keys that fail this check need a
|
||||||
|
pre-pass string replacement before tokenization.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
toks = [
|
||||||
|
t for t in tokenize.generate_tokens(io.StringIO(s + "\n").readline)
|
||||||
|
if t.type not in (
|
||||||
|
tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
|
||||||
|
tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
return len(toks) == 1 and toks[0].type == tokenize.NAME and toks[0].string == s
|
||||||
|
except tokenize.TokenError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _is_id_char(c: str) -> bool:
|
||||||
|
"""True if `c` can be part of an identifier (letter, mark, digit, or underscore)."""
|
||||||
|
cat = unicodedata.category(c)
|
||||||
|
return cat.startswith(("L", "M", "N")) or c == "_"
|
||||||
|
|
||||||
|
|
||||||
|
def _code_region_replace(source: str, unsafe_mapping: dict) -> str:
|
||||||
|
"""Replace unsafe keys in source only in code regions (not string literals or comments).
|
||||||
|
|
||||||
|
The tokenizer correctly identifies STRING/COMMENT boundaries even when NAME
|
||||||
|
tokens are mangled by combining characters, so we use it to find protected spans.
|
||||||
|
Identifier-boundary checks prevent partial matches inside longer words.
|
||||||
|
"""
|
||||||
|
lines = source.splitlines(keepends=True)
|
||||||
|
cumlen = [0]
|
||||||
|
for line in lines:
|
||||||
|
cumlen.append(cumlen[-1] + len(line))
|
||||||
|
|
||||||
|
protected: list[tuple[int, int]] = []
|
||||||
|
try:
|
||||||
|
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
|
||||||
|
if tok.type in (tokenize.STRING, tokenize.COMMENT):
|
||||||
|
sr, sc = tok.start
|
||||||
|
er, ec = tok.end
|
||||||
|
protected.append((cumlen[sr - 1] + sc, cumlen[er - 1] + ec))
|
||||||
|
except tokenize.TokenError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
sorted_keys = sorted(unsafe_mapping, key=len, reverse=True)
|
||||||
|
result: list[str] = []
|
||||||
|
pos = 0
|
||||||
|
span_idx = 0
|
||||||
|
n = len(source)
|
||||||
|
|
||||||
|
while pos < n:
|
||||||
|
if span_idx < len(protected) and pos >= protected[span_idx][0]:
|
||||||
|
end = protected[span_idx][1]
|
||||||
|
result.append(source[pos:end])
|
||||||
|
pos = end
|
||||||
|
span_idx += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
code_end = protected[span_idx][0] if span_idx < len(protected) else n
|
||||||
|
|
||||||
|
matched = False
|
||||||
|
for k in sorted_keys:
|
||||||
|
klen = len(k)
|
||||||
|
if pos + klen > code_end:
|
||||||
|
continue
|
||||||
|
if source[pos:pos + klen] != k:
|
||||||
|
continue
|
||||||
|
before_ok = pos == 0 or not _is_id_char(source[pos - 1])
|
||||||
|
after_ok = (pos + klen >= n) or not _is_id_char(source[pos + klen])
|
||||||
|
if before_ok and after_ok:
|
||||||
|
result.append(unsafe_mapping[k])
|
||||||
|
pos += klen
|
||||||
|
matched = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not matched:
|
||||||
|
result.append(source[pos])
|
||||||
|
pos += 1
|
||||||
|
|
||||||
|
return "".join(result)
|
||||||
|
|
||||||
|
|
||||||
def _swap_tokens(source: str, mapping: dict) -> str:
|
def _swap_tokens(source: str, mapping: dict) -> str:
|
||||||
|
safe_mapping: dict[str, str] = {}
|
||||||
|
unsafe_mapping: dict[str, str] = {}
|
||||||
|
for k, v in mapping.items():
|
||||||
|
(safe_mapping if _is_safe_token(k) else unsafe_mapping)[k] = v
|
||||||
|
|
||||||
|
if unsafe_mapping:
|
||||||
|
source = _code_region_replace(source, unsafe_mapping)
|
||||||
|
|
||||||
source_lines = source.splitlines(keepends=True)
|
source_lines = source.splitlines(keepends=True)
|
||||||
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
|
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
|
||||||
|
|
||||||
@@ -94,8 +192,8 @@ def _swap_tokens(source: str, mapping: dict) -> str:
|
|||||||
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
|
gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
|
||||||
result.append(gap)
|
result.append(gap)
|
||||||
|
|
||||||
if tok_type == tokenize.NAME and tok_string in mapping:
|
if tok_type == tokenize.NAME and tok_string in safe_mapping:
|
||||||
result.append(mapping[tok_string])
|
result.append(safe_mapping[tok_string])
|
||||||
else:
|
else:
|
||||||
result.append(tok_string)
|
result.append(tok_string)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user