bumped version

replit agent fixes for languages like tamil
added readme
2026-05-21 15:31:29 +00:00 · 2026-05-21 15:30:10 +00:00 · 2026-05-20 17:29:55 -05:00
3 changed files with 157 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -1 +1,56 @@
-# foreignthon
+# ForeignThon
 Write Python in any human language.
 ForeignThon transpiles `.es.py`, `.ta.py` and more into standard Python — keywords, builtins, exceptions, all translated. Errors come back in your language too.
 ```python
 # main.es.py
 def saludar(nombre):
    retornar f"Hola, {nombre}!"
 para i en dist(3):
    escribir(saludar(f"mundo {i}"))
 ```
 ```bash
 fpy run main.es.py
 # Hola, mundo 0!
 # Hola, mundo 1!
 # Hola, mundo 2!
 ```
 ## Install
 ```bash
 pip install foreignthon      # Core 
 pip install foreignthon-es   # Spanish
 pip install foreignthon-ta   # Tamil
 ```
 ## Quick start
 ```bash
 fpy new myproject --lang es
 cd myproject
 fpy run src/main.es.py
 ```
 ## Commands
 | Command | Description |
 |---|---|
 | `fpy new <name> --lang <code>` | Scaffold a new project |
 | `fpy run <file>` | Transpile and run |
 | `fpy compile <file>` | Transpile to `.compiled.py` |
 | `fpy decompile <file> --lang <code>` | Convert Python back to a language |
 | `fpy check <file>` | Validate without running |
 ## Documentation
 → [foreignthon.keshavanand.net](https://foreignthon.keshavanand.net)
 ## License
 GPL v3
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "foreignthon"
-version = "0.5.3"
+version = "0.5.4"
 description = "Write Python in any language. Transpiles foreign-language .xx.py files to standard Python."
 license = { text = "GPL v3" }
 requires-python = ">=3.9"
--- a/src/foreignthon/transpiler.py
+++ b/src/foreignthon/transpiler.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 import io
 import re
 import tokenize
 import unicodedata
 from functools import lru_cache
 from pathlib import Path
 from .pack import load_pack
@@ -79,7 +81,103 @@ def _get_slice(source_lines: list[str], sr: int, sc: int, er: int, ec: int) -> s
    return "".join(parts)
@lru_cache(maxsize=2048)
 def _is_safe_token(s: str) -> bool:
    """Return True if the tokenizer produces `s` as a single NAME token.
    Some Unicode scripts (e.g. Tamil) contain combining characters that the
    tokenize module's regex treats as token boundaries, even though the full
    string passes str.isidentifier().  Keys that fail this check need a
    pre-pass string replacement before tokenization.
    """
    try:
        toks = [
            t for t in tokenize.generate_tokens(io.StringIO(s + "\n").readline)
            if t.type not in (
                tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
                tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
            )
        ]
        return len(toks) == 1 and toks[0].type == tokenize.NAME and toks[0].string == s
    except tokenize.TokenError:
        return False
 def _is_id_char(c: str) -> bool:
    """True if `c` can be part of an identifier (letter, mark, digit, or underscore)."""
    cat = unicodedata.category(c)
    return cat.startswith(("L", "M", "N")) or c == "_"
 def _code_region_replace(source: str, unsafe_mapping: dict) -> str:
    """Replace unsafe keys in source only in code regions (not string literals or comments).
    The tokenizer correctly identifies STRING/COMMENT boundaries even when NAME
    tokens are mangled by combining characters, so we use it to find protected spans.
    Identifier-boundary checks prevent partial matches inside longer words.
    """
    lines = source.splitlines(keepends=True)
    cumlen = [0]
    for line in lines:
        cumlen.append(cumlen[-1] + len(line))
    protected: list[tuple[int, int]] = []
    try:
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if tok.type in (tokenize.STRING, tokenize.COMMENT):
                sr, sc = tok.start
                er, ec = tok.end
                protected.append((cumlen[sr - 1] + sc, cumlen[er - 1] + ec))
    except tokenize.TokenError:
        pass
    sorted_keys = sorted(unsafe_mapping, key=len, reverse=True)
    result: list[str] = []
    pos = 0
    span_idx = 0
    n = len(source)
    while pos < n:
        if span_idx < len(protected) and pos >= protected[span_idx][0]:
            end = protected[span_idx][1]
            result.append(source[pos:end])
            pos = end
            span_idx += 1
            continue
        code_end = protected[span_idx][0] if span_idx < len(protected) else n
        matched = False
        for k in sorted_keys:
            klen = len(k)
            if pos + klen > code_end:
                continue
            if source[pos:pos + klen] != k:
                continue
            before_ok = pos == 0 or not _is_id_char(source[pos - 1])
            after_ok = (pos + klen >= n) or not _is_id_char(source[pos + klen])
            if before_ok and after_ok:
                result.append(unsafe_mapping[k])
                pos += klen
                matched = True
                break
        if not matched:
            result.append(source[pos])
            pos += 1
    return "".join(result)
 def _swap_tokens(source: str, mapping: dict) -> str:
    safe_mapping: dict[str, str] = {}
    unsafe_mapping: dict[str, str] = {}
    for k, v in mapping.items():
        (safe_mapping if _is_safe_token(k) else unsafe_mapping)[k] = v
    if unsafe_mapping:
        source = _code_region_replace(source, unsafe_mapping)
    source_lines = source.splitlines(keepends=True)
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
@@ -94,8 +192,8 @@ def _swap_tokens(source: str, mapping: dict) -> str:
        gap = _get_slice(source_lines, prev_end[0], prev_end[1], s_row, s_col)
        result.append(gap)
-        if tok_type == tokenize.NAME and tok_string in mapping:
+        if tok_type == tokenize.NAME and tok_string in safe_mapping:
-            result.append(mapping[tok_string])
+            result.append(safe_mapping[tok_string])
        else:
            result.append(tok_string)
Author	SHA1	Message	Date
KeshavAnandCode	d4b931ec2c	bumped version All checks were successful CI / test (pull_request) Successful in 12s Details Publish Core / verify (push) Successful in 10s Details Publish Core / publish (push) Successful in 14s Details	2026-05-21 15:31:29 +00:00
KeshavAnandCode	784b3729e4	replit agent fixes for languages like tamil	2026-05-21 15:30:10 +00:00
KeshavAnandCode	e50b2754ae	added readme All checks were successful CI / test (push) Successful in 12s Details	2026-05-20 17:29:55 -05:00