a bunch of stuff
modules/comix.module
@@ -1,8 +1,8 @@
 {
   "name": "Comix Reader",
-  "version": "1.0.6",
+  "version": "1.0.7",
   "author": "Animex",
-  "description": "Comix.to Manga Reader - Double-Safe Nested Data Parsing.",
+  "description": "Comix.to Manga Reader - Fixed WAF validation, URL encoding, and Anti-Leech headers.",
   "type": "MANGA_READER",
   "requirements": ["httpx", "re", "json"]
 }
@@ -13,12 +13,14 @@ import urllib.parse
 import inspect
 import httpx

-# Exact headers from your working test client
+# Extended headers to satisfy WAF & Anti-Leech checks
 HEADERS = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
     'Accept': 'application/json, text/plain, */*',
-    'Referer': 'https://comix.to/'
+    'Referer': 'https://comix.to/',
+    'X-Requested-With': 'XMLHttpRequest'
 }

 async def _smart_fetch(method: str, url: str, **kwargs):
     """Uses injected HybridClient if available, otherwise falls back to real httpx."""
     client_or_lib = globals().get('httpx')
@@ -26,21 +28,14 @@ async def _smart_fetch(method: str, url: str, **kwargs):
     func = getattr(client_or_lib, method.lower(), None)
     if func and inspect.iscoroutinefunction(func):
         resp = await func(url, **kwargs)
-
-        # Debug: dump raw response to see what we're actually getting
-        raw = getattr(resp, 'text', '') or ''
-        print(f"[Comix] _smart_fetch response preview: {raw[:300]}")
-
         return resp

     # Fallback: real httpx.AsyncClient
     import httpx as _real_httpx
     async with _real_httpx.AsyncClient(follow_redirects=True) as client:
         resp = await getattr(client, method.lower())(url, **kwargs)
-        print(f"[Comix] _smart_fetch (direct) response preview: {resp.text[:300]}")
         return resp
-

 def get_nested(data, *keys, default=None):
     """Helper to safely traverse deeply nested dictionaries even if keys are None."""
     for key in keys:
@@ -65,15 +60,19 @@ async def get_title_from_mal(mal_id: int):
     return None

 async def search_manga(query: str):
-    """Searches Comix.to and returns (hash_id, slug)."""
+    """Searches Comix.to and returns (manga_id, hash_id, slug)."""
     if not query: return None
-    url = f"https://comix.to/api/v2/manga?keyword={urllib.parse.quote(query)}&order[relevance]=desc"
+
+    # Safely URL encode parameters (prevents WAF bracket blocks)
+    params = {"keyword": query, "order[relevance]": "desc"}
+    qs = urllib.parse.urlencode(params)
+    url = f"https://comix.to/api/v2/manga?{qs}"
+
     try:
         resp = await _smart_fetch("GET", url, headers=HEADERS)
-        print(f"[Comix] Search status: {resp.status_code}")
+        print(f"[Comix] Search status: {getattr(resp, 'status_code', 500)}")
         data = resp.json() if hasattr(resp, "json") else None
         if data is None:
             print(f"[Comix] Search response was not JSON. Raw: {resp.text[:200]}")
             return None

         # Safely get first item from result -> items
@@ -81,7 +80,8 @@ async def search_manga(query: str):
         if items and isinstance(items, list) and len(items) > 0:
             first = items[0]
             if isinstance(first, dict):
-                return first.get('hash_id'), first.get('slug')
+                # Return manga_id too for WAF API fallback purposes
+                return first.get('manga_id'), first.get('hash_id'), first.get('slug')
     except Exception as e:
         print(f"[Comix] Search Error: {e}")
     return None
@@ -91,30 +91,47 @@ async def get_chapters(mal_id: int):
     print(f"[Comix] get_chapters called for MAL ID: {mal_id}")

     title = await get_title_from_mal(mal_id)
     print(f"[Comix] Resolved title: {title}")
     if not title: return None

     manga_info = await search_manga(title)
     print(f"[Comix] Search result: {manga_info}")
     if not manga_info: return None

-    hash_id, slug = manga_info
+    manga_id, hash_id, slug = manga_info

     # Paginate since API caps at 100 per request
-    all_items = []
-    offset = 0
+    seen_ids = set()
+
+    all_items = []
+    page = 1

     while True:
-        url = f"https://comix.to/api/v2/manga/{hash_id}/chapters?order[number]=asc&limit=100&page={page}"
-        resp = await _smart_fetch("GET", url, headers=HEADERS)
+        # Dynamically spoof the Referer for the specific manga to bypass Anti-Leech
+        req_headers = HEADERS.copy()
+        req_headers["Referer"] = f"https://comix.to/title/{hash_id}-{slug}"
+
+        # Safely encode the query string to prevent 403 Bracket Rejection
+        params = {
+            "order[number]": "asc",
+            "limit": 100,
+            "page": page
+        }
+        qs = urllib.parse.urlencode(params)
+
+        # Primary attempt: request chapters using the Hash ID
+        url = f"https://comix.to/api/v2/manga/{hash_id}/chapters?{qs}&_=xQm9tJfLwGhz_0Eq8S_YAHYkwp-q1PLfm50W5QJnyd1NnNYpAjXjyCoAzoOLRgUaJOoxWS0NeDGz_rNrbqBjLLP1H9qi"
+        resp = await _smart_fetch("GET", url, headers=req_headers)

         data = resp.json() if hasattr(resp, "json") else None

+        # Fallback: If framework validation fails (400, 403, 404), it might be strictly
+        # expecting the internal integer primary key instead of the string hash.
+        if data and data.get("status") in [400, 403, 404]:
+            print(f"[Comix] Hash ID rejected ({data.get('status')}). Falling back to Integer Manga ID...")
+            url_fallback = f"https://comix.to/api/v2/manga/{manga_id}/chapters?{qs}&_=xQm9tJfLwGhz_0Eq8S_YAHYkwp-q1PLfm50W5QJnyd1NnNYpAjXjyCoAzoOLRgUaJOoxWS0NeDGz_rNrbqBjLLP1H9qi"
+            resp = await _smart_fetch("GET", url_fallback, headers=req_headers)
+            data = resp.json() if hasattr(resp, "json") else None
+
         if data is None or data.get("status") != 200:
-            print(f"[Comix] Bad response on page {page}: {data.get('message') if data else resp.text[:200]}")
+            error_msg = data.get('message') if data else getattr(resp, 'text', '')[:200]
+            print(f"[Comix] Bad response on page {page}: {error_msg}")
             break

         items = get_nested(data, 'result', 'items', default=[])
@@ -137,11 +154,15 @@ async def get_chapters(mal_id: int):
     for item in all_items:
         if not isinstance(item, dict): continue
         num = str(item.get('number', '0'))
+
+        # Safely fallback to any available ID key
+        c_id = item.get('chapter_id') or item.get('id') or item.get('hash_id')
+
         if num not in seen_numbers:
             seen_numbers[num] = True
             formatted.append({
                 "title": item.get('name') or f"Chapter {num}",
-                "url": f"{hash_id}:{slug}:{item.get('chapter_id')}",
+                "url": f"{hash_id}:{slug}:{c_id}",
                 "chapter_number": num,
                 "is_external": False
             })
@@ -185,7 +206,11 @@ async def get_chapter_images(mal_id: int, chapter_num: str):
     hash_id, slug, chapter_id = target_chapter["url"].split(":")
     url = f"https://comix.to/title/{hash_id}-{slug}/{chapter_id}-chapter-{chapter_num}"

-    resp = await _smart_fetch("GET", url, headers=HEADERS)
+    # Mirror the Referer just as we do for chapters
+    req_headers = HEADERS.copy()
+    req_headers["Referer"] = f"https://comix.to/title/{hash_id}-{slug}"
+
+    resp = await _smart_fetch("GET", url, headers=req_headers)
    if not resp or not hasattr(resp, "text"): return None

     regex = r'["\\]*images["\\]*\s*:\s*(\[[^\]]*\])'
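Note on the urlencode changes above: the WAF fix works because urllib.parse.urlencode percent-encodes the literal brackets in keys like order[relevance], so the filter never sees "[" or "]" in the query string. A standalone Python sketch (illustrative, not part of the commit; the sample keyword is made up):

import urllib.parse

# Bracketed keys reach the server percent-encoded, which is what
# lets these requests get past the WAF's bracket filter.
params = {"keyword": "one piece", "order[relevance]": "desc"}
print(urllib.parse.urlencode(params))
# -> keyword=one+piece&order%5Brelevance%5D=desc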
modules/weebcentral.module (new file, 338 lines)
@@ -0,0 +1,338 @@
{
  "name": "WeebCentral",
  "version": "1.0.0",
  "author": "Animex",
  "description": "WeebCentral.com Manga Reader — uses HTMX scraping endpoints for chapters and pages.",
  "type": "MANGA_READER",
  "requirements": ["httpx", "beautifulsoup4", "re"]
}
---
import re
import inspect
import httpx as _httpx_lib

try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None

BASE_URL = "https://weebcentral.com"

BROWSER_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html, */*",
}

# ---------------------------------------------------------------------------
# Internal HTTP helpers
# ---------------------------------------------------------------------------

def _get_hybrid():
    """Returns the injected HybridClient if present, else None."""
    candidate = globals().get("httpx")
    if candidate is not None and candidate is not _httpx_lib:
        return candidate
    return None


async def _get(url, headers=None, timeout=20):
    """GET via HybridClient tunnel when available, real httpx otherwise."""
    hybrid = _get_hybrid()
    if hybrid is not None:
        func = getattr(hybrid, "get", None)
        if func and inspect.iscoroutinefunction(func):
            return await func(url, headers=headers, timeout=timeout)

    async with _httpx_lib.AsyncClient(follow_redirects=True) as c:
        return await c.get(url, headers=headers, timeout=timeout)


async def _post_form(url, data: dict, extra_headers: dict = None, timeout=20):
    """
    Form-encoded POST. The HybridClient tunnel only speaks JSON bodies, so
    form POSTs always go through real httpx directly — this is intentional.
    """
    merged = {**BROWSER_HEADERS, **(extra_headers or {})}
    merged["Content-Type"] = "application/x-www-form-urlencoded"
    async with _httpx_lib.AsyncClient(follow_redirects=True) as c:
        return await c.post(url, data=data, headers=merged, timeout=timeout)


def _parse_html(text: str):
    if BeautifulSoup is None:
        raise RuntimeError("[WeebCentral] beautifulsoup4 is not installed.")
    return BeautifulSoup(text, "html.parser")


# ---------------------------------------------------------------------------
# MAL title lookup
# ---------------------------------------------------------------------------

async def _get_mal_titles(mal_id: int):
    """Returns (romaji_title, english_title) from Jikan, or (None, None)."""
    url = f"https://api.jikan.moe/v4/manga/{mal_id}"
    try:
        resp = await _get(url)
        if getattr(resp, "status_code", 500) != 200:
            return None, None
        data = resp.json() if hasattr(resp, "json") else None
        if not data:
            return None, None
        d = data.get("data", {})
        return d.get("title"), d.get("title_english")
    except Exception as e:
        print(f"[WeebCentral] MAL fetch error: {e}")
        return None, None


# ---------------------------------------------------------------------------
# WeebCentral search
# ---------------------------------------------------------------------------

async def _search(query: str):
    """
    Searches WeebCentral via its HTMX quick-search endpoint.
    Returns a list of {"id", "title", "url"} dicts.
    """
    if not query:
        return []

    search_url = f"{BASE_URL}/search/simple?location=main"
    htmx_headers = {
        "HX-Request": "true",
        "HX-Trigger": "quick-search-input",
        "HX-Trigger-Name": "text",
        "HX-Target": "quick-search-result",
        "HX-Current-URL": f"{BASE_URL}/",
    }

    try:
        resp = await _post_form(search_url, data={"text": query}, extra_headers=htmx_headers)
        if getattr(resp, "status_code", 500) != 200:
            print(f"[WeebCentral] Search returned status {resp.status_code}")
            return []

        soup = _parse_html(resp.text)
        results = []

        for a in soup.select("a"):
            href = a.get("href", "")
            if "/series/" not in href:
                continue

            title_el = a.select_one(".flex-1")
            title = title_el.get_text(strip=True) if title_el else "Unknown"

            id_match = re.search(r"/series/([^/]+)", href)
            if not id_match:
                continue
            manga_id = id_match.group(1)

            # Accept if either string is a substring of the other (case-insensitive)
            q_lower, t_lower = query.lower(), title.lower()
            if q_lower in t_lower or t_lower in q_lower:
                results.append({"id": manga_id, "title": title, "url": href})

        return results
    except Exception as e:
        print(f"[WeebCentral] Search error: {e}")
        return []


# ---------------------------------------------------------------------------
# Chapter list
# ---------------------------------------------------------------------------

async def _get_chapters_for_series(manga_id: str):
    """
    Returns a sorted list of chapter dicts:
        {"id", "title", "chapter_number"}
    Ordered ascending by chapter number.
    """
    url = f"{BASE_URL}/series/{manga_id}/full-chapter-list"
    htmx_headers = {
        "HX-Request": "true",
        "HX-Target": "chapter-list",
        "HX-Current-URL": f"{BASE_URL}/series/{manga_id}",
        "Referer": f"{BASE_URL}/series/{manga_id}",
    }

    try:
        resp = await _get(url, headers={**BROWSER_HEADERS, **htmx_headers})
        if getattr(resp, "status_code", 500) != 200:
            return []

        soup = _parse_html(resp.text)
        chapters = []

        for row in soup.select("div.flex.items-center"):
            a = row.find("a")
            if not a:
                continue
            href = a.get("href", "")

            title_span = a.select_one("span.grow > span")
            title = title_span.get_text(strip=True) if title_span else ""

            id_match = re.search(r"/chapters/([^/]+)", href)
            if not id_match:
                continue
            chapter_id = id_match.group(1)

            num_match = re.search(r"(\d+(?:\.\d+)?)", title)
            chapter_num = num_match.group(1) if num_match else "0"

            chapters.append({
                "id": chapter_id,
                "title": title,
                "chapter_number": chapter_num,
            })

        # Chapters come in descending order from the site — reverse to ascending
        chapters.reverse()
        return chapters
    except Exception as e:
        print(f"[WeebCentral] Chapter list error: {e}")
        return []


# ---------------------------------------------------------------------------
# Page images
# ---------------------------------------------------------------------------

async def _get_pages(chapter_id: str):
    """Returns a list of image URLs for the given chapter ID."""
    url = f"{BASE_URL}/chapters/{chapter_id}/images?is_prev=False&reading_style=long_strip"
    htmx_headers = {
        "HX-Request": "true",
        "HX-Current-URL": f"{BASE_URL}/chapters/{chapter_id}",
        "Referer": f"{BASE_URL}/chapters/{chapter_id}",
    }

    try:
        resp = await _get(url, headers={**BROWSER_HEADERS, **htmx_headers})
        if getattr(resp, "status_code", 500) != 200:
            return []

        soup = _parse_html(resp.text)
        imgs = soup.select("section.flex-1 img") or soup.find_all("img")
        pages = [img.get("src") for img in imgs if img.get("src")]
        return pages
    except Exception as e:
        print(f"[WeebCentral] Page fetch error: {e}")
        return []


# ---------------------------------------------------------------------------
# Public API — called by app.py
# ---------------------------------------------------------------------------

async def _resolve_series_id(mal_id: int):
    """Shared helper: MAL ID → WeebCentral series ID, or None."""
    romaji, english = await _get_mal_titles(mal_id)
    if not romaji and not english:
        print("[WeebCentral] Could not resolve titles from MAL.")
        return None

    for title in filter(None, [english, romaji]):
        results = await _search(title)
        if results:
            sid = results[0]["id"]
            print(f"[WeebCentral] Matched series '{results[0]['title']}' (id={sid})")
            return sid
        print(f"[WeebCentral] No results for '{title}', trying next…")

    print(f"[WeebCentral] No series found for MAL ID {mal_id}.")
    return None


async def get_chapters(mal_id: int):
    """
    Called by app.py /chapters/{mal_id}.
    Returns a list of chapter dicts compatible with the app's module interface:
        [{"title", "url", "chapter_number", "is_external"}, ...]
    Sorted descending (newest first), or None on failure.
    """
    print(f"[WeebCentral] get_chapters called — MAL {mal_id}")

    series_id = await _resolve_series_id(mal_id)
    if not series_id:
        return None

    raw = await _get_chapters_for_series(series_id)
    if not raw:
        return None

    # Expose the chapter_id inside `url` so get_chapter_images can re-use it
    # without a second search round-trip (format: "wc:{series_id}:{chapter_id}")
    formatted = []
    for ch in raw:
        formatted.append({
            "title": ch["title"],
            "url": f"wc:{series_id}:{ch['id']}",
            "chapter_number": ch["chapter_number"],
            "is_external": False,
        })

    # Return descending (newest first) — matches convention used by comix
    formatted.sort(key=lambda x: _safe_float(x["chapter_number"]), reverse=True)
    return formatted


def _safe_float(v):
    try:
        return float(v)
    except (ValueError, TypeError):
        return 0.0


async def get_chapter_images(mal_id: int, chapter_num: str):
    """
    Called by app.py /retrieve/{mal_id}/{chapter_num}.
    Reuses get_chapters() so the MAL -> search round-trip is not repeated.
    Returns a list of image URL strings, or None on failure.
    """
    print(f"[WeebCentral] get_chapter_images called -- MAL {mal_id}, chapter {chapter_num}")

    # 1. Get full chapter list (handles MAL lookup + search internally)
    chapters = await get_chapters(mal_id)
    if not chapters:
        print("[WeebCentral] Chapter list is empty.")
        return None

    # 2. Find the target chapter by number
    target = None
    try:
        target_f = float(chapter_num)
    except (ValueError, TypeError):
        target_f = None

    for ch in chapters:
        if target_f is not None:
            try:
                if float(ch["chapter_number"]) == target_f:
                    target = ch
                    break
            except (ValueError, TypeError):
                pass
        if ch["chapter_number"] == str(chapter_num):
            target = ch
            break

    if not target:
        print(f"[WeebCentral] Chapter {chapter_num} not found.")
        return None

    # 3. Unpack chapter ID from url field ("wc:{series_id}:{chapter_id}")
    try:
        _, _series_id, chapter_id = target["url"].split(":")
    except ValueError:
        print(f"[WeebCentral] Malformed url field: {target['url']}")
        return None

    print(f"[WeebCentral] Fetching pages for: {target['title']} (id={chapter_id})")

    # 4. Fetch page images
    pages = await _get_pages(chapter_id)
    print(f"[WeebCentral] Found {len(pages)} pages." if pages else "[WeebCentral] No pages extracted.")
    return pages if pages else None
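Since both modules expose the same two async entry points, the new file can be smoke-tested without app.py. A minimal sketch, assuming the .module source has been loaded as an importable Python module named weebcentral (the host app's actual loader may differ) and using a placeholder MAL ID:

import asyncio
import weebcentral  # assumption: the .module source is loadable as a Python module

async def main():
    chapters = await weebcentral.get_chapters(2)  # 2 is a placeholder MAL ID
    if not chapters:
        print("no chapters found")
        return
    print(f"{len(chapters)} chapters, newest: {chapters[0]['title']}")
    pages = await weebcentral.get_chapter_images(2, chapters[0]["chapter_number"])
    print(f"{len(pages or [])} page images")

asyncio.run(main())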