fixed and added mcp
This commit is contained in:
155
mcp_server.py
Normal file
155
mcp_server.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
# ── Paths ──────────────────────────────────────────────────────────────────
|
||||
project_root = Path(__file__).resolve().parent

# ── Models / Clients ───────────────────────────────────────────────────────
# Sentence-embedding model used to encode queries.
# NOTE(review): trust_remote_code=True allows Python code shipped with the
# model repository to run at load time — confirm this source is trusted.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)

# Qdrant client opened on the local path <project_root>/data/qdrant_local.
qdrant = QdrantClient(path=str(project_root / "data" / "qdrant_local"))
COLLECTION = "apush_chunks"

# Mapping consulted by retrieve(): parent_id -> list of parts, each carrying a
# "text" field. The file is opened explicitly as UTF-8 so that decoding does
# not depend on the platform's default locale encoding.
with open(
    project_root / "data" / "processed" / "parent_lookup.json",
    encoding="utf-8",
) as f:
    parent_lookup = json.load(f)
|
||||
|
||||
# ── Config (same as notebook) ──────────────────────────────────────────────
|
||||
# Number of points requested from Qdrant per query (passed as `limit` in retrieve()).
TOP_K = 10

# Instruction text returned verbatim by the `system_prompt` MCP prompt.
# It tells the model to call the search_textbook tool, how to cite and format
# answers, and what trailer to append to every response.
SYSTEM_PROMPT = """You are an expert AP US History tutor helping a student ace their APUSH exam.

You have access to the search_textbook tool. Call it before answering ANY history question.

ANSWERING:
- Cite inline like (Ch5, p.153) after every specific claim
- **Bold** key terms, dates, names, and critical facts
- Correct false premises directly — don't reinforce wrong assumptions
- If the textbook doesn't cover it, answer from general knowledge and prefix with "Outside textbook:"

FORMAT — match the question type:
- One word/fact → one word
- SAQ → 1 focused paragraph, dense with evidence
- LEQ/DBQ → full essay: context, thesis, body paragraphs with evidence, nuance
- General question → clear prose, as long as needed

END EVERY RESPONSE WITH:
---
**Sources Used:**
[list every source from the tool output with chapter, section, page, and score]
**Retrieval Confidence:** HIGH/MEDIUM/LOW"""
|
||||
|
||||
# ── Embed ──────────────────────────────────────────────────────────────────
|
||||
def embed_query(query: str) -> list[float]:
    """Encode a search query into an embedding vector.

    Args:
        query: Raw query text.

    Returns:
        The encoder output converted to a plain Python list via ``.tolist()``.
        The encoder is asked to normalize the embedding.
    """
    # The "search_query: " prefix is presumably the task prefix this embedding
    # model expects for queries — confirm against the model card.
    prefixed_query = f"search_query: {query}"
    embedding = model.encode(prefixed_query, normalize_embeddings=True)
    return embedding.tolist()
|
||||
|
||||
# ── Retrieve (same as notebook) ────────────────────────────────────────────
|
||||
def retrieve(query: str) -> dict:
    """Fetch the best-matching textbook passages for a query.

    Queries the Qdrant collection for up to ``TOP_K`` points, excluding points
    whose ``is_chapter_review`` payload field is True. The first hit's score is
    treated as the top score and graded into a confidence label. Hits are then
    collapsed to one per ``parent_id`` (first occurrence wins) and capped at
    five, and each survivor is expanded to its parent's full text.

    Args:
        query: Raw query text.

    Returns:
        A dict with keys ``query``, ``confidence`` ("HIGH", "MEDIUM" or
        "LOW"), ``top_score`` (0 when there are no hits) and ``sources``, a
        list of dicts with ``score``, ``chapter_num``, ``chapter_title``,
        ``section_title``, ``textbook_page`` and ``text``.

    Raises:
        KeyError: If a hit's payload lacks one of the fields read here, or a
            parent part has no ``"text"`` entry.
    """
    query_vector = embed_query(query)
    skip_chapter_reviews = Filter(
        must_not=[
            FieldCondition(key="is_chapter_review", match=MatchValue(value=True))
        ]
    )
    response = qdrant.query_points(
        collection_name=COLLECTION,
        query=query_vector,
        limit=TOP_K,
        query_filter=skip_chapter_reviews,
    )
    points = response.points

    # Grade the top score against descending floors; anything below the
    # lowest floor stays "LOW".
    top_score = points[0].score if points else 0
    confidence = "LOW"
    for floor, label in ((0.70, "HIGH"), (0.50, "MEDIUM")):
        if top_score >= floor:
            confidence = label
            break

    # Keep only the first hit seen for each parent_id; dict insertion order
    # preserves the order in which the hits were returned.
    first_hit_per_parent = {}
    for point in points:
        first_hit_per_parent.setdefault(point.payload["parent_id"], point)
    best_points = list(first_hit_per_parent.values())[:5]

    sources = []
    for point in best_points:
        payload = point.payload
        # A parent_id missing from the lookup yields an empty passage text.
        parent_parts = parent_lookup.get(payload["parent_id"], [])
        passage_text = "\n\n".join(part["text"] for part in parent_parts)
        sources.append(
            {
                "score": point.score,
                "chapter_num": payload["chapter_num"],
                "chapter_title": payload["chapter_title"],
                "section_title": payload["section_title"],
                "textbook_page": payload["textbook_page"],
                "text": passage_text,
            }
        )

    return {
        "query": query,
        "confidence": confidence,
        "top_score": top_score,
        "sources": sources,
    }
|
||||
|
||||
# ── MCP Server ─────────────────────────────────────────────────────────────
|
||||
# FastMCP server instance; the tool and prompt below register on it via decorators.
mcp = FastMCP("APUSH Tutor")
|
||||
|
||||
# The docstring below is kept verbatim: FastMCP publishes a tool's docstring to
# clients as the tool description, so it is runtime text rather than a comment.
@mcp.tool()
def search_textbook(query: str) -> str:
    """
    Search the AP US History textbook for relevant passages.
    Use this for any question about US history before answering.
    Always cite sources inline and list all sources at the end.
    Bold or emphasize the most important phrases in your answer.
    """
    retrieved = retrieve(query)
    sources = retrieved["sources"]

    # Nothing retrieved: return a fixed message instead of an empty report.
    if not sources:
        return "No relevant passages found in the textbook."

    header = f"[Confidence: {retrieved['confidence']} | Top score: {retrieved['top_score']:.3f}]\n\n"

    # One pass builds both the full passage blocks and the compact source list.
    passage_blocks = []
    source_lines = []
    for i, s in enumerate(sources):
        passage_blocks.append(
            f"[SOURCE {i+1} | Ch{s['chapter_num']} › {s['section_title']} › p.{s['textbook_page']} | score: {s['score']:.3f}]\n{s['text']}"
        )
        source_lines.append(
            f"[{i+1}] Ch{s['chapter_num']} › {s['section_title']} › p.{s['textbook_page']} (score: {s['score']:.3f})"
        )

    passages = "\n\n---\n\n".join(passage_blocks)
    footer = "\n\n===SOURCES===\n" + "\n".join(source_lines)

    return header + passages + footer
|
||||
|
||||
# Registers the module-level SYSTEM_PROMPT text as an MCP prompt on the server.
@mcp.prompt()
def system_prompt() -> str:
    """The APUSH tutor system prompt."""
    # Returned unchanged; all wording lives in the SYSTEM_PROMPT constant.
    return SYSTEM_PROMPT
|
||||
|
||||
# ── Run ────────────────────────────────────────────────────────────────────
|
||||
if __name__ == "__main__":
    # Imported here so they are only needed when the module runs as a script.
    import uvicorn
    from starlette.middleware.cors import CORSMiddleware

    # ASGI application serving the MCP server over the streamable HTTP transport.
    app = mcp.streamable_http_app()

    # NOTE(review): wildcard origins allow any web page open in a local
    # browser to send requests to this server — confirm that is intended.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
        # Browsers only let scripts read response headers that are explicitly
        # exposed; browser-based MCP clients need the session id header.
        expose_headers=["Mcp-Session-Id"],
    )

    # Listen on the loopback interface only.
    uvicorn.run(app, host="127.0.0.1", port=52437)
|
||||
Reference in New Issue
Block a user