mryt66 committed
Commit · b4f3a10 · 1 Parent(s): a840639
Initial commit
api.py
CHANGED
@@ -7,10 +7,8 @@ import numpy as np
 from datetime import datetime
 from contextlib import asynccontextmanager

-from fastapi import FastAPI,
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from sqlalchemy import Column, Integer, Text, DateTime, create_engine
-from sqlalchemy.orm import declarative_base, sessionmaker, Session
 from pydantic import BaseModel
 import uvicorn
 from starlette.concurrency import run_in_threadpool
@@ -18,7 +16,7 @@ import subprocess, sys

 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

-# Always use local data directory (no env var logic)
+# Always use local data directory (no env var logic and no DB)
 DATA_DIR = os.path.join(SCRIPT_DIR, "data")
 os.makedirs(DATA_DIR, exist_ok=True)

@@ -26,25 +24,6 @@ OUTPUT_CHUNKS_FILE = os.path.join(SCRIPT_DIR, "output_chunks.jsonl")
 RAG_CONFIG_FILE = os.path.join(SCRIPT_DIR, "rag_prompt_config.jsonl")
 FAISS_INDEX_FILE = os.path.join(DATA_DIR, "faiss_index.index")
 EMBEDDINGS_FILE = os.path.join(DATA_DIR, "chunk_embeddings.npy")
-DATABASE_URL = f"sqlite:///{os.path.join(DATA_DIR, 'conversations.db')}"
-
-Base = declarative_base()
-engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
-SessionLocal = sessionmaker(bind=engine)
-
-
-# Database model
-class Conversation(Base):
-    __tablename__ = "conversations"
-
-    id = Column(Integer, primary_key=True, index=True)
-    query = Column(Text)
-    response = Column(Text)
-    context = Column(Text)
-    base_context = Column(Text)
-    system_prompt = Column(Text)
-    full_prompt = Column(Text)
-    timestamp = Column(DateTime, default=datetime.utcnow)


 # Pydantic models for API
@@ -65,34 +44,23 @@ class ChatRequest(BaseModel):
 # Lifespan function to handle startup and shutdown
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    # Startup
-    print("Starting RAG Chat API...")
-    print(f"SQLite DB path: {os.path.join(DATA_DIR, 'conversations.db')}")
-    # Ensure tables now that directory is confirmed writable
-    Base.metadata.create_all(bind=engine)
+    # Startup (no DB setup anymore)
+    print("Starting RAG Chat API (stateless, no database)...")

-    # Configure Gemini here (fail early but at startup)
     API_KEY = os.getenv("GEMINI_API_KEY")
     if not API_KEY:
         raise RuntimeError("Please set GEMINI_API_KEY environment variable")
     genai.configure(api_key=API_KEY)

-
-
-
-
-
-
-    else:
-        print("❌ Failed to initialize RAG system")
-        raise RuntimeError("System initialization failed")
-    except Exception as e:
-        print(f"❌ Initialization error: {str(e)}")
-        raise RuntimeError(f"System initialization failed: {str(e)}")
+    success, chunks_count = initialize_system()
+    if success:
+        print(f"✅ RAG system initialized with {chunks_count} chunks")
+        print("API ready at: http://localhost:8000 (docs at /docs)")
+    else:
+        raise RuntimeError("System initialization failed")

-    yield
+    yield

-    # Shutdown (if needed)
     print("Shutting down RAG Chat API...")


@@ -135,13 +103,7 @@ system_prompt = None
 model_embedding = None


-#
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
+# Removed database session dependency (stateless mode)


 def load_chunks(json_file):
@@ -278,11 +240,9 @@ def run_generate_rag_data():


 def initialize_system():
-    """Initialize the RAG system with precomputed embeddings"""
+    """Initialize the RAG system with precomputed embeddings (stateless)."""
     global chunks_data, base_chunk, system_prompt, model_embedding
-
     try:
-        # If embeddings or required JSON files are missing, (re)generate data first.
         need_generation = (
             not os.path.exists(EMBEDDINGS_FILE)
             or not os.path.exists(OUTPUT_CHUNKS_FILE)
@@ -292,72 +252,41 @@ def initialize_system():
             print("RAG data or embeddings missing. Triggering data generation...")
             run_generate_rag_data()

-        # Initialize embedding model
         print("Loading embedding model...")
         model_embedding = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")

-        # Load configurations
         print("Loading chunks and configuration...")
         chunks_data = load_chunks(OUTPUT_CHUNKS_FILE)
         config = load_chunks(RAG_CONFIG_FILE)[0]
         base_chunk = config["base_chunk"]
         system_prompt = config["system_prompt"]
-
         print(f"Loaded {len(chunks_data)} chunks from knowledge base")

-        # Precompute embeddings once (will compute if file absent)
         compute_and_cache_embeddings(chunks_data)
-
-        print("System initialized successfully!")
+        print("System initialized successfully (stateless mode)")
         return True, len(chunks_data)
-
     except Exception as e:
         print(f"Failed to initialize system: {e}")
         return False, 0


 @app.post("/chat", response_model=ChatResponse)
-async def chat_endpoint(payload: ChatRequest
-    """Chat endpoint that processes queries
-
-    Accepts a JSON body: {"query": "..."
-    """
+async def chat_endpoint(payload: ChatRequest):
+    """Chat endpoint that processes queries (no persistence)."""
     global base_chunk, system_prompt
-
     query = (payload.query or "").strip()
     if not query:
         raise HTTPException(status_code=400, detail="Query cannot be empty")

-
-
-
-
-        base_chunk, system_prompt, query, history_text
-    )
-
-    # Avoid blocking the event loop with a sync network call
-    answer = await run_in_threadpool(get_answer, full_prompt)
-    if not answer:
-        answer = "Sorry, I failed to get a response from Gemini. Please try again."
-
-    # Save conversation to database
-    conversation = Conversation(
-        query=query,
-        response=answer,
-        context=context,
-        base_context=base_chunk["content"],
-        system_prompt=system_prompt["content"],
-        full_prompt=full_prompt,
-    )
-
-    db.add(conversation)
-    db.commit()
-
-    return ChatResponse(response=answer, timestamp=conversation.timestamp)
+    history_text = _format_history(payload.history)
+    full_prompt, _context = construct_prompt(
+        base_chunk, system_prompt, query, history_text
+    )

-
-
-
+    answer = await run_in_threadpool(get_answer, full_prompt)
+    if not answer:
+        answer = "Sorry, I failed to get a response from Gemini. Please try again."
+    return ChatResponse(response=answer, timestamp=datetime.utcnow())


 # Simple health probe
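The endpoint keeps the event loop free by pushing the synchronous Gemini call into a worker thread, as the removed comment ("Avoid blocking the event loop with a sync network call") notes. A minimal, self-contained sketch of that pattern; blocking_call is a stand-in for the real get_answer():

# Sketch of the run_in_threadpool pattern used above: a blocking function
# runs in a worker thread so the event loop keeps serving other requests.
# blocking_call is a hypothetical stand-in for get_answer().
import asyncio
import time

from starlette.concurrency import run_in_threadpool

def blocking_call(prompt: str) -> str:
    time.sleep(1)  # simulates a slow, synchronous network call
    return f"echo: {prompt}"

async def main() -> None:
    answer = await run_in_threadpool(blocking_call, "hello")
    print(answer)

asyncio.run(main())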
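For quick manual testing of the now-stateless endpoint, a client sketch; it assumes the server runs on localhost:8000 and that ChatRequest takes a "query" string plus an optional "history" list (the diff references payload.history but does not show the model's fields):

# Hypothetical client call (assumptions: localhost:8000, and a ChatRequest
# with "query" and an optional "history" list, per the diff above).
import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"query": "What does this Space do?", "history": []},
    timeout=60,
)
resp.raise_for_status()
body = resp.json()  # ChatResponse: {"response": ..., "timestamp": ...}
print(body["response"])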