Sbnos committed
Commit bcf3e78 · 1 Parent(s): 35e79b2

adding other specialties

Files changed (1):
  app.py  +270 -117
app.py CHANGED
@@ -4,142 +4,295 @@ from together import Together
 from langchain_community.vectorstores import Chroma
 from langchain_huggingface import HuggingFaceEmbeddings

-# --- Configuration ---
-# TogetherAI API key (env var name pilotikval)
 TOGETHER_API_KEY = os.environ.get("pilotikval")
 if not TOGETHER_API_KEY:
-    st.error("Missing pilotikval environment variable.")
     st.stop()

 # Initialize TogetherAI client
-client = Together(api_key=TOGETHER_API_KEY)
-
-# Embeddings setup
-EMBED_MODEL_NAME = "BAAI/bge-base-en"
-embeddings = HuggingFaceEmbeddings(
-    model_name=EMBED_MODEL_NAME,
-    encode_kwargs={"normalize_embeddings": True},
-)
-
-# Sidebar: select collection
-st.sidebar.title("DocChatter RAG")
-collection = st.sidebar.selectbox(
-    "Choose a document collection:",
-    ['General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine']
-)
-
-dirs = {
-    'General Medicine': './oxfordmedbookdir/',
-    'RespiratoryFishman': './respfishmandbcud/',
-    'RespiratoryMurray': './respmurray/',
-    'MedMRCP2': './medmrcp2store/',
-    'OldMedicine': './mrcpchromadb/'
-}
-cols = {
-    'General Medicine': 'oxfordmed',
-    'RespiratoryFishman': 'fishmannotescud',
-    'RespiratoryMurray': 'respmurraynotes',
-    'MedMRCP2': 'medmrcp2notes',
-    'OldMedicine': 'mrcppassmednotes'
-}
-
-persist_directory = dirs[collection]
-collection_name = cols[collection]
-
-# Load Chroma vector store
-vectorstore = Chroma(
-    collection_name=collection_name,
-    persist_directory=persist_directory,
-    embedding_function=embeddings
-)
-retriever = vectorstore.as_retriever(search_kwargs={"k": 20})  # k=20
-
-# System prompt template
-
-def build_system(context: str) -> dict:
-    """
-    Build a comprehensive system prompt:
-    - Act as an expert medical assistant and attentive listener.
-    - Leverage retrieved context to craft detailed, accurate, and empathetic responses.
-    - Ask clarifying follow-up questions if the user's query is ambiguous.
-    - Structure answers clearly with headings, bullet points, and step-by-step explanations.
-    - Cite relevant context sections when appropriate.
-    - Maintain conversational memory for follow-up continuity.
-    """
-    prompt = f"""
-    You are a world-class medical assistant and conversational partner.
-    Listen carefully to the user's questions, reference the context below, and provide a thorough, evidence-based response.
-    If any part of the question is unclear, ask a clarifying question before proceeding.
-    Organize your answer with clear headings or bullet points, and refer back to specific context snippets as needed.
-    Always be empathetic, concise, and precise in your medical explanations.
-    Retain memory of previous user messages to support follow-up interactions.
-    === Retrieved Context Start ===
 {context}
-    === Retrieved Context End ===
 """
     return {"role": "system", "content": prompt}

-st.title("🩺 DocChatter RAG (Streaming & Memory)")
-
-# Initialize chat history
-if 'chat_history' not in st.session_state:
-    st.session_state.chat_history = []  # list of dicts {role, content}
-
-# Get user input at top level
-user_prompt = st.chat_input("Ask anything about your docs…")
-
-# Tabs for UI
-chat_tab, clear_tab = st.tabs(["Chat", "Clear History"])
-
-with chat_tab:
-    # Display existing chat
-    for msg in st.session_state.chat_history:
-        st.chat_message(msg['role']).write(msg['content'])
-
-    # Handle new user input
-    if user_prompt:
-        # Echo user
-        st.chat_message("user").write(user_prompt)
-        st.session_state.chat_history.append({"role": "user", "content": user_prompt})
-
-        # Retrieve top-k documents
         try:
-            docs = retriever.invoke({"query": user_prompt})
-        except Exception:
-            docs = retriever.get_relevant_documents(user_prompt)
-        context = "\n---\n".join([d.page_content for d in docs])
-
-        # Build TogetherAI message sequence
-        messages = [build_system(context)]
-        for m in st.session_state.chat_history:
-            messages.append(m)
-
-        # Stream assistant response
-        response_container = st.chat_message("assistant")
-        stream_placeholder = response_container.empty()
-        answer = ""
-
-        for token in client.chat.completions.create(
-            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
-            messages=messages,
-            max_tokens=22048,
-            temperature=0.1,
-            stream=True
-        ):
-            try:
-                choice = token.choices[0]
-                delta = getattr(choice.delta, 'content', '')
-                if delta:
-                    answer += delta
-                    stream_placeholder.write(answer)
-            except (IndexError, AttributeError):
-                continue
-
-        # Save assistant response
-        st.session_state.chat_history.append({"role": "assistant", "content": answer})
-
-with clear_tab:
-    if st.button("🗑️ Clear chat history"):
-        st.session_state.chat_history = []
-        st.experimental_rerun()

 from langchain_community.vectorstores import Chroma
 from langchain_huggingface import HuggingFaceEmbeddings

+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+# Vector store configurations
+VECTOR_STORES = {
+    "Paediatrics": {
+        "collection_name": "paedia",
+        "persist_directory": "nelsonpaedia"
+    },
+    "Respiratory": {
+        "collection_name": "respmurraynotes",
+        "persist_directory": "respmurray"
+    },
+    "Dermatology": {
+        "collection_name": "derma",
+        "persist_directory": "rookderma"
+    },
+    "Endocrine": {
+        "collection_name": "endocrine",
+        "persist_directory": "williamsendocrine"
+    },
+    "Gastroenterology": {
+        "collection_name": "gastro",
+        "persist_directory": "yamadagastro"
+    },
+    "Surgery": {
+        "collection_name": "gensurgery",
+        "persist_directory": "baileysurgery"
+    },
+    "Neurology": {
+        "collection_name": "neuro",
+        "persist_directory": "bradleyneuro"
+    },
+    "Cardiology": {
+        "collection_name": "cardiobraun",
+        "persist_directory": "braunwaldcardiofin"
+    },
+    "Nephrology": {
+        "collection_name": "nephro",
+        "persist_directory": "brennernephro"
+    },
+    "Orthopedics": {
+        "collection_name": "oportho",
+        "persist_directory": "campbellorthop"
+    },
+    "Rheumatology": {
+        "collection_name": "rheumatology",
+        "persist_directory": "firesteinrheumatology"
+    }
+}
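
Each entry above maps one specialty to an on-disk Chroma collection, so adding a specialty is a single dictionary entry. A hypothetical startup guard (not part of this commit; `AVAILABLE_STORES` is an illustrative name) could keep the sidebar from offering collections whose directory is missing:

    import os

    # Illustrative only: expose a specialty in the UI only if its
    # persist_directory actually exists on disk.
    AVAILABLE_STORES = {
        name: cfg
        for name, cfg in VECTOR_STORES.items()
        if os.path.isdir(cfg["persist_directory"])
    }
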
+
+# Model configurations
+EMBED_MODEL = "BAAI/bge-base-en"
+LLM_MODEL = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+RETRIEVAL_K = 20
+
+# ============================================================================
+# PAGE CONFIG
+# ============================================================================
+
+st.set_page_config(
+    page_title="DocChatter Medical RAG",
+    page_icon="🩺",
+    layout="wide"
+)
+
+# ============================================================================
+# INITIALIZATION
+# ============================================================================
+
+# Check API key
 TOGETHER_API_KEY = os.environ.get("pilotikval")
 if not TOGETHER_API_KEY:
+    st.error("❌ Missing 'pilotikval' environment variable. Please set your TogetherAI API key.")
     st.stop()

 # Initialize TogetherAI client
+@st.cache_resource
+def get_together_client():
+    return Together(api_key=TOGETHER_API_KEY)
+
+client = get_together_client()
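`st.cache_resource` makes the Together client a per-process singleton: the decorated function body runs once, and every rerun and session reuses the same object instead of rebuilding it on each script run. A minimal sketch of that behaviour (illustrative names, not from the commit):

    import streamlit as st

    @st.cache_resource
    def get_client():
        print("constructed")  # printed once per server process
        return object()

    assert get_client() is get_client()  # later calls return the cached instance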
 
+# Initialize embeddings
+@st.cache_resource
+def get_embeddings():
+    return HuggingFaceEmbeddings(
+        model_name=EMBED_MODEL,
+        encode_kwargs={"normalize_embeddings": True}
+    )
+
+embeddings = get_embeddings()
+
+# ============================================================================
+# SESSION STATE
+# ============================================================================
+
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+
+if 'selected_collection' not in st.session_state:
+    st.session_state.selected_collection = list(VECTOR_STORES.keys())[0]
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+@st.cache_resource
+def load_vectorstore(_embeddings, collection_name, persist_directory):
+    """Load the Chroma collection and return a cached retriever."""
+    vectorstore = Chroma(
+        collection_name=collection_name,
+        persist_directory=persist_directory,
+        embedding_function=_embeddings
+    )
+    return vectorstore.as_retriever(search_kwargs={"k": RETRIEVAL_K})
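
The leading underscore in `_embeddings` is the Streamlit convention for unhashable arguments: `st.cache_resource` skips hashing any parameter whose name starts with `_`, so the cache key here is effectively `(collection_name, persist_directory)` while the embeddings object is passed through untouched.
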
+
+def build_system_prompt(context: str) -> dict:
+    """Build the system prompt around the retrieved context."""
+    prompt = f"""You are an expert medical assistant with access to authoritative medical literature.
+
+Your role:
+- Provide accurate, evidence-based medical information
+- Answer questions clearly and comprehensively
+- Ask clarifying questions if needed
+- Use the context below to support your answers
+- Be empathetic and professional
+- Remember previous messages in the conversation
+
+Retrieved Context:
 {context}
+
+Instructions:
+- Base your answers on the provided context
+- If the context doesn't contain relevant information, acknowledge this
+- Structure complex answers with clear organization
+- Cite specific information when referencing the context
 """
     return {"role": "system", "content": prompt}

+def stream_llm_response(messages):
+    """Yield the accumulating response text as chunks arrive from TogetherAI."""
+    response = ""
+    for chunk in client.chat.completions.create(
+        model=LLM_MODEL,
+        messages=messages,
+        max_tokens=4096,
+        temperature=0.1,
+        stream=True
+    ):
+        try:
+            if chunk.choices[0].delta.content:
+                response += chunk.choices[0].delta.content
+                yield response
+        except (IndexError, AttributeError):
+            continue
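
Each `yield` hands back the full text accumulated so far rather than only the newest delta, so the caller just repaints one placeholder with the latest value. Consuming the generator outside Streamlit (assuming the function and a `messages` list as above) would look like:

    final = ""
    for partial in stream_llm_response(messages):
        final = partial  # every yield supersedes the previous one
    print(final)
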

+# ============================================================================
+# SIDEBAR
+# ============================================================================

+with st.sidebar:
+    st.title("🩺 DocChatter Medical RAG")
+    st.markdown("---")
+
+    # Collection selector
+    st.subheader("📚 Select Medical Specialty")
+    selected = st.selectbox(
+        "Choose a collection:",
+        options=list(VECTOR_STORES.keys()),
+        index=list(VECTOR_STORES.keys()).index(st.session_state.selected_collection),
+        key="collection_selector"
+    )
+
+    if selected != st.session_state.selected_collection:
+        st.session_state.selected_collection = selected
+        st.rerun()
+
+    st.markdown("---")
+
+    # Stats
+    st.subheader("📊 Session Info")
+    st.metric("Messages", len(st.session_state.chat_history))
+    st.metric("Current Collection", selected)
+
+    st.markdown("---")
+
+    # Clear button
+    if st.button("🗑️ Clear Chat History", use_container_width=True):
+        st.session_state.chat_history = []
+        st.rerun()
+
+    st.markdown("---")
+    st.caption("Powered by TogetherAI & LangChain")

+# ============================================================================
+# MAIN CHAT INTERFACE
+# ============================================================================

+st.title("💬 Medical Document Chat")
+st.caption(f"Currently using: **{st.session_state.selected_collection}** collection")

+# Load retriever for selected collection
+config = VECTOR_STORES[st.session_state.selected_collection]
+retriever = load_vectorstore(
+    embeddings,
+    config["collection_name"],
+    config["persist_directory"]
+)

+# Display chat history
+for i, message in enumerate(st.session_state.chat_history):
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+        # Add copy button for assistant messages
+        if message["role"] == "assistant":
+            st.button(
+                "📋 Copy",
+                key=f"copy_{i}",
+                on_click=lambda msg=message["content"]: st.toast("Copied to clipboard! (Use Ctrl+C to copy manually)"),
+                help="Click to copy this response"
+            )
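
One caveat: the Copy button's `on_click` only raises a toast; Streamlit has no clipboard-write API, so the user still selects and copies the text manually. If a built-in copy affordance is wanted, one alternative (not used in this commit) is `st.code`, which renders its own copy icon on hover:

    st.code(message["content"], language=None)
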

+# Chat input
+user_input = st.chat_input("Ask me anything about medical topics...")
+
+if user_input:
+    # Add user message
+    st.session_state.chat_history.append({
+        "role": "user",
+        "content": user_input
+    })
+
+    # Display user message
+    with st.chat_message("user"):
+        st.markdown(user_input)
+
+    # Retrieve relevant documents
+    with st.spinner("🔍 Searching medical literature..."):
         try:
+            docs = retriever.invoke(user_input)
+        except Exception:
+            docs = retriever.get_relevant_documents(user_input)
+
+        context = "\n\n---\n\n".join([doc.page_content for doc in docs])
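
`retriever.invoke(...)` is the current Runnable-style entry point for LangChain retrievers; `get_relevant_documents` is the legacy method kept as a fallback for older installations. Either way the result is a list of `Document` objects whose `page_content` strings are joined into one context block; a minimal illustration (sample texts made up, import path per current langchain_core):

    from langchain_core.documents import Document

    docs = [Document(page_content="Asthma: stepwise therapy..."),
            Document(page_content="COPD: GOLD staging...")]
    context = "\n\n---\n\n".join(doc.page_content for doc in docs)
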
+
+    # Build messages for LLM
+    messages = [build_system_prompt(context)]
+
+    # Add chat history
+    for msg in st.session_state.chat_history:
+        messages.append({
+            "role": msg["role"],
+            "content": msg["content"]
+        })
+
+    # Stream assistant response
+    with st.chat_message("assistant"):
+        response_placeholder = st.empty()
+        full_response = ""
+
+        for response_chunk in stream_llm_response(messages):
+            full_response = response_chunk
+            response_placeholder.markdown(full_response + "▌")
+
+        response_placeholder.markdown(full_response)
+
+        # Add copy button
+        copy_button_key = f"copy_{len(st.session_state.chat_history)}"
+        st.button(
+            "📋 Copy",
+            key=copy_button_key,
+            on_click=lambda: st.toast("Response ready to copy! (Use Ctrl+C)"),
+            help="Click to copy this response"
+        )
+
+    # Save assistant response
+    st.session_state.chat_history.append({
+        "role": "assistant",
+        "content": full_response
+    })
+
+    st.rerun()
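
The closing `st.rerun()` re-executes the script, so the reply just appended to `st.session_state.chat_history` is redrawn by the history loop above, with its own keyed Copy button, rather than surviving only in the transient streaming placeholder.
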
+
+# ============================================================================
+# FOOTER
+# ============================================================================
+
+st.markdown("---")
+st.caption("⚠️ This is an AI assistant. Always consult qualified healthcare professionals for medical advice.")