Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import gradio as gr
 import nltk
 from nltk.tokenize import sent_tokenize
@@ -10,15 +9,15 @@ import openai
 
 # Set up OpenAI API key
 openai.api_key = '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'
+
 # Download NLTK data
 nltk.download('punkt')
-nltk.download('punkt_tab')
 
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
 model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
 
-manual_path="ubuntu_manual.txt"
+manual_path = "ubuntu_manual.txt"
 
 # Load the Ubuntu manual from a .txt file
 with open(manual_path, "r", encoding="utf-8") as file:
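Note on this hunk: the surrounding code leans on names defined in unchanged lines the diff elides (embed_text, manual_chunks, chunk_embeddings_np), and the OpenAI key is committed in plaintext; reading it from the environment (openai.api_key = os.environ["OPENAI_API_KEY"]) is the usual pattern. For orientation, a minimal sketch of what the elided chunking and embedding code plausibly looks like, assuming sentence-level chunks and mean pooling over MiniLM token embeddings (the names embed_text, manual_chunks, and chunk_embeddings_np come from the diff; everything else here is an assumption, not the commit's actual code):

import torch

# Read the manual and split it into sentence-level chunks (assumed; the
# real chunking logic lives in lines this diff does not show).
manual_text = open(manual_path, "r", encoding="utf-8").read()
manual_chunks = sent_tokenize(manual_text)

def embed_text(texts):
    # Tokenize a batch of strings and mean-pool MiniLM's token embeddings.
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # FAISS wants a 2-D float32 numpy array.
    return outputs.last_hidden_state.mean(dim=1).numpy().astype("float32")

chunk_embeddings_np = embed_text(manual_chunks)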
@@ -64,17 +63,22 @@ dimension = chunk_embeddings_np.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(chunk_embeddings_np)
 
-# Function to retrieve relevant chunks for a user query
+# Function to retrieve relevant chunks for a user query and print indices and distances
 def retrieve_chunks(query, k=5):
     query_embedding = embed_text([query])
     distances, indices = index.search(query_embedding, k=k)
     valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
     relevant_chunks = [manual_chunks[i] for i in valid_indices]
-    return relevant_chunks
+
+    # Print indices and distances
+    for i, idx in enumerate(valid_indices):
+        print(f"Index: {idx}, Distance: {distances[0][i]}")
+
+    return relevant_chunks, indices[0], distances[0]
 
 # Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
 def rag_response_gpt3_5(query, k=3, max_tokens=150):
-    relevant_chunks = retrieve_chunks(query, k=k)
+    relevant_chunks, indices, distances = retrieve_chunks(query, k=k)
     if not relevant_chunks:
         return "Sorry, I couldn't find relevant information."
 
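Two details of the new retrieve_chunks contract are easy to miss: callers now have to unpack a 3-tuple, and while relevant_chunks keeps only indices that fall inside manual_chunks, the returned indices[0] and distances[0] are FAISS's raw outputs, so the sequences can fall out of step whenever an index is filtered away. A quick interactive check of the new signature (the query string is just an example):

chunks, indices, distances = retrieve_chunks("How do I update packages with apt?", k=3)
for chunk, idx, dist in zip(chunks, indices, distances):
    # Smaller L2 distance means the chunk embedding lies closer to the query.
    print(f"[{idx}] distance={dist:.3f} :: {chunk[:80]}")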
@@ -101,10 +105,17 @@ def rag_response_gpt3_5(query, k=3, max_tokens=150):
     return response.choices[0].message['content'].strip()
 
 # Chat history to maintain conversation context
-history = []
-
-# Define Gradio interface function with chat history
 def chatbot(query, history):
+    if history is None:
+        history = []
+
+    # Retrieve relevant chunks along with their indices and distances
+    relevant_chunks, indices, distances = retrieve_chunks(query)
+
+    # Print the indices and distances of the retrieved chunks
+    print(f"Retrieved Indices: {indices}")
+    print(f"Retrieved Distances: {distances}")
+
     response = rag_response_gpt3_5(query)
     history.append((query, response))
 
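The diff elides the middle of rag_response_gpt3_5, but the return line at the top of this hunk shows it ends by unpacking a ChatCompletion response. Judging from that line, the elided body builds a prompt from the retrieved chunks and calls the pre-1.0 openai API, roughly like this (a sketch under that assumption, not the commit's actual code):

    # Inside rag_response_gpt3_5, after the retrieval guard above:
    context = "\n\n".join(relevant_chunks)
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Answer using the provided Ubuntu manual excerpts."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
        ],
        max_tokens=max_tokens,
    )
    return response.choices[0].message['content'].strip()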
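As committed, chatbot runs retrieval twice per query: once directly for the debug prints and again inside rag_response_gpt3_5, which uses its own k=3 default. The diff also cuts off before chatbot's return statement and the Gradio wiring. A minimal sketch of how such a handler is commonly hooked up, assuming a Blocks layout and tuple-style chat history (component names and the return shape are assumptions; the actual interface code is outside the hunks shown):

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    box = gr.Textbox(placeholder="Ask a question about the Ubuntu manual")

    def respond(query, history):
        # Mirror the diff's chatbot logic: answer, extend history, clear the box.
        history = history or []
        response = rag_response_gpt3_5(query)
        history.append((query, response))
        return history, ""

    box.submit(respond, inputs=[box, chat], outputs=[chat, box])

demo.launch()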