danielchrism committed on
Commit ab17058 · 1 Parent(s): 7743ba8

first commit

Files changed (41)
  1. .gitignore +0 -0
  2. .vscode/launch.json +12 -0
  3. .vscode/settings.json +8 -0
  4. README.md +2 -2
  5. __pycache__/app.cpython-311.pyc +0 -0
  6. __pycache__/app.cpython-312.pyc +0 -0
  7. __pycache__/google_search.cpython-311.pyc +0 -0
  8. __pycache__/google_search.cpython-312.pyc +0 -0
  9. __pycache__/run.cpython-312.pyc +0 -0
  10. app.py +143 -0
  11. curl.exe +3 -0
  12. google_search.py +26 -0
  13. models/__init__.py +4 -0
  14. models/__pycache__/__init__.cpython-311.pyc +0 -0
  15. models/__pycache__/__init__.cpython-312.pyc +0 -0
  16. models/__pycache__/claim_extractor.cpython-311.pyc +0 -0
  17. models/__pycache__/claim_extractor.cpython-312.pyc +0 -0
  18. models/__pycache__/evidence_retriever.cpython-311.pyc +0 -0
  19. models/__pycache__/evidence_retriever.cpython-312.pyc +0 -0
  20. models/__pycache__/keyword_extractor.cpython-311.pyc +0 -0
  21. models/__pycache__/keyword_extractor.cpython-312.pyc +0 -0
  22. models/__pycache__/nli_classifier.cpython-311.pyc +0 -0
  23. models/__pycache__/nli_classifier.cpython-312.pyc +0 -0
  24. models/claim_extractor.py +34 -0
  25. models/evidence_retriever.py +102 -0
  26. models/keyword_extractor.py +40 -0
  27. models/nli_classifier.py +93 -0
  28. requirements.txt +11 -0
  29. run.py +17 -0
  30. static/css/style.css +64 -0
  31. static/js/main.js +141 -0
  32. templates/index.html +96 -0
  33. utils/__init__.py +3 -0
  34. utils/__pycache__/__init__.cpython-311.pyc +0 -0
  35. utils/__pycache__/__init__.cpython-312.pyc +0 -0
  36. utils/__pycache__/config.cpython-311.pyc +0 -0
  37. utils/__pycache__/config.cpython-312.pyc +0 -0
  38. utils/__pycache__/similarity.cpython-311.pyc +0 -0
  39. utils/__pycache__/similarity.cpython-312.pyc +0 -0
  40. utils/config.py +24 -0
  41. utils/similarity.py +43 -0
.gitignore ADDED
Binary file (16 Bytes).
 
.vscode/launch.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "version": "0.2.0",
+     "configurations": [
+         {
+             "name": "TruthCheck Gradio",
+             "type": "python",
+             "request": "launch",
+             "program": "run.py",
+             "console": "integratedTerminal"
+         }
+     ]
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "python.defaultInterpreterPath": "./venv/bin/python",
+     "python.terminal.activateEnvironment": true,
+     "files.exclude": {
+         "**/__pycache__": true,
+         "**/*.pyc": true
+     }
+ }
README.md CHANGED
@@ -1,2 +1,2 @@
- # truth-check
-
+ # TruthCheck-App
+ An intelligent fact-checking tool that not only helps users verify the accuracy of digital content instantly, but also fosters greater transparency and accountability in online communication.
__pycache__/app.cpython-311.pyc ADDED
Binary file (7.04 kB).
 
__pycache__/app.cpython-312.pyc ADDED
Binary file (6.23 kB).
 
__pycache__/google_search.cpython-311.pyc ADDED
Binary file (1.99 kB).
 
__pycache__/google_search.cpython-312.pyc ADDED
Binary file (1.57 kB).
 
__pycache__/run.cpython-312.pyc ADDED
Binary file (4.82 kB).
 
app.py ADDED
@@ -0,0 +1,143 @@
+ # app.py - Main Flask application
+
+ from flask import Flask, render_template, request, jsonify
+ import os
+ import threading
+ import time
+
+ # Import all necessary components from the models and utils packages
+ from models.claim_extractor import ClaimExtractor
+ from models.keyword_extractor import KeywordExtractor
+ from models.evidence_retriever import EvidenceRetriever
+ from models.nli_classifier import NLIClassifier
+ from utils.similarity import calculate_similarity
+ from utils.config import Config
+
+ # Initialize models globally (singleton-style) for efficiency
+ claim_extractor = ClaimExtractor()
+ keyword_extractor = KeywordExtractor()
+ evidence_retriever = EvidenceRetriever()
+ nli_classifier = NLIClassifier()
+
+ class TruthCheckSystem:
+     def __init__(self):
+         self.claim_extractor = claim_extractor
+         self.keyword_extractor = keyword_extractor
+         self.evidence_retriever = evidence_retriever
+         self.nli_classifier = nli_classifier
+
+     def verify_claim(self, text):
+         """
+         Main function to verify a factual claim.
+         Processes the claim through extraction, evidence retrieval, and NLI.
+         """
+         try:
+             # Step 1: Extract claims from the input text
+             claims = self.claim_extractor.extract_claims(text)
+             if not claims:
+                 return "No valid claims found", 0.0, "Please provide a clear factual statement."
+
+             claim = claims[0]
+
+             # Step 2: Extract keywords from the claim
+             keywords = self.keyword_extractor.extract_keywords(claim)
+
+             # Step 3: Retrieve evidence from external sources (Wikipedia, web/news search)
+             evidence_items = self.evidence_retriever.get_evidence(keywords)
+
+             if not evidence_items:
+                 return "Low Confidence", 0.3, "Not enough reliable evidence found from external sources."
+
+             # Step 4: Check semantic similarity between the claim and retrieved evidence
+             relevant_evidence = []
+             for item in evidence_items:
+                 similarity = calculate_similarity(claim, item['content'])
+                 if similarity > Config.SIMILARITY_THRESHOLD:
+                     relevant_evidence.append(item)
+
+             if not relevant_evidence:
+                 return "Low Confidence", 0.4, "No semantically relevant evidence found after similarity check."
+
+             # Sort relevant evidence by credibility score before NLI so the most
+             # credible relevant piece of evidence drives the classification.
+             relevant_evidence.sort(key=lambda x: x.get('credibility_score', 0.0), reverse=True)
+             primary_evidence = relevant_evidence[0]  # The most credible relevant piece of evidence
+
+             # Step 5: Perform Natural Language Inference (NLI)
+             nli_result = self.nli_classifier.classify(claim, primary_evidence['content'])
+
+             # Step 6: Format the result based on NLI classification and adjust confidence by credibility
+             label = "Low Confidence"
+             nli_confidence = nli_result['confidence']
+
+             # Factor source credibility into the final confidence score:
+             # weight the NLI confidence at 70% and the source credibility at 30%.
+             source_credibility_weight = primary_evidence.get('credibility_score', 0.5)  # Default to 0.5 if missing
+             final_confidence = (nli_confidence * 0.7) + (source_credibility_weight * 0.3)
+             final_confidence = min(max(final_confidence, 0.0), 1.0)  # Keep the score within 0-1
+
+             if nli_result['label'] == 'ENTAILMENT':
+                 label = "True"
+             elif nli_result['label'] == 'CONTRADICTION':
+                 label = "False"
+             else:
+                 label = "Low Confidence"
+                 # If NLI is neutral, credibility only fine-tunes the score; the result stays low confidence
+                 final_confidence = max(final_confidence, 0.3)  # Ensure neutral isn't inflated by credibility alone
+
+             evidence_snippet = primary_evidence['content']
+             if len(evidence_snippet) > 500:
+                 evidence_snippet = evidence_snippet[:500] + "..."
+
+             source = f"Source: {primary_evidence['source']} (Credibility: {source_credibility_weight:.2f})"
+
+             return label, final_confidence, f"{evidence_snippet}\n\n{source}"
+
+         except Exception as e:
+             print(f"An error occurred during claim verification: {e}")
+             return "Error", 0.0, f"An internal error occurred: {str(e)}. Please try again."
+
+ # Initialize the TruthCheck system once when the app starts
+ truthcheck_system_instance = TruthCheckSystem()
+
+ def create_app():
+     app = Flask(__name__, static_folder='static', template_folder='templates')
+     app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', Config.SECRET_KEY)
+     app.config['DEBUG'] = Config.DEBUG
+
+     @app.route('/')
+     def index():
+         return render_template('index.html')
+
+     @app.route('/api/verify', methods=['POST'])
+     def verify_claim_api():
+         try:
+             data = request.get_json()
+             claim_text = data.get('claim', '')
+
+             if not claim_text:
+                 return jsonify({'error': 'No claim provided'}), 400
+
+             label, confidence, evidence = truthcheck_system_instance.verify_claim(claim_text)
+
+             result = {
+                 'label': label,
+                 'confidence': round(confidence, 3),
+                 'evidence': evidence,
+                 'claim': claim_text
+             }
+
+             return jsonify(result)
+
+         except Exception as e:
+             print(f"API error: {e}")
+             return jsonify({'error': f'An error occurred on the server: {str(e)}'}), 500
+
+     @app.route('/health')
+     def health_check():
+         return jsonify({'status': 'healthy', 'message': 'TruthCheck backend is running.'})
+
+     return app
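The final score blends NLI confidence with source credibility as final_confidence = 0.7 * nli_confidence + 0.3 * credibility_score (for example, 0.7 * 0.9 + 0.3 * 0.5 = 0.78). A minimal sketch of exercising this pipeline directly, not part of the commit and assuming the spaCy model and transformer weights used by the modules below are installed:

# Hypothetical usage sketch; the heavy models load at import time.
from app import TruthCheckSystem, create_app

system = TruthCheckSystem()
label, confidence, evidence = system.verify_claim("The sun is a star.")
print(label, round(confidence, 3))
print(evidence)

# Or serve the same pipeline over HTTP via the /api/verify endpoint:
create_app().run(debug=True, host="0.0.0.0", port=5000)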
curl.exe ADDED
@@ -0,0 +1,3 @@
+ curl.exe -X POST http://127.0.0.1:5000/api/verify `
+ -H "Content-Type: application/json" `
+ -d "{\"claim\": \"The sun is a star.\"}"
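For local testing from Python instead of PowerShell, an equivalent sketch (not part of the commit; assumes the Flask server started by run.py is listening on port 5000):

import requests  # already listed in requirements.txt

resp = requests.post(
    "http://127.0.0.1:5000/api/verify",
    json={"claim": "The sun is a star."},
    timeout=120,  # the first request can be slow while the models download and load
)
print(resp.json())  # {'label': ..., 'confidence': ..., 'evidence': ..., 'claim': ...}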
google_search.py ADDED
@@ -0,0 +1,26 @@
+ # google_search.py
+
+ class SearchResult:
+     def __init__(self, snippet, url, source_title=None):
+         self.snippet = snippet
+         self.url = url
+         self.source_title = source_title
+
+ class SearchResponse:
+     def __init__(self, results):
+         self.results = results
+
+ def search(queries, num_results=3):
+     """Mock search function returning dummy data for testing."""
+     responses = []
+     for query in queries:
+         dummy_results = [
+             SearchResult(
+                 snippet=f"This is a mock snippet for query '{query}' - result {i+1}.",
+                 url=f"https://example.com/{query.replace(' ', '_')}/{i}",
+                 source_title="Mock News Source"
+             )
+             for i in range(num_results)
+         ]
+         responses.append(SearchResponse(results=dummy_results))
+     return responses
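Note that this module only returns canned snippets, so any evidence flowing through the news path is placeholder data until a real search backend replaces it. A quick sketch of its behaviour (not part of the commit):

from google_search import search

responses = search(["mars rover landing 2021"], num_results=2)
for result in responses[0].results:
    print(result.snippet, "|", result.url)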
models/__init__.py ADDED
@@ -0,0 +1,4 @@
+ """
+ TruthCheck Models Package
+ Contains all the NLP and ML models for fact-checking
+ """
models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (243 Bytes).
 
models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (226 Bytes).
 
models/__pycache__/claim_extractor.cpython-311.pyc ADDED
Binary file (2.01 kB).
 
models/__pycache__/claim_extractor.cpython-312.pyc ADDED
Binary file (1.77 kB).
 
models/__pycache__/evidence_retriever.cpython-311.pyc ADDED
Binary file (4.55 kB).
 
models/__pycache__/evidence_retriever.cpython-312.pyc ADDED
Binary file (4.2 kB).
 
models/__pycache__/keyword_extractor.cpython-311.pyc ADDED
Binary file (2.39 kB).
 
models/__pycache__/keyword_extractor.cpython-312.pyc ADDED
Binary file (2.1 kB).
 
models/__pycache__/nli_classifier.cpython-311.pyc ADDED
Binary file (3.31 kB).
 
models/__pycache__/nli_classifier.cpython-312.pyc ADDED
Binary file (2.84 kB).
 
models/claim_extractor.py ADDED
@@ -0,0 +1,34 @@
+ # models/claim_extractor.py
+ import re
+ import spacy
+
+ class ClaimExtractor:
+     def __init__(self):
+         try:
+             self.nlp = spacy.load("en_core_web_sm")
+         except OSError:
+             print("Please install spaCy English model: python -m spacy download en_core_web_sm")
+             raise
+
+     def extract_claims(self, text):
+         """Extract factual claims from text"""
+         if not text or len(text.strip()) < 10:
+             return []
+
+         # Use spaCy for sentence segmentation
+         doc = self.nlp(text)
+         claims = []
+
+         for sent in doc.sents:
+             sentence = sent.text.strip()
+
+             # Filter out questions, commands, and short sentences
+             if (len(sentence.split()) > 5 and
+                 not sentence.endswith('?') and
+                 not sentence.startswith(('How', 'What', 'When', 'Where', 'Why', 'Who')) and
+                 not re.match(r'^(Please|Let|Can you)', sentence, re.IGNORECASE)):
+
+                 claims.append(sentence)
+
+         return claims if claims else [text.strip()]
+
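A small usage sketch (not part of the commit; assumes en_core_web_sm has been downloaded):

from models.claim_extractor import ClaimExtractor

extractor = ClaimExtractor()
print(extractor.extract_claims(
    "NASA landed a rover on Mars in 2021. How does that work?"
))
# The question is filtered out; only the declarative sentence is returned as a claim.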
models/evidence_retriever.py ADDED
@@ -0,0 +1,102 @@
+ # models/evidence_retriever.py
+ import wikipedia
+ import requests
+ import time
+
+ # Import the google_search tool for real-time news simulation
+ from google_search import search as google_search_tool
+
+ class EvidenceRetriever:
+     def __init__(self):
+         self.wikipedia_timeout = 10
+         self.news_timeout = 10  # This timeout is less relevant now but kept for consistency
+
+     def get_evidence(self, keywords):
+         """Retrieve evidence from multiple sources"""
+         evidence = []
+
+         # Get evidence from Wikipedia
+         wiki_evidence = self._get_wikipedia_evidence(keywords)
+         evidence.extend(wiki_evidence)
+
+         # Get evidence from a simulated "real-time" news search
+         # This replaces the unreliable Google News RSS feed approach
+         realtime_news_evidence = self._get_realtime_news_evidence(keywords)
+         evidence.extend(realtime_news_evidence)
+
+         return evidence
+
+     def _get_wikipedia_evidence(self, keywords):
+         """Retrieve evidence from Wikipedia"""
+         evidence = []
+
+         try:
+             # Increased search results from 3 to 5
+             search_terms = ' '.join(keywords[:3])
+             search_results = wikipedia.search(search_terms, results=5)
+
+             for title in search_results:
+                 try:
+                     # Increased summary sentences from 3 to 5
+                     summary = wikipedia.summary(title, sentences=5, auto_suggest=False)
+
+                     evidence.append({
+                         'content': summary,
+                         'source': f'Wikipedia - {title}',
+                         'url': f'https://en.wikipedia.org/wiki/{title.replace(" ", "_")}'
+                     })
+
+                     # Allow more Wikipedia sources (up to 3 instead of 2)
+                     if len(evidence) >= 3:
+                         break
+
+                 except (wikipedia.DisambiguationError, wikipedia.PageError):
+                     print(f"Wikipedia disambiguation/page error for '{title}'. Skipping.")
+                     continue
+
+         except Exception as e:
+             print(f"Wikipedia search error: {e}")
+
+         return evidence
+
+     def _get_realtime_news_evidence(self, keywords):
+         """
+         Simulates real-time news retrieval using the google_search tool.
+         This focuses on finding recent and breaking news snippets.
+         """
+         evidence = []
+
+         try:
+             # Create search queries emphasizing recency and news context;
+             # adding terms like "breaking news" or "latest update" helps bias results
+             query_phrases = [
+                 f"{' '.join(keywords)} breaking news",
+                 f"{' '.join(keywords)} latest update",
+                 f"{' '.join(keywords)} recent news article"
+             ]
+
+             # Perform multiple searches for broader coverage
+             all_search_results = []
+             for q_phrase in query_phrases:
+                 # The google_search tool provides recent results by default for news queries
+                 search_results_for_phrase = google_search_tool(queries=[q_phrase])
+                 if search_results_for_phrase and search_results_for_phrase[0].results:
+                     all_search_results.extend(search_results_for_phrase[0].results)
+
+             # Filter and add unique relevant snippets
+             added_urls = set()
+             for result in all_search_results:
+                 if result.snippet and result.url and result.url not in added_urls:
+                     evidence.append({
+                         'content': result.snippet,
+                         'source': f'Real-time Web News - {result.source_title or "Unknown"}',
+                         'url': result.url
+                     })
+                     added_urls.add(result.url)
+                 if len(evidence) >= 5:  # Limit total real-time news snippets
+                     break
+         except Exception as e:
+             print(f"Real-time news search error: {e}")
+
+         return evidence
+
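A usage sketch (not part of the commit): the Wikipedia path performs live lookups, while the news path currently flows through the mock google_search module above, so those snippets are dummies.

from models.evidence_retriever import EvidenceRetriever

items = EvidenceRetriever().get_evidence(["Mars", "rover", "2021"])
for item in items:
    print(item['source'], "->", item['url'])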
models/keyword_extractor.py ADDED
@@ -0,0 +1,40 @@
+ # models/keyword_extractor.py
+ import spacy
+ from collections import Counter
+
+ class KeywordExtractor:
+     def __init__(self):
+         try:
+             self.nlp = spacy.load("en_core_web_sm")
+         except OSError:
+             print("Please install spaCy English model: python -m spacy download en_core_web_sm")
+             raise
+
+     def extract_keywords(self, text):
+         """Extract keywords and named entities from text"""
+         doc = self.nlp(text)
+
+         keywords = []
+
+         # Extract named entities
+         for ent in doc.ents:
+             if ent.label_ in ['PERSON', 'ORG', 'GPE', 'PRODUCT', 'EVENT', 'DATE']:
+                 keywords.append(ent.text)
+
+         # Extract noun phrases and important words
+         for chunk in doc.noun_chunks:
+             if len(chunk.text.split()) <= 3:  # Avoid very long phrases
+                 keywords.append(chunk.text)
+
+         # Extract individual important words
+         for token in doc:
+             if (token.pos_ in ['NOUN', 'PROPN'] and
+                 not token.is_stop and
+                 not token.is_punct and
+                 len(token.text) > 2):
+                 keywords.append(token.text)
+
+         # Remove duplicates and return most common
+         keyword_counts = Counter(keywords)
+         return [word for word, count in keyword_counts.most_common(10)]
+
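A usage sketch (not part of the commit; assumes en_core_web_sm is installed):

from models.keyword_extractor import KeywordExtractor

print(KeywordExtractor().extract_keywords("NASA landed a rover on Mars in 2021"))
# Expect entities such as 'NASA', 'Mars', '2021' plus short noun phrases, at most 10 items.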
models/nli_classifier.py ADDED
@@ -0,0 +1,93 @@
+ # models/nli_classifier.py
+ from transformers import pipeline
+ import torch
+
+ class NLIClassifier:
+     def __init__(self):
+         """Initialize the Natural Language Inference classifier"""
+         try:
+             # Zero-shot classification pipeline (facebook/bart-large-mnli)
+             self.classifier = pipeline(
+                 "zero-shot-classification",
+                 model="facebook/bart-large-mnli",
+                 device=0 if torch.cuda.is_available() else -1
+             )
+
+             # Direct NLI pipeline used for claim/evidence classification
+             self.nli_pipeline = pipeline(
+                 "text-classification",
+                 model="roberta-large-mnli",
+                 device=0 if torch.cuda.is_available() else -1
+             )
+
+         except Exception as e:
+             print(f"Error loading NLI model: {e}")
+             # Fall back to a smaller NLI model if the primary models fail to load.
+             print("Attempting to load a smaller NLI model if roberta-large-mnli fails...")
+             try:
+                 self.nli_pipeline = pipeline(
+                     "text-classification",
+                     model="distilbert-base-uncased-mnli",  # A smaller NLI model
+                     device=0 if torch.cuda.is_available() else -1
+                 )
+             except Exception as fallback_e:
+                 print(f"Error loading fallback NLI model: {fallback_e}")
+                 # If even the fallback fails, set the pipeline to None and handle it in classify()
+                 self.nli_pipeline = None
+
+     def classify(self, claim, evidence):
+         """Classify relationship between claim and evidence"""
+         if not self.nli_pipeline:
+             print("NLI pipeline not initialized. Returning neutral with low confidence.")
+             return {
+                 'label': 'NEUTRAL',
+                 'confidence': 0.5
+             }
+
+         try:
+             # Format for NLI: premise (evidence) and hypothesis (claim)
+             premise = evidence
+             hypothesis = claim
+
+             # NLI models typically take the premise and hypothesis concatenated
+             # with a separator token; the pipeline often handles this internally,
+             # but formatting explicitly keeps the intent clear.
+             result = self.nli_pipeline(f"{premise} [SEP] {hypothesis}")
+
+             # Map labels to our format
+             label_mapping = {
+                 'ENTAILMENT': 'ENTAILMENT',
+                 'CONTRADICTION': 'CONTRADICTION',
+                 'NEUTRAL': 'NEUTRAL',
+                 'entailment': 'ENTAILMENT',
+                 'contradiction': 'CONTRADICTION',
+                 'neutral': 'NEUTRAL'
+             }
+
+             if isinstance(result, list):
+                 result = result[0]  # The pipeline returns a list; take the first result
+
+             mapped_label = label_mapping.get(result['label'], 'NEUTRAL')
+             confidence = result['score']
+
+             return {
+                 'label': mapped_label,
+                 'confidence': confidence
+             }
+
+         except Exception as e:
+             print(f"NLI classification error: {e}")
+             # Return neutral with low confidence as fallback
+             return {
+                 'label': 'NEUTRAL',
+                 'confidence': 0.5
+             }
+
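A usage sketch (not part of the commit; the first run downloads roberta-large-mnli, or the fallback model described above):

from models.nli_classifier import NLIClassifier

clf = NLIClassifier()
result = clf.classify(
    claim="The sun is a star.",
    evidence="The Sun is the star at the center of the Solar System.",
)
print(result)  # {'label': 'ENTAILMENT' | 'CONTRADICTION' | 'NEUTRAL', 'confidence': float}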
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ Flask
+ spacy
+ wikipedia
+ requests
+ transformers
+ torch
+ sentence-transformers
+ scikit-learn
+ numpy
+ # feedparser # Removed
+ # beautifulsoup4 # Removed
run.py ADDED
@@ -0,0 +1,17 @@
+ # run.py - Entry point to start the application
+
+ # Import the create_app function from app.py
+ from app import create_app
+
+ # Create the Flask application instance
+ app = create_app()
+
+ if __name__ == "__main__":
+     print("🚀 Starting TruthCheck System...")
+     print("📊 Flask Backend: http://127.0.0.1:5000")  # Updated to 127.0.0.1 for local access
+
+     # Run the Flask application
+     # debug=True enables the reloader and debugger, useful during development
+     # host='0.0.0.0' makes the server accessible from other devices on the network
+     # port=5000 is the default Flask port
+     app.run(debug=True, host='0.0.0.0', port=5000)
static/css/style.css ADDED
@@ -0,0 +1,64 @@
+ /* static/css/style.css - Global and Custom Styles for TruthCheck */
+
+ /* Custom font import - Inter is a clean, modern choice for UIs */
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap');
+
+ /* Define the custom font family for Tailwind's usage */
+ @layer base {
+     body {
+         font-family: 'Inter', sans-serif;
+     }
+ }
+
+ /* Custom scrollbar for a cleaner, modern look */
+ ::-webkit-scrollbar {
+     width: 8px; /* Width of the scrollbar */
+ }
+
+ ::-webkit-scrollbar-track {
+     background: #e0e0e0; /* Color of the tracking area */
+     border-radius: 10px; /* Rounded corners for the track */
+ }
+
+ ::-webkit-scrollbar-thumb {
+     background: #a0a0a0; /* Color of the scroll thumb */
+     border-radius: 10px; /* Rounded corners for the thumb */
+     border: 2px solid #e0e0e0; /* Creates a small border around the thumb */
+ }
+
+ ::-webkit-scrollbar-thumb:hover {
+     background: #888; /* Color of the thumb on hover */
+ }
+
+ /* Blob animation for background elements */
+ @keyframes blob {
+     0% {
+         transform: translate(0, 0) scale(1);
+     }
+     33% {
+         transform: translate(30px, -50px) scale(1.1);
+     }
+     66% {
+         transform: translate(-20px, 20px) scale(0.9);
+     }
+     100% {
+         transform: translate(0, 0) scale(1);
+     }
+ }
+
+ .animate-blob {
+     animation: blob 7s infinite cubic-bezier(0.68, -0.55, 0.27, 1.55);
+ }
+
+ /* Delays for blob animation */
+ .animation-delay-2000 {
+     animation-delay: 2s;
+ }
+ .animation-delay-4000 {
+     animation-delay: 4s;
+ }
+
+ /* Styles for example tags */
+ .example-tag {
+     @apply inline-block bg-blue-100 text-blue-700 rounded-full px-3 py-1 text-sm cursor-pointer transition-all duration-200 ease-in-out hover:bg-blue-200 hover:text-blue-800 hover:shadow-sm active:scale-95;
+ }
static/js/main.js ADDED
@@ -0,0 +1,141 @@
+ // static/js/main.js - Frontend logic for TruthCheck
+
+ document.addEventListener('DOMContentLoaded', () => {
+     // Get references to DOM elements
+     const claimInput = document.getElementById('claimInput');
+     const verifyButton = document.getElementById('verifyButton');
+     const buttonText = document.getElementById('buttonText');
+     const loadingSpinner = document.getElementById('loadingSpinner');
+     const resultContainer = document.getElementById('resultContainer');
+     const resultContent = document.getElementById('resultContent');
+     const userIdDisplay = document.getElementById('userIdDisplay');
+     const exampleTags = document.querySelectorAll('.example-tag'); // Get all example tags
+
+     // Set a placeholder user ID (in a real app, this would come from authentication)
+     userIdDisplay.textContent = 'Guest (Not authenticated)'; // Or generate a random one: `crypto.randomUUID()`
+
+     // Add event listener to the Verify button
+     verifyButton.addEventListener('click', async () => {
+         const claimText = claimInput.value.trim(); // Get text and remove leading/trailing whitespace
+
+         // Basic input validation
+         if (!claimText) {
+             displayResult('Error', 0.0, 'Please enter a claim to verify.', 'truth-red', '❗'); // Using custom color
+             return;
+         }
+
+         // Show loading state
+         setLoadingState(true);
+
+         try {
+             // Make a POST request to the Flask API
+             const response = await fetch('/api/verify', {
+                 method: 'POST',
+                 headers: {
+                     'Content-Type': 'application/json',
+                 },
+                 body: JSON.stringify({ claim: claimText }), // Send the claim as JSON
+             });
+
+             // Check if the response was successful
+             if (!response.ok) {
+                 const errorData = await response.json();
+                 throw new Error(errorData.error || `HTTP error! status: ${response.status}`);
+             }
+
+             // Parse the JSON response
+             const data = await response.json();
+
+             // Determine color and emoji based on the label (using custom colors)
+             let resultColor = 'truth-blue'; // Default for neutral
+             let emoji = '❓';
+             if (data.label === 'True') {
+                 resultColor = 'truth-green';
+                 emoji = '✅';
+             } else if (data.label === 'False') {
+                 resultColor = 'truth-red';
+                 emoji = '❌';
+             } else if (data.label === 'Low Confidence') {
+                 resultColor = 'truth-orange';
+                 emoji = '⚠️';
+             } else if (data.label === 'Error') {
+                 resultColor = 'truth-red';
+                 emoji = '❗';
+             }
+
+             // Display the result
+             displayResult(data.label, data.confidence, data.evidence, resultColor, emoji);
+
+         } catch (error) {
+             // Handle any errors during the fetch operation
+             console.error('Fetch error:', error);
+             displayResult('Error', 0.0, `Could not verify claim. ${error.message}`, 'truth-red', '❗'); // Using custom color
+         } finally {
+             // Hide loading state regardless of success or failure
+             setLoadingState(false);
+         }
+     });
+
+     // Add event listeners for example tags
+     exampleTags.forEach(tag => {
+         tag.addEventListener('click', () => {
+             claimInput.value = tag.dataset.claim; // Set claim input to the tag's data-claim
+             verifyButton.click(); // Trigger the verify button click
+         });
+     });
+
+     /**
+      * Sets the loading state of the button and input.
+      * @param {boolean} isLoading - True to show loading, false to hide.
+      */
+     function setLoadingState(isLoading) {
+         if (isLoading) {
+             buttonText.textContent = 'Verifying...';
+             loadingSpinner.classList.remove('hidden');
+             verifyButton.disabled = true; // Disable button to prevent multiple clicks
+             claimInput.disabled = true; // Disable input during processing
+             resultContainer.classList.add('hidden'); // Hide previous results
+             verifyButton.classList.add('opacity-75', 'cursor-not-allowed'); // Dim button
+         } else {
+             buttonText.textContent = 'Verify Claim';
+             loadingSpinner.classList.add('hidden');
+             verifyButton.disabled = false; // Re-enable button
+             claimInput.disabled = false; // Re-enable input
+             verifyButton.classList.remove('opacity-75', 'cursor-not-allowed'); // Restore button style
+         }
+     }
+
+     /**
+      * Displays the verification result in the UI.
+      * @param {string} label - The verification label (True, False, Low Confidence, Error).
+      * @param {number} confidence - The confidence score.
+      * @param {string} evidence - The evidence snippet.
+      * @param {string} color - Tailwind color class (e.g., 'truth-green', 'truth-red', 'truth-orange').
+      * @param {string} emoji - Emoji to display with the label.
+      */
+     function displayResult(label, confidence, evidence, color, emoji) {
+         resultContainer.classList.remove('hidden'); // Show the result container
+
+         // Format confidence as percentage
+         const confidencePct = (confidence * 100).toFixed(1) + '%';
+
+         // Construct the HTML for the result content
+         resultContent.innerHTML = `
+             <div class="p-6 sm:p-8 rounded-lg border-4 border-${color}-500 bg-${color}-50 text-${color}-800 shadow-xl transition-all duration-300 ease-in-out transform hover:scale-[1.005]">
+                 <h3 class="text-3xl sm:text-4xl font-extrabold mb-4 flex items-center justify-center">
+                     ${emoji} <span class="ml-3">${label}</span>
+                 </h3>
+                 <p class="text-xl sm:text-2xl font-semibold mb-3">
+                     <strong>Confidence:</strong> <span class="text-${color}-700">${confidencePct}</span>
+                 </p>
+                 <p class="text-lg sm:text-xl font-medium mb-3"><strong>Evidence:</strong></p>
+                 <div class="bg-white p-4 sm:p-5 rounded-md shadow-inner border border-gray-200 text-gray-700 whitespace-pre-wrap overflow-auto max-h-64 text-base leading-relaxed">
+                     ${evidence}
+                 </div>
+             </div>
+         `;
+         // Scroll to the result container for better UX on mobile
+         resultContainer.scrollIntoView({ behavior: 'smooth', block: 'start' });
+     }
+ });
templates/index.html ADDED
@@ -0,0 +1,96 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>🔍 TruthCheck: AI-Powered Fact Verification</title>
+     <link rel="icon" href="https://placehold.co/32x32/1E3A8A/FFFFFF?text=TC" type="image/x-icon">
+
+     <script src="https://cdn.tailwindcss.com"></script>
+     <script>
+         tailwind.config = {
+             theme: {
+                 extend: {
+                     colors: {
+                         'truth-blue': '#1E3A8A', // A custom primary blue
+                         'truth-green': '#059669', // Stronger green for true
+                         'truth-red': '#DC2626', // Stronger red for false
+                         'truth-orange': '#EA580C', // Stronger orange for low confidence
+                     },
+                     boxShadow: {
+                         '3xl': '0 35px 60px -15px rgba(0, 0, 0, 0.3)',
+                     }
+                 }
+             }
+         }
+     </script>
+     <link rel="stylesheet" href="/static/css/style.css">
+ </head>
+ <body class="min-h-screen flex flex-col items-center justify-center p-4 sm:p-6 lg:p-8 bg-gradient-to-br from-blue-50 to-indigo-100 font-inter text-gray-800 antialiased">
+
+     <div class="max-w-4xl w-full bg-white p-8 sm:p-10 rounded-3xl shadow-3xl space-y-10 border border-gray-100 relative overflow-hidden">
+         <div class="absolute top-0 left-0 w-40 h-40 bg-blue-200 rounded-full mix-blend-multiply filter blur-xl opacity-30 animate-blob"></div>
+         <div class="absolute top-0 right-0 w-40 h-40 bg-purple-200 rounded-full mix-blend-multiply filter blur-xl opacity-30 animate-blob animation-delay-2000"></div>
+         <div class="absolute bottom-0 left-1/2 w-40 h-40 bg-pink-200 rounded-full mix-blend-multiply filter blur-xl opacity-30 animate-blob animation-delay-4000"></div>
+
+         <div class="text-center z-10 relative">
+             <h1 class="text-6xl sm:text-7xl font-extrabold text-gray-900 mb-4 tracking-tight leading-tight drop-shadow-lg">
+                 <span class="text-truth-blue">🔍 TruthCheck</span>
+             </h1>
+             <p class="text-2xl sm:text-3xl text-gray-700 font-semibold mb-3">
+                 AI-Powered Fact Verification System
+             </p>
+             <p class="text-lg text-gray-600 max-w-2xl mx-auto">
+                 Enter any factual claim below and get it verified using cutting-edge AI and real-time evidence from credible sources.
+             </p>
+         </div>
+
+         <div class="space-y-6 z-10 relative">
+             <label for="claimInput" class="block text-xl font-semibold text-gray-800">
+                 Enter a claim to verify:
+             </label>
+             <textarea
+                 id="claimInput"
+                 rows="5"
+                 class="mt-1 block w-full p-4 border-2 border-gray-300 rounded-xl shadow-inner-lg focus:outline-none focus:ring-4 focus:ring-truth-blue/30 focus:border-truth-blue text-lg placeholder-gray-500 bg-gray-50 transition-all duration-300 ease-in-out transform hover:scale-[1.005] hover:shadow-md resize-y"
+                 placeholder="Example: 'NASA landed a rover on Mars in 2021'"
+             ></textarea>
+             <button
+                 id="verifyButton"
+                 class="w-full flex items-center justify-center py-4 px-8 border border-transparent rounded-xl shadow-xl text-xl font-bold text-white bg-gradient-to-r from-truth-blue to-indigo-700 hover:from-indigo-700 hover:to-truth-blue focus:outline-none focus:ring-4 focus:ring-offset-2 focus:ring-truth-blue/50 transition-all duration-300 ease-in-out transform hover:-translate-y-1 hover:scale-105 group"
+             >
+                 <span id="buttonText" class="transition-all duration-300 group-hover:scale-105">Verify Claim</span>
+                 <svg id="loadingSpinner" class="animate-spin -ml-1 mr-3 h-7 w-7 text-white hidden" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
+                     <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
+                     <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
+                 </svg>
+             </button>
+         </div>
+
+         <div class="text-center text-gray-600 text-md z-10 relative">
+             <p class="font-medium mb-2">Try these examples:</p>
+             <div class="flex flex-wrap justify-center gap-2">
+                 <span class="example-tag" data-claim="NASA landed a rover on Mars in 2021">NASA landed a rover on Mars in 2021</span>
+                 <span class="example-tag" data-claim="India is a continent">India is a continent</span>
+                 <span class="example-tag" data-claim="The Earth is flat">The Earth is flat</span>
+                 <span class="example-tag" data-claim="Donald Trump won the 2024 US Presidential election">Donald Trump won the 2024 US Presidential election</span>
+                 <span class="example-tag" data-claim="COVID-19 vaccines contain microchips">COVID-19 vaccines contain microchips</span>
+             </div>
+         </div>
+
+         <div id="resultContainer" class="hidden mt-8 p-8 bg-gradient-to-br from-gray-50 to-gray-100 rounded-2xl shadow-xl border border-gray-200 z-10 relative">
+             <h2 class="text-4xl font-bold text-gray-900 mb-6 text-center drop-shadow-sm">Verification Result</h2>
+             <div id="resultContent" class="space-y-6">
+             </div>
+         </div>
+
+         <div class="text-center text-sm text-gray-400 mt-8 z-10 relative">
+             <p>Current User ID: <span id="userIdDisplay" class="font-medium text-gray-500">Guest (Authentication not implemented)</span></p>
+             <p class="text-xs mt-2">© 2024 TruthCheck. All rights reserved.</p>
+         </div>
+     </div>
+
+     <script src="/static/js/main.js"></script>
+ </body>
+ </html>
utils/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """
+ TruthCheck Utilities Package
+ """
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (192 Bytes).
 
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (175 Bytes).
 
utils/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.02 kB).
 
utils/__pycache__/config.cpython-312.pyc ADDED
Binary file (941 Bytes).
 
utils/__pycache__/similarity.cpython-311.pyc ADDED
Binary file (2.45 kB).
 
utils/__pycache__/similarity.cpython-312.pyc ADDED
Binary file (2.09 kB).
 
utils/config.py ADDED
@@ -0,0 +1,24 @@
+ # utils/config.py
+ import os
+
+ class Config:
+     """Configuration settings for TruthCheck"""
+
+     # Model settings
+     SIMILARITY_THRESHOLD = 0.5
+     CONFIDENCE_THRESHOLD = 0.7  # Not currently used in app.py, but available for stricter classification
+
+     # API settings
+     WIKIPEDIA_TIMEOUT = 10
+     # NEWS_TIMEOUT is less relevant now that the google_search tool is used directly
+     MAX_EVIDENCE_SOURCES = 5  # Max number of evidence sources to retrieve (e.g., 2 wiki, 3 web search)
+
+     # Flask settings
+     SECRET_KEY = os.environ.get('SECRET_KEY', 'truthcheck-secret-key-2024')
+     DEBUG = os.environ.get('DEBUG', 'True').lower() == 'true'
+
+     # Model paths (for local models if needed)
+     SPACY_MODEL = "en_core_web_sm"
+     SBERT_MODEL = "all-MiniLM-L6-v2"
+     NLI_MODEL = "roberta-large-mnli"  # Or "distilbert-base-uncased-mnli" as a fallback
+
utils/similarity.py ADDED
@@ -0,0 +1,43 @@
+ # utils/similarity.py
+ from sentence_transformers import SentenceTransformer
+ import numpy as np
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ class SimilarityCalculator:
+     def __init__(self):
+         """Initialize sentence transformer model"""
+         try:
+             self.model = SentenceTransformer('all-MiniLM-L6-v2')
+         except Exception as e:
+             print(f"Error loading similarity model: {e}")
+             self.model = None
+
+     def calculate_similarity(self, text1, text2):
+         """Calculate semantic similarity between two texts"""
+         if not self.model:
+             print("Similarity model not loaded. Returning fallback similarity.")
+             return 0.5  # Fallback similarity
+
+         try:
+             # Encode texts to embeddings
+             embeddings = self.model.encode([text1, text2])
+
+             # Calculate cosine similarity
+             similarity = cosine_similarity(
+                 embeddings[0].reshape(1, -1),
+                 embeddings[1].reshape(1, -1)
+             )[0][0]
+
+             return float(similarity)
+
+         except Exception as e:
+             print(f"Similarity calculation error: {e}")
+             return 0.5
+
+ # Global similarity calculator instance
+ _similarity_calculator = SimilarityCalculator()
+
+ def calculate_similarity(text1, text2):
+     """Global function to calculate similarity"""
+     return _similarity_calculator.calculate_similarity(text1, text2)
+
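A usage sketch of the module-level helper that app.py imports (not part of the commit; the first call downloads all-MiniLM-L6-v2):

from utils.similarity import calculate_similarity

score = calculate_similarity(
    "NASA landed a rover on Mars in 2021",
    "Perseverance touched down on Mars in February 2021.",
)
print(round(score, 3))  # evidence is kept when this exceeds Config.SIMILARITY_THRESHOLD (0.5)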