# ---------------- OpenAI Client (for AI Sherlock) ---------------- #
# Best-effort setup: any failure while importing the SDK or constructing the
# client leaves ``client`` as None instead of aborting module import.
try:
    from openai import OpenAI

    client = OpenAI()
except Exception:
    client = None


# ---------------- Sherlock Logic ---------------- #

# Captures whatever follows the "suspect:" label itself (case-insensitive),
# so earlier colons on the same line (e.g. "Note: Suspect: X") are ignored.
_SUSPECT_RE = re.compile(r"suspect:\s*(.*)", re.IGNORECASE)

# Clock times such as "8 PM", "7AM", "10:30 pm" (meridiem in any letter case).
_TIME_RE = re.compile(r"\b\d{1,2}(?::\d{2})?\s?(?:AM|PM)\b", re.IGNORECASE)

# Reporting verbs matched as whole words, so "essays" or "aforesaid" do not
# count as claims.
_CLAIM_RE = re.compile(
    r"\b(?:claims|claimed|says|said|reports|reported)\b", re.IGNORECASE
)

# Sentence boundary: whitespace that follows '.', '!' or '?'.
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")

# Contradiction markers matched as whole words, so "button", "attribute" or
# "yeti" do not trigger the contradiction rule.
_CONTRADICTION_RE = re.compile(
    r"\b(?:however|but|whereas|although|yet)\b", re.IGNORECASE
)

# Matched as case-insensitive substrings on purpose, so inflected forms such
# as "payments", "banking" or "defrauded" are still detected.
_FINANCIAL_KEYWORDS = (
    "bank",
    "payment",
    "transfer",
    "otp",
    "password",
    "account",
    "fraud",
    "scam",
    "urgent",
)


def extract_clues(text: str):
    """Extract simple heuristic clues from free-form evidence text.

    Args:
        text: The evidence text to scan.

    Returns:
        A dict with three keys:
            "suspects": sorted list of unique names found after a
                "suspect:" label (one per line, label is case-insensitive).
            "times": sorted (lexicographically) list of unique time
                expressions such as "8 PM" or "10:30 pm".
            "claims": sentences, in order of appearance, that contain a
                reporting verb ("claims", "claimed", "says", "said",
                "reports", "reported") as a whole word.
    """
    suspects = set()
    for line in text.splitlines():
        match = _SUSPECT_RE.search(line)
        if match is None:
            continue
        name = match.group(1).strip()
        if name:
            suspects.add(name)

    times = {m.group(0) for m in _TIME_RE.finditer(text)}

    claims = []
    for sentence in _SENTENCE_SPLIT_RE.split(text):
        sentence = sentence.strip()
        if sentence and _CLAIM_RE.search(sentence):
            claims.append(sentence)

    return {
        "suspects": sorted(suspects),
        "times": sorted(times),
        "claims": claims,
    }


def assess_risk(clues: dict, text: str):
    """Assess risk heuristically from extracted clues and keyword hits.

    Rules:
        * "High"   - financial terms AND contradiction markers are present.
        * "Medium" - financial terms, contradiction markers, or at least
          three claims are present.
        * "Low"    - none of the above.

    Two or more time references add an explanatory reason but do not change
    the risk level.

    Args:
        clues: Clue dict as produced by ``extract_clues``; missing "claims"
            or "times" keys are treated as empty.
        text: The original evidence text.

    Returns:
        A ``(risk_level, reasons)`` tuple where ``risk_level`` is "High",
        "Medium" or "Low" and ``reasons`` is a non-empty list of strings.
    """
    lower_text = text.lower()

    has_financial_terms = any(k in lower_text for k in _FINANCIAL_KEYWORDS)
    has_contradiction_words = _CONTRADICTION_RE.search(text) is not None
    many_claims = len(clues.get("claims", [])) >= 3
    many_times = len(clues.get("times", [])) >= 2

    reasons = []
    if has_financial_terms:
        reasons.append("Mentions of financial / account-related terms.")
    if has_contradiction_words:
        reasons.append(
            "Contains potential contradiction markers (e.g. 'however', 'but')."
        )
    if many_claims:
        reasons.append("Multiple claims detected that may need verification.")
    if many_times:
        reasons.append(
            "Multiple time references detected; timeline may be important."
        )

    if has_financial_terms and has_contradiction_words:
        risk = "High"
    elif has_financial_terms or has_contradiction_words or many_claims:
        risk = "Medium"
    else:
        risk = "Low"

    if not reasons:
        reasons.append("No obvious red flags detected by heuristic rules.")

    return risk, reasons