Spaces:
Sleeping
Sleeping
| # models/claim_extractor.py | |
| import re | |
| import spacy | |
class ClaimExtractor:
    """Extract sentences that look like factual claims from free text.

    Sentences are segmented with the spaCy ``en_core_web_sm`` pipeline and
    then filtered with simple surface heuristics (length, trailing ``?``,
    interrogative or imperative opening word).
    """

    # Interrogative openers. The trailing ``\b`` makes this a whole-word
    # match, so declarative sentences starting with "However", "Whatever",
    # "Whenever", "Whereas" or "Whoever" are no longer mistaken for
    # questions. "Whose"/"Whom" are listed explicitly so they stay filtered.
    _QUESTION_START = re.compile(r'^(?:How|What|When|Where|Why|Who(?:se|m)?)\b')

    # Imperative / request openers (case-insensitive). The ``\b`` keeps
    # words such as "Letters" or "Lettuce" from matching "Let".
    _REQUEST_START = re.compile(r'^(?:Please|Let|Can you)\b', re.IGNORECASE)

    def __init__(self):
        """Load the spaCy English pipeline used for sentence segmentation.

        Raises:
            OSError: re-raised from ``spacy.load`` after printing an install
                hint when the ``en_core_web_sm`` model cannot be loaded.
        """
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Please install spaCy English model: python -m spacy download en_core_web_sm")
            raise

    def _is_claim(self, sentence):
        """Return True if *sentence* passes the claim heuristics."""
        # Require more than five whitespace-separated words.
        if len(sentence.split()) <= 5:
            return False
        if sentence.endswith('?'):
            return False
        if self._QUESTION_START.match(sentence):
            return False
        return not self._REQUEST_START.match(sentence)

    def extract_claims(self, text) -> list:
        """Extract factual claims from text.

        Args:
            text: Input text. ``None``, empty, or fewer than 10 characters
                after stripping yields an empty list.

        Returns:
            The stripped sentences that pass the heuristics, in document
            order. If no sentence qualifies, a one-element list holding the
            whole stripped input is returned instead.
        """
        if not text:
            return []
        stripped_text = text.strip()
        if len(stripped_text) < 10:
            return []

        # Use spaCy for sentence segmentation.
        doc = self.nlp(text)
        candidates = (sent.text.strip() for sent in doc.sents)
        claims = [sentence for sentence in candidates if self._is_claim(sentence)]

        # Fall back to the whole input so callers always get something to
        # check for non-trivial text.
        return claims or [stripped_text]