File size: 1,132 Bytes
622a0b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# models/claim_extractor.py
import re
import spacy

class ClaimExtractor:
    """Pick out sentences that look like factual claims from free text.

    Sentence segmentation is delegated to the spaCy ``en_core_web_sm``
    pipeline loaded in ``__init__``.  The claim filter itself is a set of
    surface heuristics: minimum length, no trailing question mark, and no
    leading interrogative or imperative word.
    """

    # Inputs whose stripped length is below this are ignored entirely.
    _MIN_TEXT_CHARS = 10
    # A sentence needs MORE than this many whitespace-separated tokens.
    _MIN_SENTENCE_WORDS = 5

    # Leading interrogative word (case-sensitive).  The trailing \b makes the
    # match a whole word, so "However", "Whatever", "Whenever", "Whereas",
    # "Whoever" ... are not mistaken for "How", "What", "When", "Where", "Who".
    _QUESTION_START = re.compile(r'(?:How|What|When|Where|Why|Who)\b')
    # Leading imperative / request phrase (case-insensitive).  The trailing \b
    # keeps words such as "Letters" or "Pleased" from matching "Let"/"Please".
    _COMMAND_START = re.compile(r'(?:Please|Let|Can you)\b', re.IGNORECASE)

    def __init__(self):
        """Load the spaCy English pipeline used for sentence splitting.

        Raises:
            OSError: re-raised from ``spacy.load`` when the
                ``en_core_web_sm`` model cannot be loaded; an installation
                hint is printed first.
        """
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Please install spaCy English model: python -m spacy download en_core_web_sm")
            raise

    @classmethod
    def _is_claim(cls, sentence):
        """Return True when a stripped sentence passes every claim heuristic."""
        if len(sentence.split()) <= cls._MIN_SENTENCE_WORDS:
            return False
        if sentence.endswith('?'):
            return False
        # Pattern.match anchors at the start of the string, so both checks
        # only look at the first word(s) of the sentence.
        if cls._QUESTION_START.match(sentence):
            return False
        if cls._COMMAND_START.match(sentence):
            return False
        return True

    def extract_claims(self, text: str) -> list:
        """Extract factual claims from text.

        Args:
            text: Raw input text.  ``None``, empty strings and inputs shorter
                than 10 characters after stripping yield an empty list.

        Returns:
            A list of stripped sentence strings that are longer than five
            words, do not end with ``?``, and do not start with a question
            word (How/What/When/Where/Why/Who) or a request phrase
            (Please/Let/Can you).  If no sentence passes the filter, the
            whole stripped input is returned as a single-element list.
        """
        if not text or len(text.strip()) < self._MIN_TEXT_CHARS:
            return []

        # Use spaCy for sentence segmentation
        doc = self.nlp(text)
        stripped = (sent.text.strip() for sent in doc.sents)
        claims = [sentence for sentence in stripped if self._is_claim(sentence)]

        # Fall back to treating the entire input as one claim so that a
        # sufficiently long text never produces an empty result.
        return claims if claims else [text.strip()]