# models/claim_extractor.py
import re
import spacy

class ClaimExtractor:
    """Extract candidate factual claims from free text.

    The text is segmented into sentences by the spaCy pipeline stored in
    ``self.nlp``; sentences that look like questions or commands, or that
    are too short, are discarded by simple surface heuristics.
    """

    # Texts whose stripped length is below this are ignored entirely.
    _MIN_TEXT_CHARS = 10
    # A sentence must contain MORE than this many whitespace-separated words.
    _MIN_SENTENCE_WORDS = 5

    # The trailing ``\b`` makes these whole-word matches, so that
    # "However, ..." / "Whenever ..." are not mistaken for questions and
    # "Letters ..." / "Pleased ..." are not mistaken for commands.
    # "Whom"/"Whose" are listed explicitly so they remain filtered.
    _INTERROGATIVE_RE = re.compile(
        r'^(?:How|What|When|Where|Why|Whom|Whose|Who)\b'
    )
    _COMMAND_RE = re.compile(r'^(?:Please|Let|Can you)\b', re.IGNORECASE)

    def __init__(self):
        """Load the ``en_core_web_sm`` spaCy model into ``self.nlp``.

        Raises:
            OSError: re-raised from ``spacy.load`` when the model cannot be
                loaded, after printing an installation hint.
        """
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Please install spaCy English model: python -m spacy download en_core_web_sm")
            raise

    def extract_claims(self, text):
        """Extract factual claims from text.

        Args:
            text: Input text. ``None``, empty strings, and texts shorter
                than 10 characters after stripping yield no claims.

        Returns:
            A list of stripped sentence strings that have more than five
            words, do not end with ``?``, do not start with an
            interrogative word, and do not start with "Please", "Let" or
            "Can you". If the input is long enough but no sentence passes
            the filters, a single-element list holding the stripped input
            text is returned instead.
        """
        if not text or len(text.strip()) < self._MIN_TEXT_CHARS:
            return []

        # Use spaCy for sentence segmentation
        doc = self.nlp(text)
        claims = []

        for sent in doc.sents:
            sentence = sent.text.strip()

            # Filter out short sentences, questions, and commands
            if len(sentence.split()) <= self._MIN_SENTENCE_WORDS:
                continue
            if sentence.endswith('?') or self._INTERROGATIVE_RE.match(sentence):
                continue
            if self._COMMAND_RE.match(sentence):
                continue

            claims.append(sentence)

        # Fall back to treating the whole input as one claim when every
        # sentence was filtered out.
        return claims or [text.strip()]