Spaces:
Sleeping
Sleeping
Update predictor.py
Browse files- predictor.py +47 -27
predictor.py
CHANGED
|
@@ -4,6 +4,9 @@ import onnxruntime as ort
|
|
| 4 |
import numpy as np
|
| 5 |
from typing import List, Dict, Set, Tuple
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
class SentenceExtractor:
|
| 8 |
def __init__(self, main_keywords_path: str, eval_keywords_path: str, model_path: str = "model_quantized.onnx"):
|
| 9 |
"""
|
|
@@ -148,36 +151,53 @@ class SentenceExtractor:
|
|
| 148 |
return relevant_sentences, categorized_sentences
|
| 149 |
|
| 150 |
def extract(self, text: str) -> Dict[str, any]:
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
return {
|
| 158 |
-
"relevant_sentences": [],
|
| 159 |
-
"scored_sentences": [],
|
| 160 |
-
"categorized_sentences": {}
|
| 161 |
-
}
|
| 162 |
-
|
| 163 |
-
relevant_sentences, categorized_sentences = self._extract_relevant_sentences(text)
|
| 164 |
-
|
| 165 |
-
# 为每个相关句子评分
|
| 166 |
-
scored_sentences = []
|
| 167 |
-
for sentence in relevant_sentences:
|
| 168 |
-
grade = self._predict_grade(sentence)
|
| 169 |
-
scored_sentences.append({
|
| 170 |
-
"sentence": sentence,
|
| 171 |
-
"grade": grade
|
| 172 |
-
})
|
| 173 |
-
|
| 174 |
return {
|
| 175 |
-
"relevant_sentences":
|
| 176 |
-
"scored_sentences":
|
| 177 |
-
"categorized_sentences":
|
| 178 |
-
"count":
|
|
|
|
| 179 |
}
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
# 使用示例
|
| 182 |
if __name__ == "__main__":
|
| 183 |
# 假设主关键词文件名为main_keywords.json
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
from typing import List, Dict, Set, Tuple
|
| 6 |
|
| 7 |
+
# Numeric score for each letter grade: 'A' is the highest (5), 'E' the lowest (1).
score_map = {'A': 5, 'B': 4, 'C': 3, 'D': 2, 'E': 1}
|
| 8 |
+
|
| 9 |
+
|
| 10 |
class SentenceExtractor:
|
| 11 |
def __init__(self, main_keywords_path: str, eval_keywords_path: str, model_path: str = "model_quantized.onnx"):
|
| 12 |
"""
|
|
|
|
| 151 |
return relevant_sentences, categorized_sentences
|
| 152 |
|
| 153 |
def extract(self, text: str) -> Dict[str, any]:
    """
    Extract the keyword-relevant sentences from ``text`` and grade them.

    Each relevant sentence is graded with ``self._predict_grade``; the
    letter grades are converted to scores (A=5 ... E=1), averaged, rounded
    half up to the nearest integer and mapped back to a letter to form the
    overall grade.

    :param text: Input text. A falsy value (e.g. ``""`` or ``None``)
        short-circuits to an empty result.
    :return: Dict with the keys ``relevant_sentences`` (as returned by
        ``self._extract_relevant_sentences``), ``scored_sentences`` (list of
        ``{"sentence": ..., "grade": ...}`` dicts), ``categorized_sentences``,
        ``count`` (number of relevant sentences) and ``overall_grade``
        (``"C"`` when there is nothing to grade).
    """
    # NOTE(review): the return annotation uses the builtin ``any``;
    # ``typing.Any`` was presumably intended. Left unchanged here because
    # fixing it needs a file-level import.
    default_grade = "C"

    if not text:
        return {
            "relevant_sentences": [],
            "scored_sentences": [],
            "categorized_sentences": {},
            "count": 0,
            "overall_grade": default_grade,  # empty text defaults to C
        }

    relevant_sentences, categorized_sentences = self._extract_relevant_sentences(text)

    grade_to_score = {'A': 5, 'B': 4, 'C': 3, 'D': 2, 'E': 1}
    # Derive the reverse mapping so the two tables can never drift apart.
    score_to_grade = {score: grade for grade, score in grade_to_score.items()}

    # Grade every relevant sentence and accumulate its numeric score.
    scored_sentences = []
    total_score = 0
    for sentence in relevant_sentences:
        grade = self._predict_grade(sentence)
        scored_sentences.append({
            "sentence": sentence,
            "grade": grade,
        })
        # A grade outside A-E counts as C (3).
        total_score += grade_to_score.get(grade, 3)

    overall_grade = default_grade
    if relevant_sentences:
        sentence_count = len(relevant_sentences)
        # Round the average half UP using exact integer arithmetic:
        # floor(total / n + 0.5) == (2 * total + n) // (2 * n).
        # The builtin round() rounds halves to the nearest even integer
        # (round(4.5) == 4, round(2.5) == 2), which would skew tied averages.
        rounded_score = (2 * total_score + sentence_count) // (2 * sentence_count)
        overall_grade = score_to_grade.get(rounded_score, default_grade)

    return {
        "relevant_sentences": relevant_sentences,
        "scored_sentences": scored_sentences,
        "categorized_sentences": categorized_sentences,
        "count": len(relevant_sentences),
        "overall_grade": overall_grade,
    }
|
| 199 |
+
|
| 200 |
+
|
| 201 |
# 使用示例
|
| 202 |
if __name__ == "__main__":
|
| 203 |
# 假设主关键词文件名为main_keywords.json
|