Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
c608f7f
1
Parent(s):
3dfa66b
[Ref, Fix] indentation error in answer key selection, longer explanation in demo, exclusion of broken dataset
Browse files
- app.py +1 -1
- src/dataloading.py +3 -1
- src/utils.py +6 -4
app.py
CHANGED
|
@@ -69,7 +69,7 @@ with gr.Blocks(title="LLM Similarity Analyzer", css=app_util.custom_css) as demo
|
|
| 69 |
outputs=heatmap
|
| 70 |
)
|
| 71 |
|
| 72 |
-
gr.Markdown("\* Self-similarity is only 1.0 for
|
| 73 |
|
| 74 |
clear_btn = gr.Button("Clear Selection")
|
| 75 |
clear_btn.click(
|
|
|
|
| 69 |
outputs=heatmap
|
| 70 |
)
|
| 71 |
|
| 72 |
+
gr.Markdown("\* Self-similarity is only 1.0 for CAPA if the model predicts a single option with 100% confidence for each question. If the model is uncertain, the self-similarity will be lower.")
|
| 73 |
|
| 74 |
clear_btn = gr.Button("Clear Selection")
|
| 75 |
clear_btn.click(
|
src/dataloading.py
CHANGED
|
@@ -88,7 +88,7 @@ def get_leaderboard_datasets(model_ids):
|
|
| 88 |
common_datasets = set.intersection(*model_datasets.values())
|
| 89 |
|
| 90 |
# Filter datasets that are not MCQ or currently do not work
|
| 91 |
-
ignore = ["math_", "ifeval"]
|
| 92 |
discard = []
|
| 93 |
for dataset in common_datasets:
|
| 94 |
for ignore_data in ignore:
|
|
@@ -132,6 +132,8 @@ def filter_labels(dataset_name, doc):
|
|
| 132 |
labels.append(1)
|
| 133 |
elif test_target.isdigit():
|
| 134 |
labels = [int(d[target_key]) for d in doc]
|
|
|
|
|
|
|
| 135 |
|
| 136 |
return labels
|
| 137 |
|
|
|
|
| 88 |
common_datasets = set.intersection(*model_datasets.values())
|
| 89 |
|
| 90 |
# Filter datasets that are not MCQ or currently do not work
|
| 91 |
+
ignore = ["bbh_temporal_sequences", "math_", "ifeval"]
|
| 92 |
discard = []
|
| 93 |
for dataset in common_datasets:
|
| 94 |
for ignore_data in ignore:
|
|
|
|
| 132 |
labels.append(1)
|
| 133 |
elif test_target.isdigit():
|
| 134 |
labels = [int(d[target_key]) for d in doc]
|
| 135 |
+
|
| 136 |
+
print(f"Number of labels: {len(labels)}")
|
| 137 |
|
| 138 |
return labels
|
| 139 |
|
src/utils.py
CHANGED
|
@@ -18,7 +18,9 @@ def opt_in_pars_to_index(s):
|
|
| 18 |
raise ValueError("Invalid format")
|
| 19 |
|
| 20 |
def get_test_target(doc):
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
| 18 |
raise ValueError("Invalid format")
|
| 19 |
|
| 20 |
def get_test_target(doc):
|
| 21 |
+
if "target" in doc:
|
| 22 |
+
return doc["target"], "target"
|
| 23 |
+
elif "answer" in doc:
|
| 24 |
+
return doc["answer"], "answer"
|
| 25 |
+
else:
|
| 26 |
+
return "", ""
|