Update app.py

app.py CHANGED
@@ -72,7 +72,7 @@ def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
 
 
-def get_leaderboard_df():
+def get_leaderboard_df_1():
     if eval_results:
         print("Pulling evaluation results for the leaderboard.")
         eval_results.git_pull()
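Note: the only change in this hunk renames the old Hub-backed loader to get_leaderboard_df_1(), keeping it around while the name get_leaderboard_df is reassigned in the next hunk. For reference, a self-contained sketch of the has_nan_values() helper visible in the hunk context (the example frame is illustrative only):

    import pandas as pd

    def has_nan_values(df, columns):
        # Same body as in the hunk context: one boolean per row,
        # True when any of the listed columns holds NaN.
        return df[columns].isna().any(axis=1)

    example = pd.DataFrame({"a": [1.0, None], "b": [2.0, 3.0]})
    print(has_nan_values(example, ["a", "b"]).tolist())  # [False, True]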
@@ -99,6 +99,22 @@ def get_leaderboard_df():
     print(type(df))
     return df
 
+def get_leaderboard_df():
+
+    data = {
+        'Datasets': ['SOTA(FT)', 'SOTA(ZS)', 'FLAN-T5', 'GPT-3', 'GPT-3.5v2', 'GPT-3.5v3', 'ChatGPT', 'GPT-4'],
+        'KQApro': [93.85, 94.20, 37.27, 38.28, 38.01, 40.35, 47.93, 57.20],
+        'LC-quad2': [33.10, '-', 30.14, 33.04, 33.77, 39.04, 42.76, 54.95],
+        'WQSP': [73.10, 62.98, 59.87, 67.68, 72.34, 79.60, 83.70, 90.45],
+        'CWQ': [72.20, '-', 46.69, 51.77, 53.96, 57.54, 64.02, 71.00],
+        'GrailQA': [76.31, '-', 29.02, 27.58, 30.50, 35.43, 46.77, 51.40],
+        'GraphQ': [41.30, '-', 32.27, 38.32, 40.85, 47.95, 53.10, 63.20],
+        'QALD-9': [67.82, '-', 30.17, 38.54, 44.96, 46.19, 45.71, 57.20],
+        'MKQA': [46.00, '-', 20.17, 26.97, 30.14, 39.05, 44.30, 59.20]
+    }
+
+    df = pd.DataFrame(data)
+    return df
 
 def get_evaluation_queue_df():
     if eval_queue:
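Note: get_leaderboard_df() now returns a fixed table of KBQA accuracy numbers instead of pulling results from the Hub; it assumes pandas is already imported as pd in app.py. One caveat worth flagging: columns that mix floats with the '-' placeholder are stored with dtype object, so they will not behave as numeric columns downstream. A minimal sketch, assuming nothing beyond pandas:

    import pandas as pd

    # A column mixing floats with the '-' placeholder gets dtype object:
    df = pd.DataFrame({'CWQ': [72.20, '-', 46.69]})
    print(df['CWQ'].dtype)  # object

    # Coercing recovers the numbers, with '-' becoming NaN:
    print(pd.to_numeric(df['CWQ'], errors='coerce').tolist())  # [72.2, nan, 46.69]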
@@ -299,29 +315,8 @@ with demo:
     )
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard_table_lite = gr.components.Dataframe(
-                value=leaderboard_df[COLS_LITE],
-                headers=COLS_LITE,
-                datatype=TYPES_LITE,
-                max_rows=None,
-                elem_id="leaderboard-table-lite",
-            )
-            # Dummy leaderboard for handling the case when the user uses backspace key
-            hidden_leaderboard_table_for_search_lite = gr.components.Dataframe(
-                value=original_df[COLS_LITE],
-                headers=COLS_LITE,
-                datatype=TYPES_LITE,
-                max_rows=None,
-                visible=False,
-            )
-            search_bar.submit(
-                search_table,
-                [hidden_leaderboard_table_for_search_lite, search_bar],
-                leaderboard_table_lite,
-            )
-
-        with gr.TabItem("📊 Extended view", elem_id="llm-benchmark-tab-table", id=1):
+
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=1):
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df,
                 headers=COLS,
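Note: removing the lite tab also removes the only search wiring in this diff; search_bar itself is presumably defined elsewhere in app.py but no longer drives a table. search_table is not defined here either; from the removed wiring (inputs: hidden full table and search bar, output: visible table) its shape was roughly the following. The body and the "model" column name are assumptions for illustration only:

    # Hypothetical reconstruction of search_table(); not shown in this diff.
    def search_table(df, query):
        # "model" is an assumed column name, purely illustrative.
        return df[df["model"].str.contains(query, case=False, na=False)]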
@@ -346,107 +341,7 @@ with demo:
         with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-
-    with gr.Column():
-        with gr.Row():
-            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-        with gr.Column():
-            with gr.Accordion(f"✅ Finished Evaluations: {len(finished_eval_queue_df)}", open=False):
-                with gr.Row():
-                    finished_eval_table = gr.components.Dataframe(
-                        value=finished_eval_queue_df,
-                        headers=EVAL_COLS,
-                        datatype=EVAL_TYPES,
-                        max_rows=5,
-                    )
-            with gr.Accordion(f"🔄 Running Evaluation Queue: {len(running_eval_queue_df)}", open=False):
-                with gr.Row():
-                    running_eval_table = gr.components.Dataframe(
-                        value=running_eval_queue_df,
-                        headers=EVAL_COLS,
-                        datatype=EVAL_TYPES,
-                        max_rows=5,
-                    )
-
-            with gr.Accordion(f"⏳ Pending Evaluation Queue: {len(pending_eval_queue_df)}", open=False):
-                with gr.Row():
-                    pending_eval_table = gr.components.Dataframe(
-                        value=pending_eval_queue_df,
-                        headers=EVAL_COLS,
-                        datatype=EVAL_TYPES,
-                        max_rows=5,
-                    )
-        with gr.Row():
-            gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-        with gr.Row():
-            with gr.Column():
-                model_name_textbox = gr.Textbox(label="Model name")
-                revision_name_textbox = gr.Textbox(
-                    label="revision", placeholder="main"
-                )
-                private = gr.Checkbox(
-                    False, label="Private", visible=not IS_PUBLIC
-                )
-                model_type = gr.Dropdown(
-                    choices=["pretrained", "fine-tuned", "with RL"],
-                    label="Model type",
-                    multiselect=False,
-                    value="pretrained",
-                    max_choices=1,
-                    interactive=True,
-                )
-
-            with gr.Column():
-                precision = gr.Dropdown(
-                    choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)"],
-                    label="Precision",
-                    multiselect=False,
-                    value="float16",
-                    max_choices=1,
-                    interactive=True,
-                )
-                weight_type = gr.Dropdown(
-                    choices=["Original", "Delta", "Adapter"],
-                    label="Weights type",
-                    multiselect=False,
-                    value="Original",
-                    max_choices=1,
-                    interactive=True,
-                )
-                base_model_name_textbox = gr.Textbox(
-                    label="Base model (for delta or adapter weights)"
-                )
-
-        submit_button = gr.Button("Submit Eval")
-        submission_result = gr.Markdown()
-        submit_button.click(
-            add_new_eval,
-            [
-                model_name_textbox,
-                base_model_name_textbox,
-                revision_name_textbox,
-                precision,
-                private,
-                weight_type,
-                model_type
-            ],
-            submission_result,
-        )
-
-        with gr.Row():
-            refresh_button = gr.Button("Refresh")
-            refresh_button.click(
-                refresh,
-                inputs=[],
-                outputs=[
-                    leaderboard_table,
-                    finished_eval_table,
-                    running_eval_table,
-                    pending_eval_table,
-                ],
-            )
+
 
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
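Note: dropping the submission form and refresh button also drops the only visible callers of add_new_eval and refresh; if those helpers remain in app.py they become dead code, harmless but worth cleaning up. refresh itself is not shown in this diff; the removed wiring (inputs=[], four outputs) only fixes its output arity and order, sketched below with an assumed body:

    # Hypothetical sketch of refresh(); only the four-output order is
    # known from the removed wiring. The body is an assumption.
    def refresh():
        leaderboard = get_leaderboard_df()
        # Assumed: one dataframe per queue state.
        finished, running, pending = get_evaluation_queue_df()
        return leaderboard, finished, running, pending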