update tamil page
Browse files
- app/content.py +3 -0
- app/summarization.py +8 -2
- model_information.py +2 -2
- results_organized/wer/asr_tamil.csv +18 -18
app/content.py
CHANGED
|
@@ -212,6 +212,7 @@ wer_displayname2datasetname = {
|
|
| 212 |
'YouTube ASR: Malay with English Prompt': 'ytb_asr_batch3_malay',
|
| 213 |
'YouTube ASR: Chinese with English Prompt': 'ytb_asr_batch3_chinese',
|
| 214 |
'YouTube ASR: Tamil with English Prompt': 'ytb_asr_batch3_tamil',
|
|
|
|
| 215 |
|
| 216 |
'YouTube ASR: Malay with Malay Prompt': 'ytb_asr_batch3_ms_ms_prompt',
|
| 217 |
'YouTube ASR: Chinese with Chinese Prompt': 'ytb_asr_batch3_zh_zh_prompt',
|
|
@@ -384,6 +385,8 @@ dataset_diaplay_information = {
|
|
| 384 |
|
| 385 |
'YouTube ASR: Tamil with English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
| 386 |
|
|
|
|
|
|
|
| 387 |
'YouTube ASR Translation: Malay2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of dataset is same as <i>YouTube ASR: Malay<i>',
|
| 388 |
|
| 389 |
# 'YouTube ASR Translation: Chinese2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of dataset is same as <i>YouTube ASR: Chinese<i>',
|
|
|
|
| 212 |
'YouTube ASR: Malay with English Prompt': 'ytb_asr_batch3_malay',
|
| 213 |
'YouTube ASR: Chinese with English Prompt': 'ytb_asr_batch3_chinese',
|
| 214 |
'YouTube ASR: Tamil with English Prompt': 'ytb_asr_batch3_tamil',
|
| 215 |
+
'YouTube ASR: Tamil with English Prompt V2': 'ytb_asr_batch3_tamil_v2',
|
| 216 |
|
| 217 |
'YouTube ASR: Malay with Malay Prompt': 'ytb_asr_batch3_ms_ms_prompt',
|
| 218 |
'YouTube ASR: Chinese with Chinese Prompt': 'ytb_asr_batch3_zh_zh_prompt',
|
|
|
|
| 385 |
|
| 386 |
'YouTube ASR: Tamil with English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
| 387 |
|
| 388 |
+
'YouTube ASR: Tamil with English Prompt V2': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
| 389 |
+
|
| 390 |
'YouTube ASR Translation: Malay2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of dataset is same as <i>YouTube ASR: Malay<i>',
|
| 391 |
|
| 392 |
# 'YouTube ASR Translation: Chinese2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of dataset is same as <i>YouTube ASR: Chinese<i>',
|
app/summarization.py
CHANGED
|
@@ -29,6 +29,9 @@ def sum_table_mulit_metrix(task_name, metrics_lists: List[str]):
|
|
| 29 |
chart_data = pd.merge(chart_data, one_chart_data, on='Model', how='outer')
|
| 30 |
|
| 31 |
selected_columns = [i for i in chart_data.columns if i != 'Model']
|
|
|
|
|
|
|
|
|
|
| 32 |
chart_data['Average'] = chart_data[selected_columns].mean(axis=1)
|
| 33 |
|
| 34 |
# Update dataset name in table
|
|
@@ -54,8 +57,11 @@ def sum_table_mulit_metrix(task_name, metrics_lists: List[str]):
|
|
| 54 |
sorted(chart_data['model_show'].tolist()),
|
| 55 |
default = sorted(chart_data['model_show'].tolist()),
|
| 56 |
)
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
if len(chart_data) == 0: return
|
| 61 |
|
|
|
|
| 29 |
chart_data = pd.merge(chart_data, one_chart_data, on='Model', how='outer')
|
| 30 |
|
| 31 |
selected_columns = [i for i in chart_data.columns if i != 'Model']
|
| 32 |
+
# TODO: temp code. Delete this after ytb tamil v2 is fully tested.
|
| 33 |
+
_columns_to_exclude_from_average = ["ytb_asr_batch3_tamil_v2"]
|
| 34 |
+
selected_columns = [col for col in selected_columns if col not in _columns_to_exclude_from_average]
|
| 35 |
chart_data['Average'] = chart_data[selected_columns].mean(axis=1)
|
| 36 |
|
| 37 |
# Update dataset name in table
|
|
|
|
| 57 |
sorted(chart_data['model_show'].tolist()),
|
| 58 |
default = sorted(chart_data['model_show'].tolist()),
|
| 59 |
)
|
| 60 |
+
# TODO: delete this after ytb tamil v2 is fully utilized.
|
| 61 |
+
if task_name == 'asr_tamil':
|
| 62 |
+
chart_data = chart_data[chart_data['model_show'].isin(models)]
|
| 63 |
+
else:
|
| 64 |
+
chart_data = chart_data[chart_data['model_show'].isin(models)].dropna(axis=0)
|
| 65 |
|
| 66 |
if len(chart_data) == 0: return
|
| 67 |
|
model_information.py
CHANGED
|
@@ -14,7 +14,7 @@ data['Link'].append('https://arxiv.org/abs/2511.09690')
|
|
| 14 |
|
| 15 |
data['Original Name'].append('MERaLiON-ASR-dev-1215')
|
| 16 |
data['Proper Display Name'].append('🌟 API: MERaLiON-ASR-dev-1215')
|
| 17 |
-
data['Link'].append(
|
| 18 |
|
| 19 |
data['Original Name'].append('SALMONN_7B')
|
| 20 |
data['Proper Display Name'].append('Fusion: SALMONN-7B')
|
|
@@ -95,6 +95,6 @@ def get_dataframe():
|
|
| 95 |
Returns a DataFrame with the data and drops rows with missing values.
|
| 96 |
"""
|
| 97 |
df = pd.DataFrame(data)
|
| 98 |
-
return df
|
| 99 |
|
| 100 |
|
|
|
|
| 14 |
|
| 15 |
data['Original Name'].append('MERaLiON-ASR-dev-1215')
|
| 16 |
data['Proper Display Name'].append('🌟 API: MERaLiON-ASR-dev-1215')
|
| 17 |
+
data['Link'].append(None)
|
| 18 |
|
| 19 |
data['Original Name'].append('SALMONN_7B')
|
| 20 |
data['Proper Display Name'].append('Fusion: SALMONN-7B')
|
|
|
|
| 95 |
Returns a DataFrame built from the data. Rows with missing values (e.g. a None link) are kept, not dropped.
|
| 96 |
"""
|
| 97 |
df = pd.DataFrame(data)
|
| 98 |
+
return df
|
| 99 |
|
| 100 |
|
results_organized/wer/asr_tamil.csv
CHANGED
|
@@ -1,18 +1,18 @@
|
|
| 1 |
-
Model,commonvoice_17_ta_asr,fleurs_tamil_ta_30_asr,ytb_asr_batch3_tamil
|
| 2 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.5284951114826634,0.4624736472241743,0.6929759165018962
|
| 3 |
-
MERaLiON-AudioLLM-v2-2b,0.1385300804387941,0.1432185523541813,0.7504943113675407
|
| 4 |
-
MERaLiON-AudioLLM-v2-9b,0.1559177057102368,0.1608573436401967,0.6644679264853651
|
| 5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.1287122656417262,0.1383345045678145,0.5467894071504975
|
| 6 |
-
Qwen2.5-Omni-3B,0.8307319012713203,1.653935347856641,1.460763022268322
|
| 7 |
-
Qwen2.5-Omni-7B,0.8465494917777076,0.8666549543218552,1.3615441962983372
|
| 8 |
-
SALMONN_7B,1.4272941368377052,1.507519325368939,0.985267900554277
|
| 9 |
-
SeaLLMs-Audio-7B,1.2968793010286783,2.061876317638791,3.617451622313701
|
| 10 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.2380539724938065,0.2724525650035137,0.9665002755178114
|
| 11 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.2440435531721838,0.283977512297962,0.8976532365239376
|
| 12 |
-
hy_whisper_local_cs,0.3179371374392121,0.3311314125087842,0.8339924151567211
|
| 13 |
-
phi_4_multimodal_instruct,1.1784589191228196,1.7016514406184118,2.750056724255292
|
| 14 |
-
whisper_large_v3,0.2448438631011245,0.2314476458186929,0.8481572720495284
|
| 15 |
-
MERaLiON-SpeechEncoder2-ASR-CTC,0.1442,0.1632,0.6578
|
| 16 |
-
Omnilingual-ASR-7B,0.3144055763521363,0.1062680115273775,0.867817443980474
|
| 17 |
-
Fusion: Omnilingual-LLM-ASR-7B[with language code],0.3144055763521363,0.1062680115273775,0.8675527848026818
|
| 18 |
-
MERaLiON-ASR-dev-1215,0.12422135451181095,0.13475052705551652,0.49745551197692134
|
|
|
|
| 1 |
+
Model,commonvoice_17_ta_asr,fleurs_tamil_ta_30_asr,ytb_asr_batch3_tamil,ytb_asr_batch3_tamil_v2
|
| 2 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.5284951114826634,0.4624736472241743,0.6929759165018962,
|
| 3 |
+
MERaLiON-AudioLLM-v2-2b,0.1385300804387941,0.1432185523541813,0.7504943113675407,
|
| 4 |
+
MERaLiON-AudioLLM-v2-9b,0.1559177057102368,0.1608573436401967,0.6644679264853651,0.5082255864938114
|
| 5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.1287122656417262,0.1383345045678145,0.5467894071504975,0.3790156916225034
|
| 6 |
+
Qwen2.5-Omni-3B,0.8307319012713203,1.653935347856641,1.460763022268322,
|
| 7 |
+
Qwen2.5-Omni-7B,0.8465494917777076,0.8666549543218552,1.3615441962983372,
|
| 8 |
+
SALMONN_7B,1.4272941368377052,1.507519325368939,0.985267900554277,
|
| 9 |
+
SeaLLMs-Audio-7B,1.2968793010286783,2.061876317638791,3.617451622313701,
|
| 10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.2380539724938065,0.2724525650035137,0.9665002755178114,
|
| 11 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.2440435531721838,0.283977512297962,0.8976532365239376,
|
| 12 |
+
hy_whisper_local_cs,0.3179371374392121,0.3311314125087842,0.8339924151567211,
|
| 13 |
+
phi_4_multimodal_instruct,1.1784589191228196,1.7016514406184118,2.750056724255292,
|
| 14 |
+
whisper_large_v3,0.2448438631011245,0.2314476458186929,0.8481572720495284,
|
| 15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.1442,0.1632,0.6578,
|
| 16 |
+
Omnilingual-ASR-7B,0.3144055763521363,0.1062680115273775,0.867817443980474,
|
| 17 |
+
Fusion: Omnilingual-LLM-ASR-7B[with language code],0.3144055763521363,0.1062680115273775,0.8675527848026818,
|
| 18 |
+
MERaLiON-ASR-dev-1215,0.12422135451181095,0.13475052705551652,0.49745551197692134,0.3112948609504739
|