# Utility script: converts benchmark CSV records into the JSON score files
# consumed by the leaderboard front-end.
import json
from datetime import datetime

import pandas as pd
def process_csv_to_json():
    """Convert ``src/record.csv`` into ``src/detail_math_score.json``.

    Output shape::

        {"time": "...", "results": {algorithm: {llm: {"META": {...},
                                                      dataset: {...}}}}}

    Placeholder cells (``'-'``) and missing cells (NaN) become 0 / 0.0 / ''.
    """
    df = pd.read_csv('src/record.csv')
    # Drop fully-empty rows and normalize raw CSV headers to the JSON keys.
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'pass rate': 'Pass rate',
        'Cost($)': 'Cost($)',
        'Eval Date': 'Eval Date',
        'framework': 'Framework',
        'X-shot': 'X-shot',
        'Nums': 'Samples',
        'All tokens': 'All tokens',
        'Total input tokens': 'Total input tokens',
        'Average input tokens': 'Average input tokens',
        'Total output tokens': 'Total output tokens',
        'Average output tokens': 'Average output tokens'
    })

    def parse_number(value):
        # Token counts may carry thousands separators ("1,319") or be
        # missing ('-'/NaN); normalize to int, defaulting to 0.
        if pd.isna(value) or value == '-':
            return 0
        return int(float(str(value).replace(',', '')))

    def parse_float(value):
        # BUGFIX: the original only tested for the '-' placeholder on
        # 'Score' and 'Pass rate', so a NaN cell leaked through float()
        # and json.dump then emitted literal NaN — invalid JSON for
        # strict parsers. Treat NaN exactly like '-'.
        if pd.isna(value) or value == '-':
            return None
        return float(value)

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    llms = df['LLM'].dropna().unique()
    # One top-level entry per algorithm, one nested entry per LLM that has
    # at least one row for that algorithm.
    for algorithm in df['Algorithm'].dropna().unique():
        if not isinstance(algorithm, str):
            continue
        result['results'][algorithm] = {}
        for llm in llms:
            llm_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if llm_data.empty:
                continue
            result['results'][algorithm][llm] = {
                'META': {
                    'Algorithm': str(algorithm),
                    'LLM': str(llm),
                    'Eval Date': str(llm_data['Eval Date'].iloc[0])
                }
            }
            for dataset in df['Dataset'].dropna().unique():
                if not isinstance(dataset, str):
                    continue
                dataset_data = llm_data[llm_data['Dataset'] == dataset]
                if dataset_data.empty:
                    continue
                # Only the first matching row is used, as in the original.
                row = dataset_data.iloc[0]
                score = parse_float(row['Score'])
                pass_rate = parse_float(row['Pass rate'])
                cost = parse_float(row['Cost($)'])
                result['results'][algorithm][llm][dataset] = {
                    'Score': round(score, 2) if score is not None else 0,
                    # 'Pass rate' is a percentage in the CSV; store a 0-1 ratio.
                    'Pass rate': round(pass_rate / 100, 4) if pass_rate is not None else 0.0,
                    'Cost($)': cost if cost is not None else 0.0,
                    'Framework': str(row['Framework']) if 'Framework' in row and pd.notnull(row['Framework']) else '',
                    'X-shot': str(row['X-shot']) if pd.notnull(row['X-shot']) else '',
                    'Samples': parse_number(row['Samples']),
                    'All tokens': parse_number(row['All tokens']),
                    'Total input tokens': parse_number(row['Total input tokens']),
                    'Average input tokens': parse_number(row['Average input tokens']),
                    'Total output tokens': parse_number(row['Total output tokens']),
                    'Average output tokens': parse_number(row['Average output tokens'])
                }

    # Sanity pass: report (but do not fail on) entries missing expected fields.
    required_fields = ['Score', 'Pass rate', 'Cost($)', 'Framework', 'X-shot',
                       'Samples', 'All tokens', 'Total input tokens',
                       'Average input tokens', 'Total output tokens',
                       'Average output tokens']
    for algo_name, llm_map in result['results'].items():
        for llm_name, datasets in llm_map.items():
            meta = datasets.get('META', {})
            if 'LLM' not in meta or 'Eval Date' not in meta:
                print(f"Missing META fields in algorithm '{algo_name}' for LLM '{llm_name}'")
            for dataset_name, data in datasets.items():
                if dataset_name == 'META':
                    continue
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Missing fields {missing_fields} in dataset '{dataset_name}' for LLM '{llm_name}' in algorithm '{algo_name}'")

    with open('src/detail_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
def process_csv_to_overall_json():
    """Build ``src/overall_math_score.json`` from ``src/record.csv``.

    One top-level entry per algorithm/LLM pair — keyed by the bare algorithm
    name for gpt-3.5-turbo and ``"<algorithm>-<llm>"`` for every other model —
    holding Score and Cost($) for the three math datasets.
    """
    table = pd.read_csv('src/record.csv').dropna(how='all')
    table = table.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'Cost($)': 'Cost($)',
        'Eval Date': 'Eval Date'
    })

    def cell_as_float(series):
        # First matching cell as float; '-' placeholders and NaN become 0.0.
        value = series.iloc[0]
        if pd.notnull(value) and value != '-':
            return float(value)
        return 0.0

    output = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    model_names = table['LLM'].dropna().unique()
    algorithm_names = [name for name in table['Algorithm'].dropna().unique()
                       if isinstance(name, str)]
    for model in model_names:
        for algorithm in algorithm_names:
            # gpt-3.5-turbo is the baseline and keeps the bare algorithm
            # name; every other model gets a suffixed key for uniqueness.
            key = algorithm if model == 'gpt-3.5-turbo' else f"{algorithm}-{model}"
            subset = table[(table['Algorithm'] == algorithm) & (table['LLM'] == model)]
            if subset.empty:
                print(f"No data found for algorithm '{algorithm}' and LLM '{model}'")
                continue
            entry = {
                "META": {
                    "Algorithm": algorithm,
                    "LLM": model,
                    "Eval Date": str(subset['Eval Date'].iloc[0])
                }
            }
            for bench in ['gsm8k', 'AQuA', 'MATH-500']:
                rows = subset[subset['Dataset'] == bench]
                if rows.empty:
                    # Keep the dataset key present with neutral defaults.
                    entry[bench] = {"Score": 0.0, "Cost($)": 0.0}
                else:
                    entry[bench] = {
                        "Score": cell_as_float(rows['Score']),
                        "Cost($)": cell_as_float(rows['Cost($)'])
                    }
            output['results'][key] = entry

    with open('src/overall_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=4, ensure_ascii=False)
def process_multi_modal_csv():
    """Convert ``src/multi-modal.csv`` into ``src/multi_modal_results.json``.

    The CSV may contain Excel formula-quoting artifacts (``="..."``) in both
    headers and cells; these are stripped before numeric normalization.
    Cleaned rows are emitted verbatim as a list of records.
    """
    df = pd.read_csv('src/multi-modal.csv', skipinitialspace=True)
    # Strip '="' / '"' artifacts and surrounding whitespace from headers.
    # (The original also ran an identity rename over every column — a no-op
    # that has been removed.)
    df.columns = df.columns.str.strip().str.replace('="', '').str.replace('"', '')

    def clean_cell(x):
        # Strip the same artifacts from string cells; pass others through.
        return str(x).replace('="', '').replace('"', '').strip() if isinstance(x, str) else x

    # DataFrame.applymap is deprecated since pandas 2.1; per-column
    # Series.map is behavior-identical and works on old and new pandas.
    for column in df.columns:
        df[column] = df[column].map(clean_cell)

    def parse_number(value):
        # "1,234"-style counts -> int; '-'/NaN -> 0.
        if pd.isna(value) or value == '-':
            return 0
        return int(float(str(value).replace(',', '')))

    df['Score'] = df['Score'].apply(lambda x: round(float(x), 2) if pd.notnull(x) and x != '-' else 0.0)
    # 'Pass Rate' is a percentage in the CSV; store a 0-1 ratio.
    df['Pass Rate'] = df['Pass Rate'].apply(lambda x: round(float(x) / 100, 4) if pd.notnull(x) and x != '-' else 0.0)
    df['Total Input Tokens'] = df['Total Input Tokens'].apply(parse_number)
    df['Total Output Tokens'] = df['Total Output Tokens'].apply(parse_number)
    df['All Tokens'] = df['All Tokens'].apply(parse_number)

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "multi_modal_results": df.to_dict(orient='records')
    }
    with open('src/multi_modal_results.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
def _main():
    # Regenerate all three JSON artifacts from the CSV sources.
    process_csv_to_json()
    process_csv_to_overall_json()
    process_multi_modal_csv()


if __name__ == "__main__":
    _main()