import gradio as gr import pandas as pd from pathlib import Path from scripts.summarize import summarize_events from scripts.data_cleansing import cleanse_data from dotenv import load_dotenv import os import numpy as np import joblib ROOT = Path(__file__).resolve().parents[0] load_dotenv(ROOT / '.env') def preview_csv(file_obj): try: df = pd.read_csv(file_obj.name, dtype=str) return df.head(10).to_html(index=False) except Exception as e: return f"Error reading file: {e}" def parse_row_selection(df, rows_text: str): if not rows_text: return df idx = [] for token in rows_text.split(','): token = token.strip() if token.isdigit(): idx.append(int(token)) return df.iloc[idx] with gr.Blocks() as demo: gr.Markdown("# OMS Analyze — Prototype") with gr.Tabs(): # Upload & Preview tab with gr.TabItem('Upload & Preview'): gr.Markdown("**อัปโหลด & แสดงตัวอย่าง**: อัปโหลดไฟล์ CSV เพื่อแสดงตัวอย่างข้อมูล ใช้ algorithm ทำความสะอาดข้อมูล (ลบข้อมูลซ้ำ, จัดการค่าที่หายไป) และเปรียบเทียบข้อมูลเดิมกับข้อมูลที่ทำความสะอาด ดาวน์โหลด CSV ที่ทำความสะอาดแล้ว") csv_up = gr.File(label='Upload CSV (data.csv)') with gr.Row(): remove_dup = gr.Checkbox(label='Remove Duplicates', value=False) missing_handling = gr.Radio(choices=['drop','impute_mean','impute_median','impute_mode'], value='drop', label='Missing Values Handling') apply_clean = gr.Button('Apply Cleansing') with gr.Tabs(): with gr.TabItem('Original Data'): original_preview = gr.Dataframe(label='Original Data Preview') with gr.TabItem('Cleansed Data'): cleansed_preview = gr.Dataframe(label='Cleansed Data Preview') download_cleansed = gr.File(label='Download Cleansed CSV') clean_status = gr.Textbox(label='Cleansing Status', interactive=False) def initial_preview(file): if file is None: return pd.DataFrame(), pd.DataFrame(), "Upload a file" df = pd.read_csv(file.name, dtype=str) return df.head(100), pd.DataFrame(), "File uploaded, apply cleansing if needed" def apply_cleansing(file, remove_duplicates, missing_strategy): if file is None: return pd.DataFrame(), "No file", None try: df = pd.read_csv(file.name, dtype=str) df_clean, orig_shape, clean_shape = cleanse_data(df, remove_duplicates, missing_strategy) status = f"Original: {orig_shape[0]} rows, {orig_shape[1]} cols → Cleaned: {clean_shape[0]} rows, {clean_shape[1]} cols" # Save cleansed data for download out_file = ROOT / 'outputs' / 'cleansed_data.csv' out_file.parent.mkdir(exist_ok=True) df_clean.to_csv(out_file, index=False, encoding='utf-8-sig') return df_clean.head(100), status, str(out_file) except Exception as e: return pd.DataFrame(), f"Error: {e}", None csv_up.change(fn=initial_preview, inputs=csv_up, outputs=[original_preview, cleansed_preview, clean_status]) apply_clean.click(fn=apply_cleansing, inputs=[csv_up, remove_dup, missing_handling], outputs=[cleansed_preview, clean_status, download_cleansed]) # Summarization tab with gr.TabItem('Summarization'): gr.Markdown("**สรุปเหตุการณ์**: สร้างสรุปข้อความสำหรับเหตุการณ์ไฟฟ้าล้ม เลือกแถวเฉพาะ, ระดับรายละเอียด และเลือกใช้ AI (Hugging Face) เพื่อเพิ่มความละเอียด ดาวน์โหลด CSV สรุป") csv_in = gr.File(label='Upload CSV (data.csv)') with gr.Row(): rows = gr.Textbox(label='Rows (comma-separated indexes) or empty = all', placeholder='e.g. 0,1,2') use_hf = gr.Checkbox(label='Use Generative AI', value=False) verbosity = gr.Radio(choices=['analyze','recommend'], value='analyze', label='Summary Type', interactive=True) run_btn = gr.Button('Generate Summaries', interactive=True) with gr.Row(): model_selector = gr.Dropdown( choices=[ 'meta-llama/Llama-3.1-8B-Instruct:novita', 'meta-llama/Llama-4-Scout-17B-16E-Instruct:novita', 'Qwen/Qwen3-VL-235B-A22B-Instruct:novita', 'deepseek-ai/DeepSeek-R1:novita' ], value='meta-llama/Llama-3.1-8B-Instruct:novita', label='GenAI Model', interactive=True, visible=False ) out = gr.Dataframe(headers=['EventNumber','OutageDateTime','Summary']) status = gr.Textbox(label='Status', interactive=False) download = gr.File(label='Download summaries') def run_summarize(file, rows_text, use_hf_flag, verbosity_level): print(f"Debug: file={file}, rows_text={rows_text}, use_hf_flag={use_hf_flag}, verbosity_level={verbosity_level}") if file is None: return pd.DataFrame([], columns=['EventNumber','OutageDateTime','Summary']), 'No file provided', None df = pd.read_csv(file.name, dtype=str) df_sel = parse_row_selection(df, rows_text) model = 'meta-llama/Llama-3.1-8B-Instruct:novita' # default res = summarize_events(df_sel, use_hf=use_hf_flag, verbosity=verbosity_level, model=model) out_df = pd.DataFrame(res) out_file = ROOT / 'outputs' / 'summaries_from_ui.csv' out_file.parent.mkdir(exist_ok=True) out_df.to_csv(out_file, index=False, encoding='utf-8-sig') status_text = f"Summaries generated: {len(out_df)} rows. HF used: {use_hf_flag}" return out_df, status_text, str(out_file) def update_model_visibility(use_hf_flag): return gr.update(visible=use_hf_flag, interactive=use_hf_flag) use_hf.change(fn=update_model_visibility, inputs=use_hf, outputs=model_selector) run_btn.click(fn=run_summarize, inputs=[csv_in, rows, use_hf, verbosity], outputs=[out, status, download]) with gr.TabItem('Anomaly Detection'): gr.Markdown("**ตรวจจับความผิดปกติ**: ตรวจจับเหตุการณ์ไฟฟ้าล้มที่ผิดปกติโดยใช้ algorithm การเรียนรู้ของเครื่อง (Isolation Forest, Local Outlier Factor, Autoencoder) ตั้งระดับการปนเปื้อนและดาวน์โหลดผลลัพธ์พร้อมธงความผิดปกติ") csv_in_anom = gr.File(label='Upload CSV for Anomaly') with gr.Row(): alg = gr.Radio(choices=['both','iso','lof','autoencoder'], value='both', label='Algorithm') contamination = gr.Slider(minimum=0.01, maximum=0.2, value=0.05, step=0.01, label='Contamination') run_anom = gr.Button('Run Anomaly Detection') anom_out = gr.Dataframe() anom_status = gr.Textbox(label='Anomaly Status', interactive=False) anom_download = gr.File(label='Download anomalies CSV') def run_anomaly_ui(file, algorithm, contamination): if file is None: return pd.DataFrame(), 'No file provided', None from scripts.anomaly import detect_anomalies df = pd.read_csv(file.name, dtype=str) res = detect_anomalies(df, contamination=contamination, algorithm=algorithm) out_file = ROOT / 'outputs' / 'anomalies_from_ui.csv' out_file.parent.mkdir(exist_ok=True) res.to_csv(out_file, index=False, encoding='utf-8-sig') status = f"Anomaly detection done. Rows: {len(res)}. Flags: {res['final_flag'].sum()}" return res, status, str(out_file) run_anom.click(fn=run_anomaly_ui, inputs=[csv_in_anom, alg, contamination], outputs=[anom_out, anom_status, anom_download]) # Forecasting tab with gr.TabItem('Forecasting'): gr.Markdown("**พยากรณ์**: พยากรณ์จำนวนเหตุการณ์หรือเวลาหยุดทำงานในอนาคตโดยใช้การวิเคราะห์อนุกรมเวลา (Prophet) เลือกเมตริกและช่วงพยากรณ์ ดาวน์โหลด CSV พยากรณ์") csv_in_fc = gr.File(label='Upload CSV for Forecast') with gr.Row(): metric_fc = gr.Radio(choices=['count','downtime_minutes'], value='count', label='Metric') horizon = gr.Slider(minimum=7, maximum=90, value=14, step=1, label='Horizon (days)') run_fc = gr.Button('Run Forecast') fc_out = gr.Dataframe() fc_status = gr.Textbox(label='Forecast Status', interactive=False) fc_download = gr.File(label='Download forecast CSV') def run_forecast_ui(file, metric, horizon_days): if file is None: return pd.DataFrame(), 'No file provided', None from scripts.forecast import prepare_timeseries, run_forecast df = pd.read_csv(file.name, dtype=str) ts, fcst = run_forecast(df, metric=metric, periods=int(horizon_days)) out_file = ROOT / 'outputs' / f'forecast_{metric}_from_ui.csv' out_file.parent.mkdir(exist_ok=True) fcst.to_csv(out_file, index=False, encoding='utf-8-sig') status = f"Forecast produced: {len(fcst)} rows (horizon {horizon_days} days)." display_df = pd.concat([ts.tail(30).rename(columns={'y':'actual'}).set_index('ds'), fcst.set_index('ds')], axis=1).reset_index() return display_df, status, str(out_file) run_fc.click(fn=run_forecast_ui, inputs=[csv_in_fc, metric_fc, horizon], outputs=[fc_out, fc_status, fc_download]) # Classification tab with gr.TabItem('Classification'): gr.Markdown("**จำแนกประเภท**: ฝึกโมเดลการเรียนรู้ของเครื่องเพื่อจำแนกสาเหตุของไฟฟ้าล้ม เลือกประเภทโมเดล (Random Forest, Gradient Boosting, MLP), เปิดใช้งาน weak labeling หรือ grid search ดาวน์โหลดโมเดลที่ฝึกแล้วและการทำนาย") csv_in_cls = gr.File(label='Upload CSV for Classification') with gr.Row(): label_col = gr.Dropdown(choices=['CauseType','SubCauseType'], value='CauseType', label='Target Column') do_weak = gr.Checkbox(label='Run weak-labeling using HF (requires HF_TOKEN)', value=False) model_type = gr.Radio(choices=['rf','gb','mlp'], value='rf', label='Model Type') run_cls = gr.Button('Train Classifier') def update_hyperparams_visibility(model_choice): rf_visible = model_choice == 'rf' gb_visible = model_choice == 'gb' mlp_visible = model_choice == 'mlp' return [ gr.update(visible=rf_visible), gr.update(visible=rf_visible), gr.update(visible=rf_visible), gr.update(visible=rf_visible), gr.update(visible=gb_visible), gr.update(visible=gb_visible), gr.update(visible=gb_visible), gr.update(visible=mlp_visible), gr.update(visible=mlp_visible), gr.update(visible=mlp_visible), ] with gr.Accordion("Hyperparameters (Advanced)", open=False): gr.Markdown("Adjust hyperparameters for the selected model. Defaults are set for good performance.") rf_n_estimators = gr.Slider(minimum=50, maximum=500, value=100, step=10, label="RF: n_estimators", visible=True) rf_max_depth = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="RF: max_depth", visible=True) rf_min_samples_split = gr.Slider(minimum=2, maximum=10, value=2, step=1, label="RF: min_samples_split", visible=True) rf_min_samples_leaf = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="RF: min_samples_leaf", visible=True) gb_n_estimators = gr.Slider(minimum=50, maximum=500, value=100, step=10, label="GB: n_estimators", visible=False) gb_max_depth = gr.Slider(minimum=3, maximum=20, value=3, step=1, label="GB: max_depth", visible=False) gb_learning_rate = gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="GB: learning_rate", visible=False) mlp_hidden_layer_sizes = gr.Textbox(value="(100,)", label="MLP: hidden_layer_sizes (tuple)", visible=False) mlp_alpha = gr.Slider(minimum=0.0001, maximum=0.01, value=0.0001, step=0.0001, label="MLP: alpha", visible=False) mlp_max_iter = gr.Slider(minimum=100, maximum=4000, value=500, step=50, label="MLP: max_iter", visible=False) model_type.change(fn=update_hyperparams_visibility, inputs=model_type, outputs=[rf_n_estimators, rf_max_depth, rf_min_samples_split, rf_min_samples_leaf, gb_n_estimators, gb_max_depth, gb_learning_rate, mlp_hidden_layer_sizes, mlp_alpha, mlp_max_iter]) cls_out = gr.Textbox(label='Classification Report') model_path_state = gr.State() cls_download_model = gr.File(label='Download saved model') cls_download_preds = gr.File(label='Download predictions CSV') # Test section gr.Markdown("---") gr.Markdown("**ทดสอบโมเดล**: อัปโหลดไฟล์ CSV ใหม่เพื่อทดสอบโมเดลที่ฝึกแล้ว") test_csv = gr.File(label='Upload CSV for Testing') run_test = gr.Button('Test Model') test_out = gr.Dataframe(label='Test Predictions') test_status = gr.Textbox(label='Test Status', interactive=False) test_download = gr.File(label='Download Test Predictions') def run_classify_ui(file, label_col_choice, use_weak, model_choice, rf_n_est, rf_max_d, rf_min_ss, rf_min_sl, gb_n_est, gb_max_d, gb_lr, mlp_hls, mlp_a, mlp_mi): if file is None: return 'No file provided', None, None, None from scripts.classify import train_classifier df = pd.read_csv(file.name, dtype=str) try: hyperparams = {} if model_choice == 'rf': hyperparams = {'n_estimators': int(rf_n_est), 'max_depth': int(rf_max_d), 'min_samples_split': int(rf_min_ss), 'min_samples_leaf': int(rf_min_sl)} elif model_choice == 'gb': hyperparams = {'n_estimators': int(gb_n_est), 'max_depth': int(gb_max_d), 'learning_rate': gb_lr} elif model_choice == 'mlp': import ast hyperparams = {'hidden_layer_sizes': ast.literal_eval(mlp_hls), 'alpha': mlp_a, 'max_iter': int(mlp_mi)} res = train_classifier(df, label_col=label_col_choice, model_type=model_choice, hyperparams=hyperparams) report = res.get('report','') model_file = res.get('model_file') preds_file = res.get('predictions_file') # ensure returned file paths are strings for Gradio return report, model_file, preds_file, model_file except Exception as e: return f'Training failed: {e}', None, None, None def run_test_ui(test_file, model_path): if test_file is None: return pd.DataFrame(), 'No test file provided', None if model_path is None: return pd.DataFrame(), 'No trained model available. Please train a model first.', None try: from scripts.classify import parse_and_features # Load model model_data = joblib.load(model_path) pipeline = model_data['pipeline'] le = model_data['label_encoder'] # Load and preprocess test data df_test = pd.read_csv(test_file.name, dtype=str) df_test = parse_and_features(df_test) # Define features (same as training) feature_cols = ['duration_min','Load(MW)_num','Capacity(kVA)_num','AffectedCustomer_num','hour','weekday','device_freq','OpDeviceType','Owner','Weather','EventType'] X_test = df_test[feature_cols] # Predict y_pred_encoded = pipeline.predict(X_test) y_pred = le.inverse_transform(y_pred_encoded) # Create output df pred_df = df_test.copy() pred_df['Predicted_CauseType'] = y_pred # Save predictions out_file = ROOT / 'outputs' / 'test_predictions.csv' out_file.parent.mkdir(exist_ok=True) pred_df.to_csv(out_file, index=False, encoding='utf-8-sig') status = f"Test completed. Predictions for {len(pred_df)} rows." return pred_df.head(100), status, str(out_file) except Exception as e: return pd.DataFrame(), f'Test failed: {e}', None run_cls.click(fn=run_classify_ui, inputs=[csv_in_cls, label_col, do_weak, model_type, rf_n_estimators, rf_max_depth, rf_min_samples_split, rf_min_samples_leaf, gb_n_estimators, gb_max_depth, gb_learning_rate, mlp_hidden_layer_sizes, mlp_alpha, mlp_max_iter], outputs=[cls_out, cls_download_model, cls_download_preds, model_path_state]) run_test.click(fn=run_test_ui, inputs=[test_csv, model_path_state], outputs=[test_out, test_status, test_download]) # Label Suggestion tab with gr.TabItem('Label Suggestion'): gr.Markdown("**แนะนำป้ายกำกับ**: แนะนำป้ายกำกับสาเหตุที่เป็นไปได้สำหรับเหตุการณ์ไฟฟ้าล้มที่ไม่รู้สาเหตุ โดยอิงจากความคล้ายกับสาเหตุที่รู้จัก ตั้งจำนวนคำแนะนำสูงสุด ดาวน์โหลด CSV คำแนะนำ") csv_in_ls = gr.File(label='Upload CSV (defaults to data/data_3.csv)') with gr.Row(): top_k = gr.Slider(minimum=1, maximum=5, value=1, step=1, label='Top K suggestions') run_ls = gr.Button('Run Label Suggestion') ls_out = gr.Dataframe() ls_status = gr.Textbox(label='Label Suggestion Status', interactive=False) ls_download = gr.File(label='Download label suggestions') def run_label_suggestion(file, top_k_suggest): # delegate to scripts.label_suggestion from scripts.label_suggestion import suggest_labels_to_file if file is None: default = ROOT / 'data' / 'data_3.csv' if not default.exists(): return pd.DataFrame(), 'No file provided and default data/data_3.csv not found', None df = pd.read_csv(default, dtype=str) else: df = pd.read_csv(file.name, dtype=str) out_file = ROOT / 'outputs' / 'label_suggestions.csv' out_df = suggest_labels_to_file(df, out_path=str(out_file), top_k=int(top_k_suggest)) status = f"Label suggestion done. Unknown rows processed: {len(out_df)}. Output: {out_file}" return out_df, status, str(out_file) if len(out_df)>0 else None run_ls.click(fn=run_label_suggestion, inputs=[csv_in_ls, top_k], outputs=[ls_out, ls_status, ls_download]) if __name__ == '__main__': demo.launch()