import gradio as gr
import pandas as pd
from pathlib import Path
from scripts.summarize import summarize_events
from scripts.data_cleansing import cleanse_data
from dotenv import load_dotenv
import os
import numpy as np
import joblib

ROOT = Path(__file__).resolve().parents[0]
load_dotenv(ROOT / '.env')


def preview_csv(file_obj):
    try:
        df = pd.read_csv(file_obj.name, dtype=str)
        return df.head(10).to_html(index=False)
    except Exception as e:
        return f"Error reading file: {e}"


def parse_row_selection(df, rows_text: str):
    if not rows_text:
        return df
    idx = []
    for token in rows_text.split(','):
        token = token.strip()
        if token.isdigit():
            idx.append(int(token))
    return df.iloc[idx]


with gr.Blocks() as demo:
    gr.Markdown("# OMS Analyze — Prototype")
    with gr.Tabs():
        # Upload & Preview tab
        with gr.TabItem('Upload & Preview'):
            gr.Markdown("**อัปโหลด & แสดงตัวอย่าง**: อัปโหลดไฟล์ CSV เพื่อแสดงตัวอย่างข้อมูล ใช้ algorithm ทำความสะอาดข้อมูล (ลบข้อมูลซ้ำ, จัดการค่าที่หายไป) และเปรียบเทียบข้อมูลเดิมกับข้อมูลที่ทำความสะอาด ดาวน์โหลด CSV ที่ทำความสะอาดแล้ว")
            csv_up = gr.File(label='Upload CSV (data.csv)')
            with gr.Row():
                remove_dup = gr.Checkbox(label='Remove Duplicates', value=False)
                missing_handling = gr.Radio(choices=['drop','impute_mean','impute_median','impute_mode'], value='drop', label='Missing Values Handling')
                apply_clean = gr.Button('Apply Cleansing')
            with gr.Tabs():
                with gr.TabItem('Original Data'):
                    original_preview = gr.Dataframe(label='Original Data Preview')
                with gr.TabItem('Cleansed Data'):
                    cleansed_preview = gr.Dataframe(label='Cleansed Data Preview')
                    download_cleansed = gr.File(label='Download Cleansed CSV')
            clean_status = gr.Textbox(label='Cleansing Status', interactive=False)
            
            def initial_preview(file):
                if file is None:
                    return pd.DataFrame(), pd.DataFrame(), "Upload a file"
                df = pd.read_csv(file.name, dtype=str)
                return df.head(100), pd.DataFrame(), "File uploaded, apply cleansing if needed"
            
            def apply_cleansing(file, remove_duplicates, missing_strategy):
                if file is None:
                    return pd.DataFrame(), "No file", None
                try:
                    df = pd.read_csv(file.name, dtype=str)
                    df_clean, orig_shape, clean_shape = cleanse_data(df, remove_duplicates, missing_strategy)
                    status = f"Original: {orig_shape[0]} rows, {orig_shape[1]} cols → Cleaned: {clean_shape[0]} rows, {clean_shape[1]} cols"
                    # Save cleansed data for download
                    out_file = ROOT / 'outputs' / 'cleansed_data.csv'
                    out_file.parent.mkdir(exist_ok=True)
                    df_clean.to_csv(out_file, index=False, encoding='utf-8-sig')
                    return df_clean.head(100), status, str(out_file)
                except Exception as e:
                    return pd.DataFrame(), f"Error: {e}", None
            
            csv_up.change(fn=initial_preview, inputs=csv_up, outputs=[original_preview, cleansed_preview, clean_status])
            apply_clean.click(fn=apply_cleansing, inputs=[csv_up, remove_dup, missing_handling], outputs=[cleansed_preview, clean_status, download_cleansed])

        # Summarization tab
        with gr.TabItem('Summarization'):
            gr.Markdown("**สรุปเหตุการณ์**: สร้างสรุปข้อความสำหรับเหตุการณ์ไฟฟ้าล้ม เลือกแถวเฉพาะ, ระดับรายละเอียด และเลือกใช้ AI (Hugging Face) เพื่อเพิ่มความละเอียด ดาวน์โหลด CSV สรุป")
            csv_in = gr.File(label='Upload CSV (data.csv)')
            with gr.Row():
                rows = gr.Textbox(label='Rows (comma-separated indexes) or empty = all', placeholder='e.g. 0,1,2')
                use_hf = gr.Checkbox(label='Use Generative AI', value=False)
                verbosity = gr.Radio(choices=['analyze','recommend'], value='analyze', label='Summary Type', interactive=True)
                run_btn = gr.Button('Generate Summaries', interactive=True)
            with gr.Row():
                model_selector = gr.Dropdown(
                    choices=[
                        'meta-llama/Llama-3.1-8B-Instruct:novita',
                        'meta-llama/Llama-4-Scout-17B-16E-Instruct:novita',
                        'Qwen/Qwen3-VL-235B-A22B-Instruct:novita',
                        'deepseek-ai/DeepSeek-R1:novita'
                    ],
                    value='meta-llama/Llama-3.1-8B-Instruct:novita',
                    label='GenAI Model',
                    interactive=True,
                    visible=False
                )
            out = gr.Dataframe(headers=['EventNumber','OutageDateTime','Summary'])
            status = gr.Textbox(label='Status', interactive=False)
            download = gr.File(label='Download summaries')

            def run_summarize(file, rows_text, use_hf_flag, verbosity_level):
                print(f"Debug: file={file}, rows_text={rows_text}, use_hf_flag={use_hf_flag}, verbosity_level={verbosity_level}")
                if file is None:
                    return pd.DataFrame([], columns=['EventNumber','OutageDateTime','Summary']), 'No file provided', None
                df = pd.read_csv(file.name, dtype=str)
                df_sel = parse_row_selection(df, rows_text)
                model = 'meta-llama/Llama-3.1-8B-Instruct:novita'  # default
                res = summarize_events(df_sel, use_hf=use_hf_flag, verbosity=verbosity_level, model=model)
                out_df = pd.DataFrame(res)
                out_file = ROOT / 'outputs' / 'summaries_from_ui.csv'
                out_file.parent.mkdir(exist_ok=True)
                out_df.to_csv(out_file, index=False, encoding='utf-8-sig')
                status_text = f"Summaries generated: {len(out_df)} rows. HF used: {use_hf_flag}"
                return out_df, status_text, str(out_file)

            def update_model_visibility(use_hf_flag):
                return gr.update(visible=use_hf_flag, interactive=use_hf_flag)
            
            use_hf.change(fn=update_model_visibility, inputs=use_hf, outputs=model_selector)
            
            run_btn.click(fn=run_summarize, inputs=[csv_in, rows, use_hf, verbosity], outputs=[out, status, download])
        with gr.TabItem('Anomaly Detection'):
            gr.Markdown("**ตรวจจับความผิดปกติ**: ตรวจจับเหตุการณ์ไฟฟ้าล้มที่ผิดปกติโดยใช้ algorithm การเรียนรู้ของเครื่อง (Isolation Forest, Local Outlier Factor, Autoencoder) ตั้งระดับการปนเปื้อนและดาวน์โหลดผลลัพธ์พร้อมธงความผิดปกติ")
            csv_in_anom = gr.File(label='Upload CSV for Anomaly')
            with gr.Row():
                alg = gr.Radio(choices=['both','iso','lof','autoencoder'], value='both', label='Algorithm')
                contamination = gr.Slider(minimum=0.01, maximum=0.2, value=0.05, step=0.01, label='Contamination')
                run_anom = gr.Button('Run Anomaly Detection')
            anom_out = gr.Dataframe()
            anom_status = gr.Textbox(label='Anomaly Status', interactive=False)
            anom_download = gr.File(label='Download anomalies CSV')

            def run_anomaly_ui(file, algorithm, contamination):
                if file is None:
                    return pd.DataFrame(), 'No file provided', None
                from scripts.anomaly import detect_anomalies
                df = pd.read_csv(file.name, dtype=str)
                res = detect_anomalies(df, contamination=contamination, algorithm=algorithm)
                out_file = ROOT / 'outputs' / 'anomalies_from_ui.csv'
                out_file.parent.mkdir(exist_ok=True)
                res.to_csv(out_file, index=False, encoding='utf-8-sig')
                status = f"Anomaly detection done. Rows: {len(res)}. Flags: {res['final_flag'].sum()}"
                return res, status, str(out_file)

            run_anom.click(fn=run_anomaly_ui, inputs=[csv_in_anom, alg, contamination], outputs=[anom_out, anom_status, anom_download])

        # Forecasting tab
        with gr.TabItem('Forecasting'):
            gr.Markdown("**พยากรณ์**: พยากรณ์จำนวนเหตุการณ์หรือเวลาหยุดทำงานในอนาคตโดยใช้การวิเคราะห์อนุกรมเวลา (Prophet) เลือกเมตริกและช่วงพยากรณ์ ดาวน์โหลด CSV พยากรณ์")
            csv_in_fc = gr.File(label='Upload CSV for Forecast')
            with gr.Row():
                metric_fc = gr.Radio(choices=['count','downtime_minutes'], value='count', label='Metric')
                horizon = gr.Slider(minimum=7, maximum=90, value=14, step=1, label='Horizon (days)')
                run_fc = gr.Button('Run Forecast')
            fc_out = gr.Dataframe()
            fc_status = gr.Textbox(label='Forecast Status', interactive=False)
            fc_download = gr.File(label='Download forecast CSV')

            def run_forecast_ui(file, metric, horizon_days):
                if file is None:
                    return pd.DataFrame(), 'No file provided', None
                from scripts.forecast import prepare_timeseries, run_forecast
                df = pd.read_csv(file.name, dtype=str)
                ts, fcst = run_forecast(df, metric=metric, periods=int(horizon_days))
                out_file = ROOT / 'outputs' / f'forecast_{metric}_from_ui.csv'
                out_file.parent.mkdir(exist_ok=True)
                fcst.to_csv(out_file, index=False, encoding='utf-8-sig')
                status = f"Forecast produced: {len(fcst)} rows (horizon {horizon_days} days)."
                display_df = pd.concat([ts.tail(30).rename(columns={'y':'actual'}).set_index('ds'), fcst.set_index('ds')], axis=1).reset_index()
                return display_df, status, str(out_file)

            run_fc.click(fn=run_forecast_ui, inputs=[csv_in_fc, metric_fc, horizon], outputs=[fc_out, fc_status, fc_download])

        # Classification tab
        with gr.TabItem('Classification'):
            gr.Markdown("**จำแนกประเภท**: ฝึกโมเดลการเรียนรู้ของเครื่องเพื่อจำแนกสาเหตุของไฟฟ้าล้ม เลือกประเภทโมเดล (Random Forest, Gradient Boosting, MLP), เปิดใช้งาน weak labeling หรือ grid search ดาวน์โหลดโมเดลที่ฝึกแล้วและการทำนาย")
            csv_in_cls = gr.File(label='Upload CSV for Classification')
            with gr.Row():
                label_col = gr.Dropdown(choices=['CauseType','SubCauseType'], value='CauseType', label='Target Column')
                do_weak = gr.Checkbox(label='Run weak-labeling using HF (requires HF_TOKEN)', value=False)
                model_type = gr.Radio(choices=['rf','gb','mlp'], value='rf', label='Model Type')
                run_cls = gr.Button('Train Classifier')
            def update_hyperparams_visibility(model_choice):
                rf_visible = model_choice == 'rf'
                gb_visible = model_choice == 'gb'
                mlp_visible = model_choice == 'mlp'
                return [
                    gr.update(visible=rf_visible),
                    gr.update(visible=rf_visible),
                    gr.update(visible=rf_visible),
                    gr.update(visible=rf_visible),
                    gr.update(visible=gb_visible),
                    gr.update(visible=gb_visible),
                    gr.update(visible=gb_visible),
                    gr.update(visible=mlp_visible),
                    gr.update(visible=mlp_visible),
                    gr.update(visible=mlp_visible),
                ]

            with gr.Accordion("Hyperparameters (Advanced)", open=False):
                gr.Markdown("Adjust hyperparameters for the selected model. Defaults are set for good performance.")
                rf_n_estimators = gr.Slider(minimum=50, maximum=500, value=100, step=10, label="RF: n_estimators", visible=True)
                rf_max_depth = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="RF: max_depth", visible=True)
                rf_min_samples_split = gr.Slider(minimum=2, maximum=10, value=2, step=1, label="RF: min_samples_split", visible=True)
                rf_min_samples_leaf = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="RF: min_samples_leaf", visible=True)
                gb_n_estimators = gr.Slider(minimum=50, maximum=500, value=100, step=10, label="GB: n_estimators", visible=False)
                gb_max_depth = gr.Slider(minimum=3, maximum=20, value=3, step=1, label="GB: max_depth", visible=False)
                gb_learning_rate = gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="GB: learning_rate", visible=False)
                mlp_hidden_layer_sizes = gr.Textbox(value="(100,)", label="MLP: hidden_layer_sizes (tuple)", visible=False)
                mlp_alpha = gr.Slider(minimum=0.0001, maximum=0.01, value=0.0001, step=0.0001, label="MLP: alpha", visible=False)
                mlp_max_iter = gr.Slider(minimum=100, maximum=4000, value=500, step=50, label="MLP: max_iter", visible=False)

            model_type.change(fn=update_hyperparams_visibility, inputs=model_type, outputs=[rf_n_estimators, rf_max_depth, rf_min_samples_split, rf_min_samples_leaf, gb_n_estimators, gb_max_depth, gb_learning_rate, mlp_hidden_layer_sizes, mlp_alpha, mlp_max_iter])

            cls_out = gr.Textbox(label='Classification Report')
            model_path_state = gr.State()
            cls_download_model = gr.File(label='Download saved model')
            cls_download_preds = gr.File(label='Download predictions CSV')
            
            # Test section
            gr.Markdown("---")
            gr.Markdown("**ทดสอบโมเดล**: อัปโหลดไฟล์ CSV ใหม่เพื่อทดสอบโมเดลที่ฝึกแล้ว")
            test_csv = gr.File(label='Upload CSV for Testing')
            run_test = gr.Button('Test Model')
            test_out = gr.Dataframe(label='Test Predictions')
            test_status = gr.Textbox(label='Test Status', interactive=False)
            test_download = gr.File(label='Download Test Predictions')

            def run_classify_ui(file, label_col_choice, use_weak, model_choice, rf_n_est, rf_max_d, rf_min_ss, rf_min_sl, gb_n_est, gb_max_d, gb_lr, mlp_hls, mlp_a, mlp_mi):
                if file is None:
                    return 'No file provided', None, None, None
                from scripts.classify import train_classifier
                df = pd.read_csv(file.name, dtype=str)
                try:
                    hyperparams = {}
                    if model_choice == 'rf':
                        hyperparams = {'n_estimators': int(rf_n_est), 'max_depth': int(rf_max_d), 'min_samples_split': int(rf_min_ss), 'min_samples_leaf': int(rf_min_sl)}
                    elif model_choice == 'gb':
                        hyperparams = {'n_estimators': int(gb_n_est), 'max_depth': int(gb_max_d), 'learning_rate': gb_lr}
                    elif model_choice == 'mlp':
                        import ast
                        hyperparams = {'hidden_layer_sizes': ast.literal_eval(mlp_hls), 'alpha': mlp_a, 'max_iter': int(mlp_mi)}
                    res = train_classifier(df, label_col=label_col_choice, model_type=model_choice, hyperparams=hyperparams)
                    report = res.get('report','')
                    model_file = res.get('model_file')
                    preds_file = res.get('predictions_file')
                    # ensure returned file paths are strings for Gradio
                    return report, model_file, preds_file, model_file
                except Exception as e:
                    return f'Training failed: {e}', None, None, None
            
            def run_test_ui(test_file, model_path):
                if test_file is None:
                    return pd.DataFrame(), 'No test file provided', None
                if model_path is None:
                    return pd.DataFrame(), 'No trained model available. Please train a model first.', None
                try:
                    from scripts.classify import parse_and_features
                    # Load model
                    model_data = joblib.load(model_path)
                    pipeline = model_data['pipeline']
                    le = model_data['label_encoder']
                    
                    # Load and preprocess test data
                    df_test = pd.read_csv(test_file.name, dtype=str)
                    df_test = parse_and_features(df_test)
                    
                    # Define features (same as training)
                    feature_cols = ['duration_min','Load(MW)_num','Capacity(kVA)_num','AffectedCustomer_num','hour','weekday','device_freq','OpDeviceType','Owner','Weather','EventType']
                    X_test = df_test[feature_cols]
                    
                    # Predict
                    y_pred_encoded = pipeline.predict(X_test)
                    y_pred = le.inverse_transform(y_pred_encoded)
                    
                    # Create output df
                    pred_df = df_test.copy()
                    pred_df['Predicted_CauseType'] = y_pred
                    
                    # Save predictions
                    out_file = ROOT / 'outputs' / 'test_predictions.csv'
                    out_file.parent.mkdir(exist_ok=True)
                    pred_df.to_csv(out_file, index=False, encoding='utf-8-sig')
                    
                    status = f"Test completed. Predictions for {len(pred_df)} rows."
                    return pred_df.head(100), status, str(out_file)
                except Exception as e:
                    return pd.DataFrame(), f'Test failed: {e}', None

            run_cls.click(fn=run_classify_ui, inputs=[csv_in_cls, label_col, do_weak, model_type, rf_n_estimators, rf_max_depth, rf_min_samples_split, rf_min_samples_leaf, gb_n_estimators, gb_max_depth, gb_learning_rate, mlp_hidden_layer_sizes, mlp_alpha, mlp_max_iter], outputs=[cls_out, cls_download_model, cls_download_preds, model_path_state])
            run_test.click(fn=run_test_ui, inputs=[test_csv, model_path_state], outputs=[test_out, test_status, test_download])

        # Label Suggestion tab
        with gr.TabItem('Label Suggestion'):
            gr.Markdown("**แนะนำป้ายกำกับ**: แนะนำป้ายกำกับสาเหตุที่เป็นไปได้สำหรับเหตุการณ์ไฟฟ้าล้มที่ไม่รู้สาเหตุ โดยอิงจากความคล้ายกับสาเหตุที่รู้จัก ตั้งจำนวนคำแนะนำสูงสุด ดาวน์โหลด CSV คำแนะนำ")
            csv_in_ls = gr.File(label='Upload CSV (defaults to data/data_3.csv)')
            with gr.Row():
                top_k = gr.Slider(minimum=1, maximum=5, value=1, step=1, label='Top K suggestions')
                run_ls = gr.Button('Run Label Suggestion')
            ls_out = gr.Dataframe()
            ls_status = gr.Textbox(label='Label Suggestion Status', interactive=False)
            ls_download = gr.File(label='Download label suggestions')

            def run_label_suggestion(file, top_k_suggest):
                # delegate to scripts.label_suggestion
                from scripts.label_suggestion import suggest_labels_to_file
                if file is None:
                    default = ROOT / 'data' / 'data_3.csv'
                    if not default.exists():
                        return pd.DataFrame(), 'No file provided and default data/data_3.csv not found', None
                    df = pd.read_csv(default, dtype=str)
                else:
                    df = pd.read_csv(file.name, dtype=str)

                out_file = ROOT / 'outputs' / 'label_suggestions.csv'
                out_df = suggest_labels_to_file(df, out_path=str(out_file), top_k=int(top_k_suggest))
                status = f"Label suggestion done. Unknown rows processed: {len(out_df)}. Output: {out_file}"
                return out_df, status, str(out_file) if len(out_df)>0 else None

            run_ls.click(fn=run_label_suggestion, inputs=[csv_in_ls, top_k], outputs=[ls_out, ls_status, ls_download])

if __name__ == '__main__':
    demo.launch()