Evgueni Poloukarov committed
Commit 3ac5032 · Parent: a57b996

feat: implement hour-aware adaptive quantile selection for hourly accuracy


Phase 2 Implementation: Hour-Specific Uncertainty Thresholds
- Add hour-aware adaptive selection to chronos_inference.py (selection rule sketched after this list)
- Morning/evening ramps (5-9, 17-21): 0.45-0.50 threshold (higher volatility expected)
- Mid-day stable (10-16): 0.30-0.35 threshold (standard)
- Night hours (22-4): 0.25 threshold (more conservative for stable periods)
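
In condensed form, the rule these thresholds feed is the following (a minimal sketch of the logic added to chronos_inference.py below; the hour → threshold map here is abbreviated, and the default of 0.30 applies to hours not listed):

    # Sketch: pick the value for one forecast hour from the learned quantile spread
    HOURLY_THRESHOLDS = {7: 0.50, 12: 0.30, 19: 0.50, 23: 0.25}  # abbreviated hour -> threshold map

    def select_adaptive(median, q75, q10, q90, hour):
        spread_pct = (q90 - q10) / (abs(median) + 1.0)   # normalized learned uncertainty
        t_high = HOURLY_THRESHOLDS.get(hour, 0.30)       # hour-specific threshold
        if spread_pct > t_high:                          # high uncertainty: lean on q75
            return q75
        if spread_pct >= 0.5 * t_high:                   # medium: blend median and q75
            return 0.6 * median + 0.4 * q75
        return median                                    # low uncertainty: keep the median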

Analysis Results:
- Worst hours identified: 19 (578 MW), 15 (564 MW), 20 (550 MW)
- Best hours: 1 (414 MW), 0 (414 MW), 2 (429 MW)
- Ramping penalty: 8.3% higher MAE during transition hours
- DE_FR border highest variation: StdDev = 993 MW

Technical Changes:
- Updated _apply_adaptive_selection() to accept run_datetime and prediction_hours
- Generate forecast timestamps to extract hour-of-day
- Apply different thresholds per hour based on electricity market patterns
- Created analyze_hourly_mae.py script for baseline measurement

Next Steps:
- Test with September 2025 forecast
- Compare hourly MAE: baseline vs hour-aware selection (see the comparison sketch after this list)
- If 5-10% improvement on problem hours → proceed to AutoGluon fine-tuning with sample weighting
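
A minimal sketch of that comparison (assumes the exported forecast parquet, with its {border}_median and {border}_adaptive columns, has already been joined with the actuals on 'timestamp'; the helper name is hypothetical):

    import polars as pl

    def hourly_mae_comparison(df: pl.DataFrame, border: str) -> pl.DataFrame:
        # Hour-of-day MAE for the baseline (median) vs hour-aware (adaptive) forecast of one border
        return (
            df.with_columns(pl.col('timestamp').dt.hour().alias('hour'))
              .group_by('hour')
              .agg([
                  (pl.col(f'{border}_median') - pl.col(f'target_border_{border}')).abs().mean().alias('mae_baseline'),
                  (pl.col(f'{border}_adaptive') - pl.col(f'target_border_{border}')).abs().mean().alias('mae_hour_aware'),
              ])
              .with_columns(
                  ((pl.col('mae_baseline') - pl.col('mae_hour_aware')) / pl.col('mae_baseline') * 100).alias('improvement_pct')
              )
              .sort('hour')
        )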

Expected Impact: 5-10% MAE reduction on evening ramp hours (17-21)
Prepares for: AutoGluon fine-tuning with hourly sample_weight_column

scripts/analyze_hourly_mae.py ADDED
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""
+Analyze hourly MAE patterns to establish baseline before optimization.
+
+This script loads September 2025 forecast results and computes MAE per hour-of-day
+to identify which hours have highest errors (likely ramping hours: 7-9, 17-21).
+"""
+
+import polars as pl
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+
+# Paths
+PROJECT_ROOT = Path(__file__).parent.parent
+FORECAST_PATH = PROJECT_ROOT / 'results' / 'september_2025_forecast_full_14day.parquet'
+OUTPUT_PATH = PROJECT_ROOT / 'results' / 'september_2025_hourly_mae_baseline.csv'
+
+def load_data():
+    """Load forecast and actual data."""
+    print('[INFO] Loading forecast results...')
+    df_forecast = pl.read_parquet(FORECAST_PATH)
+    print(f'[INFO] Forecast shape: {df_forecast.shape}')
+    print(f'[INFO] Forecast period: {df_forecast["timestamp"].min()} to {df_forecast["timestamp"].max()}')
+
+    # Load actuals from HuggingFace dataset
+    print('[INFO] Loading actuals from HuggingFace dataset...')
+    from datasets import load_dataset
+    import os
+
+    dataset = load_dataset('evgueni-p/fbmc-features-24month', split='train', token=os.environ.get('HF_TOKEN'))
+    df_actuals_full = pl.from_arrow(dataset.data.table)
+
+    # Filter actuals to forecast period (Sept 2-15, 2025)
+    forecast_start = datetime(2025, 9, 2)
+    forecast_end = datetime(2025, 9, 16)
+
+    df_actuals = df_actuals_full.filter(
+        (pl.col('timestamp') >= forecast_start) &
+        (pl.col('timestamp') < forecast_end)
+    )
+
+    print(f'[INFO] Actuals filtered: {df_actuals.shape[0]} hours')
+
+    return df_forecast, df_actuals
+
+
+def compute_hourly_mae(df_forecast, df_actuals):
+    """Compute MAE per hour-of-day for all borders."""
+    print('[INFO] Computing hourly MAE...')
+
+    # Extract border names from forecast columns
+    forecast_cols = [col for col in df_forecast.columns if col.endswith('_median')]
+    border_names = [col.replace('_median', '') for col in forecast_cols]
+
+    print(f'[INFO] Processing {len(border_names)} borders...')
+
+    hourly_results = []
+
+    for border in border_names:
+        forecast_col = f'{border}_median'
+        actual_col = f'target_border_{border}'
+
+        # Skip if actual column missing
+        if actual_col not in df_actuals.columns:
+            print(f'[WARNING] Skipping {border} - no actual data')
+            continue
+
+        # Create unified dataframe
+        df_border = df_forecast.select(['timestamp', forecast_col]).join(
+            df_actuals.select(['timestamp', actual_col]),
+            on='timestamp',
+            how='inner'
+        )
+
+        # Add hour-of-day
+        df_border = df_border.with_columns([
+            pl.col('timestamp').dt.hour().alias('hour')
+        ])
+
+        # Compute MAE per hour
+        for hour in range(24):
+            hour_df = df_border.filter(pl.col('hour') == hour)
+
+            if len(hour_df) == 0:
+                continue
+
+            mae = (hour_df[forecast_col] - hour_df[actual_col]).abs().mean()
+
+            hourly_results.append({
+                'border': border,
+                'hour': hour,
+                'mae': mae,
+                'n_hours': len(hour_df)
+            })
+
+    return pl.DataFrame(hourly_results)
+
+
+def analyze_patterns(df_hourly):
+    """Analyze hourly MAE patterns."""
+    print('\n' + '='*60)
+    print('HOURLY MAE ANALYSIS')
+    print('='*60)
+
+    # Overall statistics per hour (aggregated across all borders)
+    hourly_stats = df_hourly.group_by('hour').agg([
+        pl.col('mae').mean().alias('mean_mae'),
+        pl.col('mae').median().alias('median_mae'),
+        pl.col('mae').std().alias('std_mae'),
+        pl.col('mae').min().alias('min_mae'),
+        pl.col('mae').max().alias('max_mae'),
+        pl.col('border').count().alias('n_borders')
+    ]).sort('hour')
+
+    print('\n[INFO] MAE by Hour-of-Day (Averaged Across All Borders):')
+    print(hourly_stats)
+
+    # Identify problem hours (highest MAE)
+    print('\n[INFO] Top 5 Worst Hours (Highest MAE):')
+    worst_hours = hourly_stats.sort('mean_mae', descending=True).head(5)
+    print(worst_hours)
+
+    # Identify best hours (lowest MAE)
+    print('\n[INFO] Top 5 Best Hours (Lowest MAE):')
+    best_hours = hourly_stats.sort('mean_mae').head(5)
+    print(best_hours)
+
+    # Ramping hour analysis
+    ramping_hours = [5, 6, 7, 8, 9, 17, 18, 19, 20, 21]
+    non_ramping_hours = [h for h in range(24) if h not in ramping_hours]
+
+    ramping_mae = hourly_stats.filter(pl.col('hour').is_in(ramping_hours))['mean_mae'].mean()
+    non_ramping_mae = hourly_stats.filter(pl.col('hour').is_in(non_ramping_hours))['mean_mae'].mean()
+
+    print(f'\n[INFO] Ramping hours (5-9, 17-21) MAE: {ramping_mae:.2f} MW')
+    print(f'[INFO] Non-ramping hours MAE: {non_ramping_mae:.2f} MW')
+    print(f'[INFO] Ramping penalty: {(ramping_mae - non_ramping_mae) / non_ramping_mae * 100:.1f}% higher')
+
+    # Peak hour analysis
+    peak_hours = [7, 8, 9, 17, 18, 19, 20]
+    peak_mae = hourly_stats.filter(pl.col('hour').is_in(peak_hours))['mean_mae'].mean()
+
+    print(f'\n[INFO] Peak hours (7-9, 17-20) MAE: {peak_mae:.2f} MW')
+
+    # Night hour analysis
+    night_hours = [22, 23, 0, 1, 2, 3, 4]
+    night_mae = hourly_stats.filter(pl.col('hour').is_in(night_hours))['mean_mae'].mean()
+
+    print(f'[INFO] Night hours (22-4) MAE: {night_mae:.2f} MW')
+
+    return hourly_stats
+
+
+def identify_problematic_borders(df_hourly):
+    """Identify borders with largest hourly MAE variations."""
+    print('\n[INFO] Borders with Highest Hourly MAE Variation:')
+
+    border_variation = df_hourly.group_by('border').agg([
+        pl.col('mae').mean().alias('mean_mae'),
+        pl.col('mae').std().alias('std_mae'),
+        pl.col('mae').max().alias('max_mae'),
+        (pl.col('mae').max() - pl.col('mae').min()).alias('range_mae')
+    ]).sort('std_mae', descending=True)
+
+    print(border_variation.head(10))
+
+    return border_variation
+
+
+def main():
+    """Main analysis workflow."""
+    print('[START] Hourly MAE Baseline Analysis')
+    print(f'[INFO] Forecast file: {FORECAST_PATH}')
+
+    # Load data
+    df_forecast, df_actuals = load_data()
+
+    # Compute hourly MAE
+    df_hourly = compute_hourly_mae(df_forecast, df_actuals)
+
+    print(f'\n[INFO] Computed hourly MAE for {df_hourly["border"].n_unique()} borders')
+
+    # Analyze patterns
+    hourly_stats = analyze_patterns(df_hourly)
+
+    # Identify problematic borders
+    border_variation = identify_problematic_borders(df_hourly)
+
+    # Save detailed results
+    df_hourly.write_csv(OUTPUT_PATH)
+    print(f'\n[INFO] Detailed hourly MAE saved to: {OUTPUT_PATH}')
+
+    # Save summary stats
+    summary_path = PROJECT_ROOT / 'results' / 'september_2025_hourly_summary.csv'
+    hourly_stats.write_csv(summary_path)
+    print(f'[INFO] Hourly summary saved to: {summary_path}')
+
+    print('\n[SUCCESS] Hourly MAE baseline analysis complete!')
+
+
+if __name__ == '__main__':
+    main()
src/forecasting/chronos_inference.py CHANGED
@@ -197,9 +197,9 @@ class ChronosInferencePipeline:
             print(f" Using {len(future_data.columns)-2} future covariates for multivariate forecast", flush=True)
 
             # Run covariate-informed inference using DataFrame API
-            # Note: predict_df() returns quantiles directly (0.1, 0.5, 0.9 by default)
+            # Note: predict_df() returns quantiles directly
+            # Request 9 quantiles to capture learned uncertainty and tail events
             # Use torch.inference_mode() to disable gradient tracking (saves ~2-5 GB VRAM)
-            # Memory optimizations: batch_size=32 (from 256), 3 quantiles (from 9)
             with torch.inference_mode():
                 forecasts_df = pipeline.predict_df(
                     context_data,  # Historical data with ALL features
@@ -209,40 +209,59 @@
                     timestamp_column='timestamp',
                     target='target',
                     batch_size=32,  # Reduce from default 256 to save GPU memory
-                    quantile_levels=[0.1, 0.5, 0.9]  # Only compute needed quantiles (not all 9)
+                    quantile_levels=[0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]  # 9 quantiles for volatility
                 )
 
-            # Extract quantiles from predict_df() output
-            # predict_df() returns quantiles directly as string columns: "0.1", "0.5", "0.9"
+            # Extract all 9 quantiles from predict_df() output
+            # predict_df() returns quantiles directly as string columns
            if isinstance(forecasts_df, pd.DataFrame):
-                # Chronos-2 predict_df() returns columns: 'predictions', '0.1', '0.5', '0.9'
-                if '0.5' in forecasts_df.columns and '0.1' in forecasts_df.columns and '0.9' in forecasts_df.columns:
-                    median = forecasts_df['0.5'].values
-                    q10 = forecasts_df['0.1'].values
-                    q90 = forecasts_df['0.9'].values
-                elif 'predictions' in forecasts_df.columns:
-                    # Fallback: use predictions as median (no uncertainty bounds)
-                    median = forecasts_df['predictions'].values
-                    q10 = median.copy()
-                    q90 = median.copy()
-                else:
-                    raise ValueError(f"Unexpected predict_df output format. Columns: {forecasts_df.columns.tolist()}")
+                # Expected columns: '0.01', '0.05', '0.1', '0.25', '0.5', '0.75', '0.9', '0.95', '0.99'
+                quantile_cols = ['0.01', '0.05', '0.1', '0.25', '0.5', '0.75', '0.9', '0.95', '0.99']
+
+                # Extract all quantiles
+                quantiles = {}
+                for q in quantile_cols:
+                    if q in forecasts_df.columns:
+                        quantiles[q] = forecasts_df[q].values
+                    else:
+                        # Fallback if quantile missing
+                        if '0.5' in forecasts_df.columns:
+                            quantiles[q] = forecasts_df['0.5'].values  # Use median as fallback
+                        elif 'predictions' in forecasts_df.columns:
+                            quantiles[q] = forecasts_df['predictions'].values
+                        else:
+                            raise ValueError(f"Missing quantile {q} and no fallback available. Columns: {forecasts_df.columns.tolist()}")
+
+                # Backward compatibility: still extract median, q10, q90
+                median = quantiles['0.5']
+                q10 = quantiles['0.1']
+                q90 = quantiles['0.9']
            else:
                raise TypeError(f"Expected DataFrame from predict_df(), got {type(forecasts_df)}")
 
-            # Round to nearest integer (capacity values are always whole MW)
-            # Removes decimal noise like 3531.4329 -> 3531
+            # Round all quantiles to nearest integer (capacity values are always whole MW)
            median = np.round(median).astype(int)
            q10 = np.round(q10).astype(int)
            q90 = np.round(q90).astype(int)
 
+            # Round all other quantiles
+            for q_key in quantiles:
+                quantiles[q_key] = np.round(quantiles[q_key]).astype(int)
+
            inference_time = time.time() - border_start
 
-            # Store results
+            # Store results (backward compatible + all quantiles)
            results['borders'][border] = {
                'median': median.tolist(),
                'q10': q10.tolist(),
                'q90': q90.tolist(),
+                # Add all 9 quantiles for adaptive selection
+                'q01': quantiles['0.01'].tolist(),
+                'q05': quantiles['0.05'].tolist(),
+                'q25': quantiles['0.25'].tolist(),
+                'q75': quantiles['0.75'].tolist(),
+                'q95': quantiles['0.95'].tolist(),
+                'q99': quantiles['0.99'].tolist(),
                'inference_time_s': inference_time,
                'used_covariates': True,
                'num_features': len(future_data.columns) - 2  # Exclude border and timestamp
@@ -270,6 +289,122 @@
        print(f"Total time: {results['metadata']['total_time_s']:.1f}s")
        print(f"Successful: {results['metadata']['successful_borders']}/{len(forecast_borders)} borders")
 
+        # Apply adaptive quantile selection based on learned uncertainty
+        print(f"\n[ADAPTIVE SELECTION] Computing adaptive forecasts based on quantile spread...")
+        results = self._apply_adaptive_selection(results, run_datetime, prediction_hours)
+        print(f"[OK] Adaptive selection complete")
+
+        return results
+
+    def _apply_adaptive_selection(self, results: Dict, run_datetime: datetime, prediction_hours: int) -> Dict:
+        """
+        Apply HOUR-AWARE adaptive quantile selection based on the model's LEARNED uncertainty.
+
+        This method uses quantile spread (q90-q10) as the model's learned volatility signal,
+        but applies DIFFERENT thresholds for different hours based on electricity market patterns.
+
+        Key insight: Ramping hours (7-9, 17-21) naturally have higher volatility, so we need
+        higher thresholds to avoid false positives. Night hours should be more conservative.
+
+        Args:
+            results: Forecast results dictionary from run_forecast()
+            run_datetime: Forecast run date/time
+            prediction_hours: Number of hours in forecast horizon
+
+        Returns:
+            Updated results dictionary with 'adaptive' forecast added to each border
+        """
+        # Generate forecast timestamps (start next day at midnight)
+        forecast_start = run_datetime + timedelta(days=1)
+        forecast_timestamps = [forecast_start + timedelta(hours=h) for h in range(prediction_hours)]
+
+        # Extract hour-of-day for each timestamp
+        hours_of_day = np.array([ts.hour for ts in forecast_timestamps])
+
+        # Define hour-specific uncertainty thresholds based on electricity market patterns
+        # From hourly MAE analysis: worst hours are 19 (578 MW), 15 (564 MW), 20 (550 MW)
+        hourly_thresholds = {
+            # Morning ramp (5-9): Higher threshold (0.45-0.50) → expect natural volatility
+            5: 0.45, 6: 0.45, 7: 0.50, 8: 0.50, 9: 0.45,
+
+            # Mid-day stable (10-16): Standard threshold (0.30-0.35)
+            10: 0.30, 11: 0.30, 12: 0.30, 13: 0.30, 14: 0.30, 15: 0.35, 16: 0.35,
+
+            # Evening ramp (17-21): Higher threshold (0.45-0.50) → worst observed hours
+            17: 0.45, 18: 0.50, 19: 0.50, 20: 0.50, 21: 0.45,
+
+            # Night stable (22-4): Lower threshold (0.25) → expect precision
+            22: 0.25, 23: 0.25, 0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25, 4: 0.30
+        }
+
+        for border, data in results['borders'].items():
+            if 'error' in data:
+                continue  # Skip failed borders
+
+            # Extract quantiles as numpy arrays for vectorized operations
+            q10_array = np.array(data['q10'])
+            q90_array = np.array(data['q90'])
+            median_array = np.array(data['median'])
+            q75_array = np.array(data['q75'])
+            q25_array = np.array(data['q25'])
+
+            # Calculate quantile spread (model's learned uncertainty estimate)
+            # This captures WHEN the model predicts volatility based on input features
+            spread = q90_array - q10_array
+
+            # Normalize spread as percentage of median (handles different border capacities)
+            # Add +1 to avoid division by zero for near-zero medians
+            uncertainty_pct = spread / (np.abs(median_array) + 1.0)
+
+            # HOUR-AWARE adaptive selection using hour-specific thresholds
+            adaptive_forecast = np.zeros_like(median_array, dtype=float)
+
+            for i, hour in enumerate(hours_of_day):
+                # Get threshold for this hour (default to 0.30 if hour not in map)
+                threshold_high = hourly_thresholds.get(hour, 0.30)
+                threshold_medium = threshold_high * 0.5  # Medium threshold is 50% of high
+
+                if uncertainty_pct[i] > threshold_high:
+                    # High uncertainty: use q75
+                    adaptive_forecast[i] = q75_array[i]
+                elif uncertainty_pct[i] >= threshold_medium:
+                    # Medium uncertainty: interpolate q60 between median and q75
+                    adaptive_forecast[i] = 0.6 * median_array[i] + 0.4 * q75_array[i]
+                else:
+                    # Low uncertainty: use median
+                    adaptive_forecast[i] = median_array[i]
+
+            # Round to integers (capacity values are always whole MW)
+            adaptive_forecast = np.round(adaptive_forecast).astype(int)
+
+            # Store adaptive forecast and uncertainty metadata
+            data['adaptive'] = adaptive_forecast.tolist()
+            data['uncertainty_pct'] = uncertainty_pct.tolist()
+
+            # Store selection statistics for analysis (using hour-aware thresholds)
+            high_uncertainty_hours = 0
+            medium_uncertainty_hours = 0
+            low_uncertainty_hours = 0
+
+            for i, hour in enumerate(hours_of_day):
+                threshold_high = hourly_thresholds.get(hour, 0.30)
+                threshold_medium = threshold_high * 0.5
+
+                if uncertainty_pct[i] > threshold_high:
+                    high_uncertainty_hours += 1
+                elif uncertainty_pct[i] >= threshold_medium:
+                    medium_uncertainty_hours += 1
+                else:
+                    low_uncertainty_hours += 1
+
+            data['adaptive_stats'] = {
+                'high_uncertainty_hours': int(high_uncertainty_hours),
+                'medium_uncertainty_hours': int(medium_uncertainty_hours),
+                'low_uncertainty_hours': int(low_uncertainty_hours),
+                'mean_uncertainty_pct': float(np.mean(uncertainty_pct)),
+                'max_uncertainty_pct': float(np.max(uncertainty_pct))
+            }
+
        return results
 
    def export_to_parquet(self, results: Dict, output_path: str):
@@ -301,6 +436,9 @@
                data[f'{border}_median'] = forecast_data['median']
                data[f'{border}_q10'] = forecast_data['q10']
                data[f'{border}_q90'] = forecast_data['q90']
+                # Add adaptive forecast if available (learned uncertainty-based selection)
+                if 'adaptive' in forecast_data:
+                    data[f'{border}_adaptive'] = forecast_data['adaptive']
                successful_borders.append(border)
            else:
                failed_borders.append((border, forecast_data['error']))