Evgueni Poloukarov committed
Commit 3ac5032 · Parent: a57b996

feat: implement hour-aware adaptive quantile selection for hourly accuracy


Phase 2 Implementation: Hour-Specific Uncertainty Thresholds
- Add hour-aware adaptive selection to chronos_inference.py (selection rule sketched after this list)
- Morning/evening ramps (5-9, 17-21): 0.45-0.50 threshold (higher volatility expected)
- Mid-day stable (10-16): 0.30-0.35 threshold (standard)
- Night hours (22-4): 0.25 threshold (more conservative for stable periods)
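
In condensed form, the rule these thresholds feed is the following (a minimal sketch of the logic added to chronos_inference.py below; the hour → threshold map here is abbreviated, and the default of 0.30 applies to hours not listed):

    # Sketch: pick the value for one forecast hour from the learned quantile spread
    HOURLY_THRESHOLDS = {7: 0.50, 12: 0.30, 19: 0.50, 23: 0.25}  # abbreviated hour -> threshold map

    def select_adaptive(median, q75, q10, q90, hour):
        spread_pct = (q90 - q10) / (abs(median) + 1.0)   # normalized learned uncertainty
        t_high = HOURLY_THRESHOLDS.get(hour, 0.30)       # hour-specific threshold
        if spread_pct > t_high:                          # high uncertainty: lean on q75
            return q75
        if spread_pct >= 0.5 * t_high:                   # medium: blend median and q75
            return 0.6 * median + 0.4 * q75
        return median                                    # low uncertainty: keep the median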

Analysis Results:
- Worst hours identified: 19 (578 MW), 15 (564 MW), 20 (550 MW)
- Best hours: 1 (414 MW), 0 (414 MW), 2 (429 MW)
- Ramping penalty: 8.3% higher MAE during transition hours
- DE_FR border highest variation: StdDev = 993 MW

Technical Changes:
- Updated _apply_adaptive_selection() to accept run_datetime and prediction_hours
- Generate forecast timestamps to extract hour-of-day
- Apply different thresholds per hour based on electricity market patterns
- Created analyze_hourly_mae.py script for baseline measurement

Next Steps:
- Test with September 2025 forecast
- Compare hourly MAE: baseline vs hour-aware selection (see the comparison sketch after this list)
- If 5-10% improvement on problem hours → proceed to AutoGluon fine-tuning with sample weighting
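
A minimal sketch of that comparison (assumes the exported forecast parquet, with its {border}_median and {border}_adaptive columns, has already been joined with the actuals on 'timestamp'; the helper name is hypothetical):

    import polars as pl

    def hourly_mae_comparison(df: pl.DataFrame, border: str) -> pl.DataFrame:
        # Hour-of-day MAE for the baseline (median) vs hour-aware (adaptive) forecast of one border
        return (
            df.with_columns(pl.col('timestamp').dt.hour().alias('hour'))
              .group_by('hour')
              .agg([
                  (pl.col(f'{border}_median') - pl.col(f'target_border_{border}')).abs().mean().alias('mae_baseline'),
                  (pl.col(f'{border}_adaptive') - pl.col(f'target_border_{border}')).abs().mean().alias('mae_hour_aware'),
              ])
              .with_columns(
                  ((pl.col('mae_baseline') - pl.col('mae_hour_aware')) / pl.col('mae_baseline') * 100).alias('improvement_pct')
              )
              .sort('hour')
        )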

Expected Impact: 5-10% MAE reduction on evening ramp hours (17-21)
Prepares for: AutoGluon fine-tuning with hourly sample_weight_column

scripts/analyze_hourly_mae.py ADDED
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""
+Analyze hourly MAE patterns to establish baseline before optimization.
+
+This script loads September 2025 forecast results and computes MAE per hour-of-day
+to identify which hours have highest errors (likely ramping hours: 7-9, 17-21).
+"""
+
+import polars as pl
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+
+# Paths
+PROJECT_ROOT = Path(__file__).parent.parent
+FORECAST_PATH = PROJECT_ROOT / 'results' / 'september_2025_forecast_full_14day.parquet'
+OUTPUT_PATH = PROJECT_ROOT / 'results' / 'september_2025_hourly_mae_baseline.csv'
+
+def load_data():
+    """Load forecast and actual data."""
+    print('[INFO] Loading forecast results...')
+    df_forecast = pl.read_parquet(FORECAST_PATH)
+    print(f'[INFO] Forecast shape: {df_forecast.shape}')
+    print(f'[INFO] Forecast period: {df_forecast["timestamp"].min()} to {df_forecast["timestamp"].max()}')
+
+    # Load actuals from HuggingFace dataset
+    print('[INFO] Loading actuals from HuggingFace dataset...')
+    from datasets import load_dataset
+    import os
+
+    dataset = load_dataset('evgueni-p/fbmc-features-24month', split='train', token=os.environ.get('HF_TOKEN'))
+    df_actuals_full = pl.from_arrow(dataset.data.table)
+
+    # Filter actuals to forecast period (Sept 2-15, 2025)
+    forecast_start = datetime(2025, 9, 2)
+    forecast_end = datetime(2025, 9, 16)
+
+    df_actuals = df_actuals_full.filter(
+        (pl.col('timestamp') >= forecast_start) &
+        (pl.col('timestamp') < forecast_end)
+    )
+
+    print(f'[INFO] Actuals filtered: {df_actuals.shape[0]} hours')
+
+    return df_forecast, df_actuals
+
+
+def compute_hourly_mae(df_forecast, df_actuals):
+    """Compute MAE per hour-of-day for all borders."""
+    print('[INFO] Computing hourly MAE...')
+
+    # Extract border names from forecast columns
+    forecast_cols = [col for col in df_forecast.columns if col.endswith('_median')]
+    border_names = [col.replace('_median', '') for col in forecast_cols]
+
+    print(f'[INFO] Processing {len(border_names)} borders...')
+
+    hourly_results = []
+
+    for border in border_names:
+        forecast_col = f'{border}_median'
+        actual_col = f'target_border_{border}'
+
+        # Skip if actual column missing
+        if actual_col not in df_actuals.columns:
+            print(f'[WARNING] Skipping {border} - no actual data')
+            continue
+
+        # Create unified dataframe
+        df_border = df_forecast.select(['timestamp', forecast_col]).join(
+            df_actuals.select(['timestamp', actual_col]),
+            on='timestamp',
+            how='inner'
+        )
+
+        # Add hour-of-day
+        df_border = df_border.with_columns([
+            pl.col('timestamp').dt.hour().alias('hour')
+        ])
+
+        # Compute MAE per hour
+        for hour in range(24):
+            hour_df = df_border.filter(pl.col('hour') == hour)
+
+            if len(hour_df) == 0:
+                continue
+
+            mae = (hour_df[forecast_col] - hour_df[actual_col]).abs().mean()
+
+            hourly_results.append({
+                'border': border,
+                'hour': hour,
+                'mae': mae,
+                'n_hours': len(hour_df)
+            })
+
+    return pl.DataFrame(hourly_results)
+
+
+def analyze_patterns(df_hourly):
+    """Analyze hourly MAE patterns."""
+    print('\n' + '='*60)
+    print('HOURLY MAE ANALYSIS')
+    print('='*60)
+
+    # Overall statistics per hour (aggregated across all borders)
+    hourly_stats = df_hourly.group_by('hour').agg([
+        pl.col('mae').mean().alias('mean_mae'),
+        pl.col('mae').median().alias('median_mae'),
+        pl.col('mae').std().alias('std_mae'),
+        pl.col('mae').min().alias('min_mae'),
+        pl.col('mae').max().alias('max_mae'),
+        pl.col('border').count().alias('n_borders')
+    ]).sort('hour')
+
+    print('\n[INFO] MAE by Hour-of-Day (Averaged Across All Borders):')
+    print(hourly_stats)
+
+    # Identify problem hours (highest MAE)
+    print('\n[INFO] Top 5 Worst Hours (Highest MAE):')
+    worst_hours = hourly_stats.sort('mean_mae', descending=True).head(5)
+    print(worst_hours)
+
+    # Identify best hours (lowest MAE)
+    print('\n[INFO] Top 5 Best Hours (Lowest MAE):')
+    best_hours = hourly_stats.sort('mean_mae').head(5)
+    print(best_hours)
+
+    # Ramping hour analysis
+    ramping_hours = [5, 6, 7, 8, 9, 17, 18, 19, 20, 21]
+    non_ramping_hours = [h for h in range(24) if h not in ramping_hours]
+
+    ramping_mae = hourly_stats.filter(pl.col('hour').is_in(ramping_hours))['mean_mae'].mean()
+    non_ramping_mae = hourly_stats.filter(pl.col('hour').is_in(non_ramping_hours))['mean_mae'].mean()
+
+    print(f'\n[INFO] Ramping hours (5-9, 17-21) MAE: {ramping_mae:.2f} MW')
+    print(f'[INFO] Non-ramping hours MAE: {non_ramping_mae:.2f} MW')
+    print(f'[INFO] Ramping penalty: {(ramping_mae - non_ramping_mae) / non_ramping_mae * 100:.1f}% higher')
+
+    # Peak hour analysis
+    peak_hours = [7, 8, 9, 17, 18, 19, 20]
+    peak_mae = hourly_stats.filter(pl.col('hour').is_in(peak_hours))['mean_mae'].mean()
+
+    print(f'\n[INFO] Peak hours (7-9, 17-20) MAE: {peak_mae:.2f} MW')
+
+    # Night hour analysis
+    night_hours = [22, 23, 0, 1, 2, 3, 4]
+    night_mae = hourly_stats.filter(pl.col('hour').is_in(night_hours))['mean_mae'].mean()
+
+    print(f'[INFO] Night hours (22-4) MAE: {night_mae:.2f} MW')
+
+    return hourly_stats
+
+
+def identify_problematic_borders(df_hourly):
+    """Identify borders with largest hourly MAE variations."""
+    print('\n[INFO] Borders with Highest Hourly MAE Variation:')
+
+    border_variation = df_hourly.group_by('border').agg([
+        pl.col('mae').mean().alias('mean_mae'),
+        pl.col('mae').std().alias('std_mae'),
+        pl.col('mae').max().alias('max_mae'),
+        (pl.col('mae').max() - pl.col('mae').min()).alias('range_mae')
+    ]).sort('std_mae', descending=True)
+
+    print(border_variation.head(10))
+
+    return border_variation
+
+
+def main():
+    """Main analysis workflow."""
+    print('[START] Hourly MAE Baseline Analysis')
+    print(f'[INFO] Forecast file: {FORECAST_PATH}')
+
+    # Load data
+    df_forecast, df_actuals = load_data()
+
+    # Compute hourly MAE
+    df_hourly = compute_hourly_mae(df_forecast, df_actuals)
+
+    print(f'\n[INFO] Computed hourly MAE for {df_hourly["border"].n_unique()} borders')
+
+    # Analyze patterns
+    hourly_stats = analyze_patterns(df_hourly)
+
+    # Identify problematic borders
+    border_variation = identify_problematic_borders(df_hourly)
+
+    # Save detailed results
+    df_hourly.write_csv(OUTPUT_PATH)
+    print(f'\n[INFO] Detailed hourly MAE saved to: {OUTPUT_PATH}')
+
+    # Save summary stats
+    summary_path = PROJECT_ROOT / 'results' / 'september_2025_hourly_summary.csv'
+    hourly_stats.write_csv(summary_path)
+    print(f'[INFO] Hourly summary saved to: {summary_path}')
+
+    print('\n[SUCCESS] Hourly MAE baseline analysis complete!')
+
+
+if __name__ == '__main__':
+    main()
src/forecasting/chronos_inference.py CHANGED
@@ -197,9 +197,9 @@ class ChronosInferencePipeline:
             print(f" Using {len(future_data.columns)-2} future covariates for multivariate forecast", flush=True)
 
             # Run covariate-informed inference using DataFrame API
-            # Note: predict_df() returns quantiles directly (0.1, 0.5, 0.9 by default)
+            # Note: predict_df() returns quantiles directly
+            # Request 9 quantiles to capture learned uncertainty and tail events
             # Use torch.inference_mode() to disable gradient tracking (saves ~2-5 GB VRAM)
-            # Memory optimizations: batch_size=32 (from 256), 3 quantiles (from 9)
             with torch.inference_mode():
                 forecasts_df = pipeline.predict_df(
                     context_data,  # Historical data with ALL features
@@ -209,40 +209,59 @@
                     timestamp_column='timestamp',
                     target='target',
                     batch_size=32,  # Reduce from default 256 to save GPU memory
-                    quantile_levels=[0.1, 0.5, 0.9]  # Only compute needed quantiles (not all 9)
+                    quantile_levels=[0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]  # 9 quantiles for volatility
                 )
 
-            # Extract quantiles from predict_df() output
-            # predict_df() returns quantiles directly as string columns: "0.1", "0.5", "0.9"
+            # Extract all 9 quantiles from predict_df() output
+            # predict_df() returns quantiles directly as string columns
            if isinstance(forecasts_df, pd.DataFrame):
-                # Chronos-2 predict_df() returns columns: 'predictions', '0.1', '0.5', '0.9'
-                if '0.5' in forecasts_df.columns and '0.1' in forecasts_df.columns and '0.9' in forecasts_df.columns:
-                    median = forecasts_df['0.5'].values
-                    q10 = forecasts_df['0.1'].values
-                    q90 = forecasts_df['0.9'].values
-                elif 'predictions' in forecasts_df.columns:
-                    # Fallback: use predictions as median (no uncertainty bounds)
-                    median = forecasts_df['predictions'].values
-                    q10 = median.copy()
-                    q90 = median.copy()
-                else:
-                    raise ValueError(f"Unexpected predict_df output format. Columns: {forecasts_df.columns.tolist()}")
+                # Expected columns: '0.01', '0.05', '0.1', '0.25', '0.5', '0.75', '0.9', '0.95', '0.99'
+                quantile_cols = ['0.01', '0.05', '0.1', '0.25', '0.5', '0.75', '0.9', '0.95', '0.99']
+
+                # Extract all quantiles
+                quantiles = {}
+                for q in quantile_cols:
+                    if q in forecasts_df.columns:
+                        quantiles[q] = forecasts_df[q].values
+                    else:
+                        # Fallback if quantile missing
+                        if '0.5' in forecasts_df.columns:
+                            quantiles[q] = forecasts_df['0.5'].values  # Use median as fallback
+                        elif 'predictions' in forecasts_df.columns:
+                            quantiles[q] = forecasts_df['predictions'].values
+                        else:
+                            raise ValueError(f"Missing quantile {q} and no fallback available. Columns: {forecasts_df.columns.tolist()}")
+
+                # Backward compatibility: still extract median, q10, q90
+                median = quantiles['0.5']
+                q10 = quantiles['0.1']
+                q90 = quantiles['0.9']
            else:
                raise TypeError(f"Expected DataFrame from predict_df(), got {type(forecasts_df)}")
 
-            # Round to nearest integer (capacity values are always whole MW)
-            # Removes decimal noise like 3531.4329 -> 3531
+            # Round all quantiles to nearest integer (capacity values are always whole MW)
            median = np.round(median).astype(int)
            q10 = np.round(q10).astype(int)
            q90 = np.round(q90).astype(int)
 
+            # Round all other quantiles
+            for q_key in quantiles:
+                quantiles[q_key] = np.round(quantiles[q_key]).astype(int)
+
            inference_time = time.time() - border_start
 
-            # Store results
+            # Store results (backward compatible + all quantiles)
            results['borders'][border] = {
                'median': median.tolist(),
                'q10': q10.tolist(),
                'q90': q90.tolist(),
+                # Add all 9 quantiles for adaptive selection
+                'q01': quantiles['0.01'].tolist(),
+                'q05': quantiles['0.05'].tolist(),
+                'q25': quantiles['0.25'].tolist(),
+                'q75': quantiles['0.75'].tolist(),
+                'q95': quantiles['0.95'].tolist(),
+                'q99': quantiles['0.99'].tolist(),
                'inference_time_s': inference_time,
                'used_covariates': True,
                'num_features': len(future_data.columns) - 2  # Exclude border and timestamp
@@ -270,6 +289,122 @@
        print(f"Total time: {results['metadata']['total_time_s']:.1f}s")
        print(f"Successful: {results['metadata']['successful_borders']}/{len(forecast_borders)} borders")
 
+        # Apply adaptive quantile selection based on learned uncertainty
+        print(f"\n[ADAPTIVE SELECTION] Computing adaptive forecasts based on quantile spread...")
+        results = self._apply_adaptive_selection(results, run_datetime, prediction_hours)
+        print(f"[OK] Adaptive selection complete")
+
+        return results
+
+    def _apply_adaptive_selection(self, results: Dict, run_datetime: datetime, prediction_hours: int) -> Dict:
+        """
+        Apply HOUR-AWARE adaptive quantile selection based on the model's LEARNED uncertainty.
+
+        This method uses quantile spread (q90-q10) as the model's learned volatility signal,
+        but applies DIFFERENT thresholds for different hours based on electricity market patterns.
+
+        Key insight: Ramping hours (7-9, 17-21) naturally have higher volatility, so we need
+        higher thresholds to avoid false positives. Night hours should be more conservative.
+
+        Args:
+            results: Forecast results dictionary from run_forecast()
+            run_datetime: Forecast run date/time
+            prediction_hours: Number of hours in forecast horizon
+
+        Returns:
+            Updated results dictionary with 'adaptive' forecast added to each border
+        """
+        # Generate forecast timestamps (start next day at midnight)
+        forecast_start = run_datetime + timedelta(days=1)
+        forecast_timestamps = [forecast_start + timedelta(hours=h) for h in range(prediction_hours)]
+
+        # Extract hour-of-day for each timestamp
+        hours_of_day = np.array([ts.hour for ts in forecast_timestamps])
+
+        # Define hour-specific uncertainty thresholds based on electricity market patterns
+        # From hourly MAE analysis: worst hours are 19 (578 MW), 15 (564 MW), 20 (550 MW)
+        hourly_thresholds = {
+            # Morning ramp (5-9): Higher threshold (0.45-0.50) → expect natural volatility
+            5: 0.45, 6: 0.45, 7: 0.50, 8: 0.50, 9: 0.45,
+
+            # Mid-day stable (10-16): Standard threshold (0.30-0.35)
+            10: 0.30, 11: 0.30, 12: 0.30, 13: 0.30, 14: 0.30, 15: 0.35, 16: 0.35,
+
+            # Evening ramp (17-21): Higher threshold (0.45-0.50) → worst observed hours
+            17: 0.45, 18: 0.50, 19: 0.50, 20: 0.50, 21: 0.45,
+
+            # Night stable (22-4): Lower threshold (0.25) → expect precision
+            22: 0.25, 23: 0.25, 0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25, 4: 0.30
+        }
+
+        for border, data in results['borders'].items():
+            if 'error' in data:
+                continue  # Skip failed borders
+
+            # Extract quantiles as numpy arrays for vectorized operations
+            q10_array = np.array(data['q10'])
+            q90_array = np.array(data['q90'])
+            median_array = np.array(data['median'])
+            q75_array = np.array(data['q75'])
+            q25_array = np.array(data['q25'])
+
+            # Calculate quantile spread (model's learned uncertainty estimate)
+            # This captures WHEN the model predicts volatility based on input features
+            spread = q90_array - q10_array
+
+            # Normalize spread as percentage of median (handles different border capacities)
+            # Add +1 to avoid division by zero for near-zero medians
+            uncertainty_pct = spread / (np.abs(median_array) + 1.0)
+
+            # HOUR-AWARE adaptive selection using hour-specific thresholds
+            adaptive_forecast = np.zeros_like(median_array, dtype=float)
+
+            for i, hour in enumerate(hours_of_day):
+                # Get threshold for this hour (default to 0.30 if hour not in map)
+                threshold_high = hourly_thresholds.get(hour, 0.30)
+                threshold_medium = threshold_high * 0.5  # Medium threshold is 50% of high
+
+                if uncertainty_pct[i] > threshold_high:
+                    # High uncertainty: use q75
+                    adaptive_forecast[i] = q75_array[i]
+                elif uncertainty_pct[i] >= threshold_medium:
+                    # Medium uncertainty: interpolate q60 between median and q75
+                    adaptive_forecast[i] = 0.6 * median_array[i] + 0.4 * q75_array[i]
+                else:
+                    # Low uncertainty: use median
+                    adaptive_forecast[i] = median_array[i]
+
+            # Round to integers (capacity values are always whole MW)
+            adaptive_forecast = np.round(adaptive_forecast).astype(int)
+
+            # Store adaptive forecast and uncertainty metadata
+            data['adaptive'] = adaptive_forecast.tolist()
+            data['uncertainty_pct'] = uncertainty_pct.tolist()
+
+            # Store selection statistics for analysis (using hour-aware thresholds)
+            high_uncertainty_hours = 0
+            medium_uncertainty_hours = 0
+            low_uncertainty_hours = 0
+
+            for i, hour in enumerate(hours_of_day):
+                threshold_high = hourly_thresholds.get(hour, 0.30)
+                threshold_medium = threshold_high * 0.5
+
+                if uncertainty_pct[i] > threshold_high:
+                    high_uncertainty_hours += 1
+                elif uncertainty_pct[i] >= threshold_medium:
+                    medium_uncertainty_hours += 1
+                else:
+                    low_uncertainty_hours += 1
+
+            data['adaptive_stats'] = {
+                'high_uncertainty_hours': int(high_uncertainty_hours),
+                'medium_uncertainty_hours': int(medium_uncertainty_hours),
+                'low_uncertainty_hours': int(low_uncertainty_hours),
+                'mean_uncertainty_pct': float(np.mean(uncertainty_pct)),
+                'max_uncertainty_pct': float(np.max(uncertainty_pct))
+            }
+
        return results
 
    def export_to_parquet(self, results: Dict, output_path: str):
@@ -301,6 +436,9 @@
                data[f'{border}_median'] = forecast_data['median']
                data[f'{border}_q10'] = forecast_data['q10']
                data[f'{border}_q90'] = forecast_data['q90']
+                # Add adaptive forecast if available (learned uncertainty-based selection)
+                if 'adaptive' in forecast_data:
+                    data[f'{border}_adaptive'] = forecast_data['adaptive']
                successful_borders.append(border)
            else:
                failed_borders.append((border, forecast_data['error']))