Spaces:

dks1
/

tempoPFN

Sleeping

App Files Files Community

vlad-moroshan committed on Oct 30

Commit

1c8d125

1 Parent(s): c02fe3a

initial commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +167 -0
README.md +128 -2
configs/example.yaml +118 -0
data/nan_stats.json +0 -0
examples/generate_synthetic_data.py +324 -0
examples/quick_start_tempo_pfn.ipynb +286 -0
examples/quick_start_tempo_pfn.py +95 -0
examples/utils.py +115 -0
gift_eval/submission/all_results.csv +98 -0
gift_eval/submission/config.json +6 -0
pyproject.toml +57 -0
src/__init__.py +0 -0
src/data/__init__.py +0 -0
src/data/augmentations.py +1318 -0
src/data/batch_composer.py +705 -0
src/data/constants.py +25 -0
src/data/containers.py +272 -0
src/data/datasets.py +267 -0
src/data/filter.py +73 -0
src/data/frequency.py +538 -0
src/data/loaders.py +661 -0
src/data/scalers.py +360 -0
src/data/time_features.py +564 -0
src/data/utils.py +75 -0
src/gift_eval/__init__.py +0 -0
src/gift_eval/aggregate_results.py +160 -0
src/gift_eval/constants.py +83 -0
src/gift_eval/data.py +234 -0
src/gift_eval/dataset_properties.json +152 -0
src/gift_eval/evaluate.py +529 -0
src/gift_eval/model_wrapper.py +349 -0
src/models/__init__.py +0 -0
src/models/blocks.py +58 -0
src/models/model.py +427 -0
src/optim/lr_scheduler.py +360 -0
src/plotting/__init__.py +0 -0
src/plotting/gift_eval_utils.py +215 -0
src/plotting/plot_timeseries.py +292 -0
src/synthetic_generation/__init__.py +0 -0
src/synthetic_generation/abstract_classes.py +97 -0
src/synthetic_generation/anomalies/anomaly_generator.py +293 -0
src/synthetic_generation/anomalies/anomaly_generator_wrapper.py +64 -0
src/synthetic_generation/audio_generators/financial_volatility_generator.py +103 -0
src/synthetic_generation/audio_generators/financial_volatility_wrapper.py +91 -0
src/synthetic_generation/audio_generators/multi_scale_fractal_generator.py +75 -0
src/synthetic_generation/audio_generators/multi_scale_fractal_wrapper.py +77 -0
src/synthetic_generation/audio_generators/network_topology_generator.py +113 -0
src/synthetic_generation/audio_generators/network_topology_wrapper.py +93 -0
src/synthetic_generation/audio_generators/stochastic_rhythm_generator.py +86 -0
src/synthetic_generation/audio_generators/stochastic_rhythm_wrapper.py +81 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,167 @@

+logs/
+*.png
+*.pth
+# *.sh
+*.slurm
+*.pkl
+wandb/
+AutogluonModels/
+.vscode/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+.idea/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Datasets, logs, plots, etc.
+outputs/
+models/*
+*.arrow
+*.png
+*.pt
+*.pdf
+*.gif

README.md CHANGED Viewed

@@ -1,2 +1,128 @@
-# TempoPFN
-Official code release for the paper "TempoPFN: Synthetic Pre-training of Linear RNNs for Zero-shot Time Series Forecasting"

+# TempoPFN: Synthetic Pre-Training of Linear RNNs for Zero-Shot Time Series Forecasting
+[![arXiv](https://img.shields.io/badge/arXiv-2510.25502-b31b1b.svg)](https://arxiv.org/abs/2510.25502)
+[![License](https://img.shields.io/badge/License-Apache_2.0-green.svg)](https://github.com/automl/TempoPFN/blob/main/LICENSE)
+---
+**TempoPFN**, introduced in [TempoPFN: Synthetic Pre-Training of Linear RNNs for Zero-Shot Time Series Forecasting](https://arxiv.org/abs/2510.25502), is a univariate time series foundation model pretrained **entirely on synthetic data**. It delivers top-tier zero-shot forecasting accuracy while remaining fully reproducible and free from real-data leakage.
+Built on a **Linear RNN (GatedDeltaProduct)** backbone, TempoPFN performs end-to-end forecasting without patching or windowing. Its design enables fully parallelizable training and inference while maintaining stable temporal state-tracking across long sequences.
+This repository includes the [**pretrained 35M-parameter model**](https://www.dropbox.com/scl/fi/5vmjr7nx9wj9w1vl2giuv/checkpoint.pth?rlkey=qmk08ojp7wj0l6kpm8hzgbzju&st=dyr07d00&dl=0), all training and inference code, and the **complete synthetic data generation pipeline** used for pretraining.
+## ✨ Why TempoPFN?
+* **High Performance, No Real Data:** Achieves top-tier results on **GIFT-Eval, outperforming all existing synthetic-only approaches** and **surpassing the vast majority of models trained on real-world data**. Training exclusively on synthetic data ensures full reproducibility and eliminates benchmark leakage.
+* **Parallel and Efficient:** The linear recurrence design enables full-sequence parallelization. This gives us the best of both worlds: the linear efficiency of an RNN, but with the training parallelism of a Transformer.
+* **Open and Reproducible:** Includes the full synthetic data pipeline, configurations, and scripts to reproduce training from scratch.
+* **State-Tracking Stability:** The GatedDeltaProduct recurrence and *state-weaving* mechanism preserve temporal continuity and information flow across long horizons, improving robustness without non-linear recurrence.
+![TempoPFN Overview](https://iili.io/KlUjfcP.png)
+## ⚙️ Installation
+```bash
+git clone https://github.com/automl/TempoPFN.git
+cd TempoPFN
+python -m venv venv && source venv/bin/activate
+# 1. Install PyTorch first (see PyTorch website for your specific CUDA version)
+# Example for CUDA 12.6:
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
+# 2. Install TempoPFN and all other dependencies
+pip install .
+export PYTHONPATH=$PWD
+```
+## 🚀 Quick Start: Run the Demo
+**Prerequisites:**
+* You must have a **CUDA-capable GPU** with a matching PyTorch version installed.
+* You have run `export PYTHONPATH=$PWD` from the repo's root directory (see Installation).
+### 1. Run the Quick Start Script
+Run a demo forecast on a synthetic sine wave:
+```bash
+python examples/quick_start_tempo_pfn.py
+```
+### 2. Run with a Local Checkpoint
+If you have already downloaded the model (e.g., to `models/checkpoint.pth`), you can point the script to it:
+```bash
+python examples/quick_start_tempo_pfn.py --checkpoint models/checkpoint.pth
+```
+### 3. Run the Notebook version
+```bash
+jupyter notebook examples/quick_start_tempo_pfn.ipynb
+```
+### Hardware & Performance Tips
+**GPU Required:** Inference requires a CUDA-capable GPU. Tested on NVIDIA A100/H100.
+**Triton Caches:** To prevent slowdowns from writing caches to a network filesystem, route caches to a local directory (like `/tmp`) before running:
+```bash
+LOCAL_CACHE_BASE="${TMPDIR:-/tmp}/tsf-$(date +%s)"
+mkdir -p "${LOCAL_CACHE_BASE}/triton" "${LOCAL_CACHE_BASE}/torchinductor"
+export TRITON_CACHE_DIR="${LOCAL_CACHE_BASE}/triton"
+export TORCHINDUCTOR_CACHE_DIR="${LOCAL_CACHE_BASE}/torchinductor"
+python examples/quick_start_tempo_pfn.py
+```
+## 🚂 Training
+### Single-GPU Training (for debugging)
+```bash
+torchrun --standalone --nproc_per_node=1 src/training/trainer_dist.py --config ./configs/train.yaml
+```
+### Multi-GPU Training (Single-Node)
+This example uses 8 GPUs. The training script uses PyTorch DistributedDataParallel (DDP).
+```bash
+torchrun --standalone --nproc_per_node=8 src/training/trainer_dist.py --config ./configs/train.yaml
+```
+### Configuration
+All training and model parameters are controlled via YAML files in `configs/` (architecture, optimizers, paths).
+## 💾 Synthetic Data Generation
+A core contribution of this work is our open-source synthetic data pipeline, located in `src/synthetic_generation/`. It combines diverse generators with a powerful augmentation cascade.
+**Generators Used:**
+* **Adapted Priors:** ForecastPFN, KernelSynth, GaussianProcess (GP), and CauKer (Structural Causal Models).
+* **Novel Priors:** SDE (a flexible regime-switching Ornstein-Uhlenbeck process), Sawtooth, StepFunction, Anomaly, Spikes, SineWave, and Audio-Inspired generators (Stochastic Rhythms, Financial Volatility, Network Topology, Multi-Scale Fractals).
+You can easily generate your own data by instantiating a generator wrapper. See `examples/generate_synthetic_data.py` for a minimal script, or inspect the generator code in `src/synthetic_generation/`.
+## 🤝 License
+This project is licensed under the Apache 2.0 License. See the LICENSE file for details. This permissive license allows for both academic and commercial use.
+## 📚 Citation
+If you find TempoPFN useful in your research, please consider citing our paper:
+```bibtex
+@misc{moroshan2025tempopfn,
+  title={TempoPFN: Synthetic Pre-Training of Linear RNNs for Zero-Shot Time Series Forecasting},
+  author={Vladyslav Moroshan and Julien Siems and Arber Zela and Timur Carstensen and Frank Hutter},
+  year={2025},
+  eprint={2510.25502},
+  archivePrefix={arXiv},
+  primaryClass={cs.LG}
+}
+```

configs/example.yaml ADDED Viewed

	@@ -0,0 +1,118 @@

+train_data_path: null # Replace with the path to the root of the training data directory, which contains one subdirectory per generator (e.g. gp, kernel, etc.)
+model_path: ./models # Path where the model will be saved
+model_name: TempoPFN
+continue_training: false
+checkpoint_path: null # Replace with the path to the checkpoint file
+seed: 2025
+wandb: true # whether to log to wandb
+wandb_project_name: TempoPFNTraining
+wandb_entity: university-of-freiburg-2024
+wandb_plots: false
+batch_size: 40
+num_training_iterations: 1000000 # 1M
+validation_batch_size: 64
+num_validation_batches: 1
+num_workers: 4
+gradient_accumulation_enabled: true
+accumulation_steps: 5  # Number of batches to accumulate before updating (effective batch size = batch_size * accumulation_steps)
+log_interval: 2048
+save_every: 100000
+generator_proportions:
+  forecast_pfn: 1.0
+  gp: 1.0
+  kernel: 1.0
+  sawtooth: 1.0
+  sinewave: 1.0
+  step: 1.0
+  anomaly: 1.0
+  spike: 1.0
+  cauker_univariate: 1.0
+  ou_process: 3.0
+  audio_financial_volatility: 0.1
+  audio_multi_scale_fractal: 0.1
+  audio_network_topology: 0.5
+  audio_stochastic_rhythm: 0.5
+  augmented_per_sample_2048: 2.0
+  augmented_temp_batch_2048: 2.0
+# Learning Rate Scheduler Configuration
+lr_scheduler: cosine  # Options: "warmup_stable_decay", "cosine_with_warmup", "cosine_with_restarts", "cosine"
+# Learning Rate Parameters
+peak_lr: 0.0002           # 2e-4 - Peak learning rate
+min_lr_ratio: 0.01        # Minimum LR as fraction of peak LR
+# WSD Scheduler Specific Parameters
+warmup_ratio: 0.003       # 0.3% of total steps for warmup
+stable_ratio: 0.90        # 90% of total steps at stable learning rate
+decay_type: cosine        # Type of decay: "cosine" or "linear"
+# Alternative Scheduler Parameters (if using different schedulers)
+num_cycles: 0.5           # For cosine_with_warmup: 0.5 = half cosine wave
+num_restart_cycles: 4     # For cosine_with_restarts: number of restart cycles
+# Optimizer Configuration
+weight_decay: 0.01        # Weight decay for AdamW
+beta1: 0.9               # Adam beta1 parameter
+beta2: 0.98              # Adam beta2 parameter (optimized for transformers)
+optimizer_eps: 1e-6      # Adam epsilon
+# Training Stability
+gradient_clip_val: 100.0
+scaler: custom_robust
+gift_eval:
+  evaluate_on_gift_eval: false
+  max_context_length: 3072
+  create_plots: false
+  max_plots: 5
+  dataset_storage_path: null # Replace with the path to the directory where the GIFT-Eval datasets are stored
+data_augmentation:
+  nan_augmentation: true
+  scaler_augmentation: false
+  length_shortening: true
+  nan_stats_path: ./data/nan_stats.json
+augmentation_probabilities:
+  scaler_augmentation: 0.5
+TimeSeriesModel:
+  # Core architecture
+  embed_size: 512
+  num_encoder_layers: 10
+  # Scaling and preprocessing
+  scaler: custom_robust
+  epsilon: 0.00001
+  scaler_clamp_value: null
+  handle_constants: false
+  # Time features
+  K_max: 25
+  time_feature_config:
+    use_enhanced_features: true
+    use_holiday_features: false
+    use_index_features: true
+    include_seasonality_info: true
+  drop_enc_allow: false
+  encoding_dropout: 0.0
+  # Encoder configuration
+  encoder_config:
+    attn_mode: chunk
+    num_heads: 4
+    expand_v: 1.0
+    use_gate: false
+    use_short_conv: true
+    conv_size: 16
+    allow_neg_eigval: true
+    use_forget_gate: true
+    num_householder: 4
+    weaving: true
+  loss_type: 'quantile'
+  quantiles: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

data/nan_stats.json ADDED Viewed

The diff for this file is too large to render. See raw diff

examples/generate_synthetic_data.py ADDED Viewed

	@@ -0,0 +1,324 @@

+import logging
+import os
+from typing import Optional
+import torch
+from src.data.containers import BatchTimeSeriesContainer
+from src.data.utils import sample_future_length
+from src.plotting.plot_multivariate_timeseries import plot_from_container
+from src.synthetic_generation.anomalies.anomaly_generator_wrapper import (
+    AnomalyGeneratorWrapper,
+)
+from src.synthetic_generation.audio_generators.financial_volatility_wrapper import (
+    FinancialVolatilityAudioWrapper,
+)
+from src.synthetic_generation.audio_generators.multi_scale_fractal_wrapper import (
+    MultiScaleFractalAudioWrapper,
+)
+from src.synthetic_generation.audio_generators.network_topology_wrapper import (
+    NetworkTopologyAudioWrapper,
+)
+from src.synthetic_generation.audio_generators.stochastic_rhythm_wrapper import (
+    StochasticRhythmAudioWrapper,
+)
+from src.synthetic_generation.cauker.cauker_generator_wrapper import (
+    CauKerGeneratorWrapper,
+)
+from src.synthetic_generation.forecast_pfn_prior.forecast_pfn_generator_wrapper import (
+    ForecastPFNGeneratorWrapper,
+)
+from src.synthetic_generation.generator_params import (
+    AnomalyGeneratorParams,
+    CauKerGeneratorParams,
+    FinancialVolatilityAudioParams,
+    ForecastPFNGeneratorParams,
+    GPGeneratorParams,
+    KernelGeneratorParams,
+    MultiScaleFractalAudioParams,
+    NetworkTopologyAudioParams,
+    OrnsteinUhlenbeckProcessGeneratorParams,
+    SawToothGeneratorParams,
+    SineWaveGeneratorParams,
+    SpikesGeneratorParams,
+    StepGeneratorParams,
+    StochasticRhythmAudioParams,
+)
+from src.synthetic_generation.gp_prior.gp_generator_wrapper import (
+    GPGeneratorWrapper,
+)
+from src.synthetic_generation.kernel_synth.kernel_generator_wrapper import (
+    KernelGeneratorWrapper,
+)
+from src.synthetic_generation.ornstein_uhlenbeck_process.ou_generator_wrapper import (
+    OrnsteinUhlenbeckProcessGeneratorWrapper,
+)
+from src.synthetic_generation.sawtooth.sawtooth_generator_wrapper import (
+    SawToothGeneratorWrapper,
+)
+from src.synthetic_generation.sine_waves.sine_wave_generator_wrapper import (
+    SineWaveGeneratorWrapper,
+)
+from src.synthetic_generation.spikes.spikes_generator_wrapper import (
+    SpikesGeneratorWrapper,
+)
+from src.synthetic_generation.steps.step_generator_wrapper import (
+    StepGeneratorWrapper,
+)
+# Configure logging: INFO level with a "timestamp - level - message" format,
+# plus a module-level logger used by the functions and script body below.
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+def visualize_batch_sample(
+    generator,
+    batch_size: int = 8,
+    output_dir: str = "outputs/plots",
+    sample_idx: Optional[int] = None,
+    prefix: str = "",
+    seed: Optional[int] = None,
+) -> None:
+    """
+    Visualize a sample from a batch of synthetic multivariate time series from any generator.
+    Also plot artificial predictions for demonstration if requested.
+    Args:
+        generator: Any generator wrapper (LMC, Kernel, GP, etc.)
+        batch_size: Number of samples to generate in the batch
+        output_dir: Directory to save plots
+        sample_idx: Index of the sample to visualize
+        seed: Seed for the generator
+    """
+    os.makedirs(output_dir, exist_ok=True)
+    generator_name = generator.__class__.__name__
+    logger.info(f"[{generator_name}] Generating batch of size {batch_size}")
+    batch = generator.generate_batch(batch_size=batch_size, seed=seed)
+    values = torch.from_numpy(batch.values)
+    if values.ndim == 2:
+        values = values.unsqueeze(-1)  # Add channel dimension: [batch_size, seq_len, 1]
+    future_length = sample_future_length(range="gift_eval")
+    # Slice along the time dimension (dimension 1)
+    history_values = values[:, :-future_length, :]
+    future_values = values[:, -future_length:, :]
+    batch = BatchTimeSeriesContainer(
+        history_values=history_values,
+        future_values=future_values,
+        start=batch.start,
+        frequency=batch.frequency,
+    )
+    logger.info(
+        f"[{generator_name}] Batch history values shape: {batch.history_values.shape}"
+    )
+    logger.info(
+        f"[{generator_name}] Batch future values shape: {batch.future_values.shape}"
+    )
+    logger.info(f"[{generator_name}] Batch start: {batch.start}")
+    logger.info(f"[{generator_name}] Batch frequency: {batch.frequency}")
+    if sample_idx is None:
+        for sample_idx in range(batch_size):
+            filename = f"{prefix}_{generator_name.lower().replace('generatorwrapper', '')}_sample_{sample_idx}.png"
+            output_file = os.path.join(output_dir, filename)
+            title = f"{prefix.capitalize()} {generator_name.replace('GeneratorWrapper', '')} Synthetic Time Series (Sample {sample_idx})"
+            plot_from_container(
+                batch=batch,
+                sample_idx=sample_idx,
+                output_file=output_file,
+                show=False,
+                title=title,
+            )
+            logger.info(
+                f"[{generator_name}] Saved plot for sample {sample_idx} to {output_file}"
+            )
+            logger.info("--------------------------------")
+if __name__ == "__main__":
+    # Configuration
+    batch_size = 2
+    total_length = 2048
+    output_dir = "outputs/plots"
+    global_seed = 2025
+    logger.info(f"Saving plots to {output_dir}")
+    kernel_params_univariate = KernelGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    kernel_gen_univariate = KernelGeneratorWrapper(kernel_params_univariate)
+    gp_params_univariate = GPGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    gp_gen_univariate = GPGeneratorWrapper(gp_params_univariate)
+    forecast_pfn_univariate_params = ForecastPFNGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    forecast_pfn_univariate_gen = ForecastPFNGeneratorWrapper(
+        forecast_pfn_univariate_params
+    )
+    sine_wave_params = SineWaveGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    sine_wave_univariate_gen = SineWaveGeneratorWrapper(sine_wave_params)
+    sawtooth_params = SawToothGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    sawtooth_univariate_gen = SawToothGeneratorWrapper(sawtooth_params)
+    step_params = params = StepGeneratorParams(
+        length=2048,
+        global_seed=42,
+    )
+    step_gen_univariate = StepGeneratorWrapper(step_params)
+    anomaly_params = AnomalyGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    anomaly_gen_univariate = AnomalyGeneratorWrapper(anomaly_params)
+    spikes_params = SpikesGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    spikes_gen_univariate = SpikesGeneratorWrapper(spikes_params)
+    cauker_params_multivariate = CauKerGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+        num_channels=5,
+    )
+    cauker_gen_multivariate = CauKerGeneratorWrapper(cauker_params_multivariate)
+    ou_params = OrnsteinUhlenbeckProcessGeneratorParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    ou_gen_univariate = OrnsteinUhlenbeckProcessGeneratorWrapper(ou_params)
+    stochastic_rhythm_params = StochasticRhythmAudioParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    stochastic_rhythm_gen_univariate = StochasticRhythmAudioWrapper(
+        stochastic_rhythm_params
+    )
+    financial_volatility_params = FinancialVolatilityAudioParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    financial_volatility_gen_univariate = FinancialVolatilityAudioWrapper(
+        financial_volatility_params
+    )
+    multi_scale_fractal_params = MultiScaleFractalAudioParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    multi_scale_fractal_gen_univariate = MultiScaleFractalAudioWrapper(
+        multi_scale_fractal_params
+    )
+    network_topology_params = NetworkTopologyAudioParams(
+        global_seed=global_seed,
+        length=total_length,
+    )
+    network_topology_gen_univariate = NetworkTopologyAudioWrapper(
+        network_topology_params
+    )
+    # Visualize samples from all generators
+    visualize_batch_sample(
+        kernel_gen_univariate, batch_size=batch_size, output_dir=output_dir
+    )
+    visualize_batch_sample(
+        gp_gen_univariate, batch_size=batch_size, output_dir=output_dir
+    )
+    visualize_batch_sample(
+        forecast_pfn_univariate_gen, batch_size=batch_size, output_dir=output_dir
+    )
+    visualize_batch_sample(
+        sine_wave_univariate_gen,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        sawtooth_univariate_gen,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        step_gen_univariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        anomaly_gen_univariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        spikes_gen_univariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        cauker_gen_multivariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+        prefix="multivariate",
+    )
+    visualize_batch_sample(
+        ou_gen_univariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+        seed=global_seed,
+    )
+    visualize_batch_sample(
+        stochastic_rhythm_gen_univariate, batch_size=batch_size, output_dir=output_dir
+    )
+    visualize_batch_sample(
+        financial_volatility_gen_univariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        multi_scale_fractal_gen_univariate,
+        batch_size=batch_size,
+        output_dir=output_dir,
+    )
+    visualize_batch_sample(
+        network_topology_gen_univariate, batch_size=batch_size, output_dir=output_dir
+    )

examples/quick_start_tempo_pfn.ipynb ADDED Viewed

	@@ -0,0 +1,286 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "231c6227",
+   "metadata": {},
+   "source": [
+    "# Quick Start: Univariate Quantile Forecasting (CUDA, bfloat16)\n",
+    "\n",
+    "This notebook demonstrates how to:\n",
+    "- Generate synthetic sine wave time series data\n",
+    "- Pack data into `BatchTimeSeriesContainer`\n",
+    "- Load a pretrained model (from Dropbox)\n",
+    "- Run inference with bfloat16 on CUDA\n",
+    "- Visualize predictions\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb6c5424-1c63-4cb0-a818-45d4199914e5",
+   "metadata": {},
+   "source": [
+    "## 1) Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "612a78e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import urllib.request\n",
+    "import torch\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Ensure CUDA is available\n",
+    "if not torch.cuda.is_available():\n",
+    "    raise RuntimeError(\"CUDA is required to run this demo. No CUDA device detected.\")\n",
+    "\n",
+    "device = torch.device(\"cuda:0\")\n",
+    "\n",
+    "# Resolve repository root to be robust to running from subdirectories (e.g., examples/)\n",
+    "repo_root = Path.cwd()\n",
+    "if not (repo_root / \"configs\").exists():\n",
+    "    repo_root = repo_root.parent\n",
+    "\n",
+    "# Inline plotting\n",
+    "%matplotlib inline\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3facf37d-0a77-4222-8464-6e42182547f8",
+   "metadata": {},
+   "source": [
+    "## 2) Download checkpoint from Dropbox"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16dcb883",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DROPBOX_URL = \"https://www.dropbox.com/scl/fi/5vmjr7nx9wj9w1vl2giuv/checkpoint.pth?rlkey=qmk08ojp7wj0l6kpm8hzgbzju&st=dyr07d00&dl=1\"\n",
+    "CHECKPOINT_DIR = repo_root / \"models\"\n",
+    "CHECKPOINT_PATH = CHECKPOINT_DIR / \"checkpoint.pth\"\n",
+    "\n",
+    "CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "if not CHECKPOINT_PATH.exists():\n",
+    "    print(f\"Downloading checkpoint to {CHECKPOINT_PATH} ...\")\n",
+    "    urllib.request.urlretrieve(DROPBOX_URL, str(CHECKPOINT_PATH))\n",
+    "    print(\"Done.\")\n",
+    "else:\n",
+    "    print(f\"Using existing checkpoint at {CHECKPOINT_PATH}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9be77e34-0c7a-4056-822f-ed2e3e090c40",
+   "metadata": {},
+   "source": [
+    "## 3) Generate synthetic sine wave data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1127526c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from src.synthetic_generation.generator_params import SineWaveGeneratorParams\n",
+    "from src.synthetic_generation.sine_waves.sine_wave_generator_wrapper import (\n",
+    "    SineWaveGeneratorWrapper,\n",
+    ")\n",
+    "\n",
+    "batch_size = 3\n",
+    "total_length = 1024\n",
+    "seed = 2025\n",
+    "\n",
+    "sine_params = SineWaveGeneratorParams(global_seed=seed, length=total_length)\n",
+    "wrapper = SineWaveGeneratorWrapper(sine_params)\n",
+    "\n",
+    "batch = wrapper.generate_batch(batch_size=batch_size, seed=seed)\n",
+    "values = torch.from_numpy(batch.values).to(torch.float32)\n",
+    "if values.ndim == 2:\n",
+    "    values = values.unsqueeze(-1)  # [B, S, 1]\n",
+    "\n",
+    "future_length = 256\n",
+    "history_values = values[:, :-future_length, :]\n",
+    "future_values = values[:, -future_length:, :]\n",
+    "\n",
+    "print(\"History:\", history_values.shape, \"Future:\", future_values.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8844488-e51c-4805-baa9-491bfc67e8ca",
+   "metadata": {},
+   "source": [
+    "## 4) Build BatchTimeSeriesContainer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3b4d361",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from src.data.containers import BatchTimeSeriesContainer\n",
+    "\n",
+    "container = BatchTimeSeriesContainer(\n",
+    "    history_values=history_values.to(device),\n",
+    "    future_values=future_values.to(device),\n",
+    "    start=batch.start,\n",
+    "    frequency=batch.frequency,\n",
+    ")\n",
+    "\n",
+    "container.batch_size, container.history_length, container.future_length\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5e7e790-a9aa-49c2-9d45-2dc823036883",
+   "metadata": {},
+   "source": [
+    "## 5) Load model and run inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1dd4e0e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "from src.models.model import TimeSeriesModel\n",
+    "\n",
+    "with open(repo_root / \"configs/example.yaml\", \"r\") as f:\n",
+    "    config = yaml.safe_load(f)\n",
+    "\n",
+    "model = TimeSeriesModel(**config[\"TimeSeriesModel\"]).to(device)\n",
+    "ckpt = torch.load(CHECKPOINT_PATH, map_location=device)\n",
+    "model.load_state_dict(ckpt[\"model_state_dict\"])\n",
+    "model.eval()\n",
+    "\n",
+    "# bfloat16 autocast on CUDA\n",
+    "with (\n",
+    "    torch.no_grad(),\n",
+    "    torch.autocast(device_type=\"cuda\", dtype=torch.bfloat16, enabled=True),\n",
+    "):\n",
+    "    output = model(container)\n",
+    "\n",
+    "preds = output[\"result\"].to(torch.float32)\n",
+    "if hasattr(model, \"scaler\") and \"scale_statistics\" in output:\n",
+    "    preds = model.scaler.inverse_scale(preds, output[\"scale_statistics\"])\n",
+    "\n",
+    "preds.shape\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ba16120f-27c8-4462-91cb-c9b3e0630a9d",
+   "metadata": {},
+   "source": [
+    "## 6) Plot predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9bf02a0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.set_loglevel('error')  \n",
+    "\n",
+    "# preds: [B, P, N, Q] for quantiles (univariate -> N=1)\n",
+    "preds_np = preds.cpu().numpy()\n",
+    "\n",
+    "batch_size = preds_np.shape[0]\n",
+    "prediction_length = preds_np.shape[1]\n",
+    "num_quantiles = preds_np.shape[-1]\n",
+    "\n",
+    "for i in range(batch_size):\n",
+    "    fig, ax = plt.subplots(figsize=(12, 4))\n",
+    "\n",
+    "    history = container.history_values[i, :, 0].detach().cpu().numpy()\n",
+    "    future = container.future_values[i, :, 0].detach().cpu().numpy()\n",
+    "\n",
+    "    # Time axes\n",
+    "    hist_t = np.arange(len(history))\n",
+    "    fut_t = np.arange(len(history), len(history) + len(future))\n",
+    "\n",
+    "    # Plot history and ground truth future\n",
+    "    ax.plot(hist_t, history, label=\"History\", color=\"black\")\n",
+    "    ax.plot(fut_t, future, label=\"Ground Truth\", color=\"blue\")\n",
+    "\n",
+    "    # Plot quantiles\n",
+    "    median_idx = num_quantiles // 2\n",
+    "    ax.plot(\n",
+    "        fut_t,\n",
+    "        preds_np[i, :, 0, median_idx],\n",
+    "        label=\"Prediction (Median)\",\n",
+    "        color=\"orange\",\n",
+    "        linestyle=\"--\",\n",
+    "    )\n",
+    "    if num_quantiles >= 3:\n",
+    "        ax.fill_between(\n",
+    "            fut_t,\n",
+    "            preds_np[i, :, 0, 0],\n",
+    "            preds_np[i, :, 0, -1],\n",
+    "            color=\"orange\",\n",
+    "            alpha=0.2,\n",
+    "            label=\"Prediction Interval\",\n",
+    "        )\n",
+    "\n",
+    "    ax.axvline(x=len(history), color=\"k\", linestyle=\":\", alpha=0.7)\n",
+    "    ax.set_xlabel(\"Time Steps\")\n",
+    "    ax.set_ylabel(\"Value\")\n",
+    "    ax.set_title(f\"Sample {i + 1}\")\n",
+    "    ax.legend()\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "    plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d88bb77b-b6be-4b00-a881-a4b556cce86f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

examples/quick_start_tempo_pfn.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import argparse
+import logging
+import torch
+from examples.utils import (
+    download_checkpoint_if_needed,
+    load_model,
+    run_inference_and_plot,
+)
+from src.data.containers import BatchTimeSeriesContainer
+from src.synthetic_generation.generator_params import SineWaveGeneratorParams
+from src.synthetic_generation.sine_waves.sine_wave_generator_wrapper import (
+    SineWaveGeneratorWrapper,
+)
+# Configure the root logger once for the whole demo: INFO level, with each
+# record rendered as "<timestamp> - <level> - <message>".
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+# Module-level logger used by main() for its progress messages.
+logger = logging.getLogger(__name__)
+def main():
+    """Main execution function."""
+    # CLI
+    parser = argparse.ArgumentParser(description="Quick start demo for TimeSeriesModel")
+    parser.add_argument(
+        "--config",
+        default="configs/example.yaml",
+        help="Path to model config YAML (default: configs/example.yaml)",
+    )
+    parser.add_argument(
+        "--checkpoint",
+        default=None,
+        help="Path to model checkpoint. If omitted, downloads from Dropbox.",
+    )
+    parser.add_argument("--batch_size", type=int, default=3)
+    parser.add_argument("--total_length", type=int, default=2048)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--output_dir", default="outputs")
+    args = parser.parse_args()
+    # Configuration
+    batch_size = args.batch_size
+    total_length = args.total_length
+    output_dir = args.output_dir
+    seed = args.seed
+    config_path = args.config
+    if args.checkpoint:
+        model_path = args.checkpoint
+    else:
+        dropbox_url = "https://www.dropbox.com/scl/fi/5vmjr7nx9wj9w1vl2giuv/checkpoint.pth?rlkey=qmk08ojp7wj0l6kpm8hzgbzju&st=dyr07d00&dl=0"
+        model_path = download_checkpoint_if_needed(dropbox_url, target_dir="models")
+    logger.info("=== Time Series Model Demo (Univariate Quantile) ===")
+    # 1) Generate synthetic sine wave data
+    sine_params = SineWaveGeneratorParams(global_seed=seed, length=total_length)
+    sine_generator = SineWaveGeneratorWrapper(sine_params)
+    batch = sine_generator.generate_batch(batch_size=batch_size, seed=seed)
+    values = torch.from_numpy(batch.values).to(torch.float32)
+    if values.ndim == 2:
+        values = values.unsqueeze(-1)  # Ensure [B, S, 1] for univariate
+    future_length = 256
+    history_values = values[:, :-future_length, :]
+    future_values = values[:, -future_length:, :]
+    # 2) Load the pretrained model (CUDA-only). This demo requires a CUDA GPU.
+    if not torch.cuda.is_available():
+        raise RuntimeError(
+            "CUDA is required to run this demo. No CUDA device detected."
+        )
+    device = torch.device("cuda:0")
+    model = load_model(config_path=config_path, model_path=model_path, device=device)
+    # 3) Pack tensors into the model's input container
+    container = BatchTimeSeriesContainer(
+        history_values=history_values.to(device),
+        future_values=future_values.to(device),
+        start=batch.start,
+        frequency=batch.frequency,
+    )
+    # 4) Run inference (bfloat16 on CUDA) and plot results
+    run_inference_and_plot(
+        model=model, container=container, output_dir=output_dir, use_bfloat16=True
+    )
+    logger.info("=== Demo completed successfully! ===")
+# Run the demo only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

examples/utils.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import logging
+import os
+import urllib.request
+from typing import List
+import numpy as np
+import torch
+import yaml
+from src.data.containers import BatchTimeSeriesContainer
+from src.models.model import TimeSeriesModel
+from src.plotting.plot_timeseries import plot_from_container
+# Module-level logger used by the helper functions in this file.
+logger = logging.getLogger(__name__)
+def load_model(
+    config_path: str, model_path: str, device: torch.device
+) -> TimeSeriesModel:
+    """Load the TimeSeriesModel from config and checkpoint."""
+    with open(config_path, "r") as f:
+        config = yaml.safe_load(f)
+    model = TimeSeriesModel(**config["TimeSeriesModel"]).to(device)
+    checkpoint = torch.load(model_path, map_location=device)
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.eval()
+    logger.info(f"Successfully loaded TimeSeriesModel from {model_path} on {device}")
+    return model
+def download_checkpoint_if_needed(url: str, target_dir: str = "models") -> str:
+    """Download checkpoint from URL into target_dir if not present and return its path.
+    Ensures direct download for Dropbox links by forcing dl=1.
+    """
+    os.makedirs(target_dir, exist_ok=True)
+    target_path = os.path.join(target_dir, "checkpoint.pth")
+    # Normalize Dropbox URL to force direct download
+    if "dropbox.com" in url and "dl=0" in url:
+        url = url.replace("dl=0", "dl=1")
+    if not os.path.exists(target_path):
+        logger.info(f"Downloading checkpoint from {url} to {target_path}...")
+        urllib.request.urlretrieve(url, target_path)
+        logger.info("Checkpoint downloaded successfully.")
+    else:
+        logger.info(f"Using existing checkpoint at {target_path}")
+    return target_path
+def plot_with_library(
+    container: BatchTimeSeriesContainer,
+    predictions_np: np.ndarray,  # [B, P, N, Q]
+    model_quantiles: List[float] | None,
+    output_dir: str = "outputs",
+    show_plots: bool = True,
+    save_plots: bool = True,
+):
+    os.makedirs(output_dir, exist_ok=True)
+    batch_size = container.batch_size
+    for i in range(batch_size):
+        output_file = (
+            os.path.join(output_dir, f"sine_wave_prediction_sample_{i + 1}.png")
+            if save_plots
+            else None
+        )
+        plot_from_container(
+            batch=container,
+            sample_idx=i,
+            predicted_values=predictions_np,
+            model_quantiles=model_quantiles,
+            title=f"Sine Wave Time Series Prediction - Sample {i + 1}",
+            output_file=output_file,
+            show=show_plots,
+        )
+def run_inference_and_plot(
+    model: TimeSeriesModel,
+    container: BatchTimeSeriesContainer,
+    output_dir: str = "outputs",
+    use_bfloat16: bool = True,
+) -> None:
+    """Run model inference with optional bfloat16 and plot using shared utilities."""
+    device_type = "cuda" if (container.history_values.device.type == "cuda") else "cpu"
+    autocast_enabled = use_bfloat16 and device_type == "cuda"
+    with (
+        torch.no_grad(),
+        torch.autocast(
+            device_type=device_type, dtype=torch.bfloat16, enabled=autocast_enabled
+        ),
+    ):
+        model_output = model(container)
+    preds_full = model_output["result"].to(torch.float32)
+    if hasattr(model, "scaler") and "scale_statistics" in model_output:
+        preds_full = model.scaler.inverse_scale(
+            preds_full, model_output["scale_statistics"]
+        )
+    preds_np = preds_full.detach().cpu().numpy()
+    model_quantiles = (
+        model.quantiles if getattr(model, "loss_type", None) == "quantile" else None
+    )
+    plot_with_library(
+        container=container,
+        predictions_np=preds_np,
+        model_quantiles=model_quantiles,
+        output_dir=output_dir,
+        show_plots=True,
+        save_plots=True,
+    )

gift_eval/submission/all_results.csv ADDED Viewed

	@@ -0,0 +1,98 @@

+dataset,model,eval_metrics/MSE[mean],eval_metrics/MSE[0.5],eval_metrics/MAE[0.5],eval_metrics/MASE[0.5],eval_metrics/MAPE[0.5],eval_metrics/sMAPE[0.5],eval_metrics/MSIS,eval_metrics/RMSE[mean],eval_metrics/NRMSE[mean],eval_metrics/ND[0.5],eval_metrics/mean_weighted_sum_quantile_loss,domain,num_variates
+bitbrains_fast_storage/5T/long,TempoPFN,3457994.418690934,3457994.418690934,327.16564021921977,0.9615742426354904,4.317236176373819,0.8336409912081381,19.490867974425026,1859.5683420328853,4.914283864546929,0.8646010960833215,0.6536844929905335,Web/CloudOps,2
+bitbrains_fast_storage/5T/medium,TempoPFN,2800287.942594441,2800287.942594441,268.74003348343393,1.0930763358543272,3.2742632396336617,0.824830859725203,26.62443937663418,1673.406090162947,5.084269857453093,0.8165064414207498,0.6178037781792698,Web/CloudOps,2
+bitbrains_fast_storage/5T/short,TempoPFN,1690352.428591824,1690352.428591824,162.1872711788372,0.8136291545842834,1.776193812092158,0.7668176303900445,20.32296781949572,1300.13554239234,4.082104033944751,0.5092279168950156,0.4037249136258121,Web/CloudOps,2
+bitbrains_fast_storage/H/short,TempoPFN,2588949.8055826887,2588949.8055826887,300.81926572581165,1.1822543303101407,3.726231648106852,0.593958917999151,29.76405600888225,1609.0213813317362,4.586233656611166,0.8574326339199428,0.6458078536940699,Web/CloudOps,2
+bitbrains_rnd/5T/long,TempoPFN,2990910.5081971884,2990910.5081971884,210.74577292406275,3.5186544815564527,2.529824679515041,0.7193304407946397,123.74784003341853,1729.4249067817857,6.625037355191929,0.8073195965870753,0.7073195661600815,Web/CloudOps,2
+bitbrains_rnd/5T/medium,TempoPFN,2289257.9838845544,2289257.9838845544,152.20154125488185,4.54810611155951,0.5130282516077612,0.7426683109827628,167.72825817863563,1513.0294061532825,6.366085897951346,0.6403894606996178,0.6193602612637874,Web/CloudOps,2
+bitbrains_rnd/5T/short,TempoPFN,2014077.5501842,2014077.5501842,135.98742661578987,1.8517397044034425,0.696232985412152,0.6817326560343352,63.2175943612252,1419.182000373525,5.806883078969316,0.5564212950558665,0.5257236486996486,Web/CloudOps,2
+bitbrains_rnd/H/short,TempoPFN,2035943.7507352629,2035943.7507352629,166.20220717027726,6.030992589065415,1.762054722799612,0.5701899273183543,206.5459296074726,1426.865007888014,6.244659394585995,0.7273821760777641,0.6502124112431411,Web/CloudOps,2
+bizitobs_application/10S/long,TempoPFN,5740816.497777778,5740816.497777778,1423.993611111111,3.481128281924532,0.0486669582790798,0.0496452288049222,25.03746966635237,2396.0001038768296,0.0924041469204081,0.0549177417154226,0.0475831149960796,Web/CloudOps,2
+bizitobs_application/10S/medium,TempoPFN,2146376.1066666665,2146376.1066666665,832.4885416666667,2.154386004052589,0.0297427558898925,0.0299385500168018,15.690585782887144,1465.051571333469,0.0575282633018517,0.0326893748710731,0.0238959442034563,Web/CloudOps,2
+bizitobs_application/10S/short,TempoPFN,376800.9244444444,376800.9244444444,333.5807638888889,1.045550170879607,0.0145058769649929,0.0146063554228645,8.24955441258388,613.8411231291403,0.0236733985259999,0.0128648767027747,0.0102197624925825,Web/CloudOps,2
+bizitobs_l2c/5T/long,TempoPFN,332.849126984127,332.849126984127,12.822042410714284,1.22095704272019,0.9288389476299502,1.153716245249532,10.349816564139786,18.24415322738019,1.0243819286593263,0.7199385123737686,0.573918947104382,Web/CloudOps,7
+bizitobs_l2c/5T/medium,TempoPFN,146.4688988095238,146.4688988095238,8.26147693452381,0.7818771866617815,0.5654391202343231,0.8614376093159631,7.573703014323006,12.10243359037858,0.636765980847373,0.4346751770356495,0.3544980423033946,Web/CloudOps,7
+bizitobs_l2c/5T/short,TempoPFN,20.892621721540177,20.892621721540177,2.659001813616072,0.274018680480048,0.1498092349119431,0.2086212109555511,2.4052935303554963,4.570844749227453,0.1569813038038852,0.0913208814604556,0.0723605673125024,Web/CloudOps,7
+bizitobs_l2c/H/long,TempoPFN,108.49439484126984,108.49439484126984,6.4422584170386905,0.6815875823888798,0.673632736325078,0.8544856513482197,5.284687159594512,10.416064268295862,0.6362432681202789,0.3935117376155234,0.3131677794751992,Web/CloudOps,7
+bizitobs_l2c/H/medium,TempoPFN,85.11841517857142,85.11841517857142,5.809039015997024,0.6083751129472635,0.6200869245381406,0.8834569194800722,3.7736867069280655,9.225964186933062,0.55864614495367,0.3517461358421966,0.2719450016284953,Web/CloudOps,7
+bizitobs_l2c/H/short,TempoPFN,58.60240730406746,58.60240730406746,4.6713135734437,0.4630005137848145,0.3948283755807854,0.6419900636889204,2.951036267389131,7.6552209180445905,0.4126360128422088,0.251795765049438,0.1985720701597805,Web/CloudOps,7
+bizitobs_service/10S/long,TempoPFN,137172.65608465608,137172.65608465608,82.30403439153439,1.662345455331064,0.1011724369109623,0.0943146465267703,24.580062468505552,370.36827089352033,0.2743806554539279,0.0609734598710921,0.0557345053106378,Web/CloudOps,2
+bizitobs_service/10S/medium,TempoPFN,7740.426666666666,7740.426666666666,30.32351686507937,1.4210776938224114,0.103039076063368,0.0716119877124027,22.67250742405877,87.9796946270369,0.0662155914274661,0.0228221933696611,0.0187131317044051,Web/CloudOps,2
+bizitobs_service/10S/short,TempoPFN,2474.4792460317462,2474.4792460317462,19.176820798197756,1.1598653331648996,0.0666156292466259,0.0541983465988417,24.57783862628469,49.7441378056927,0.0368520491464996,0.0142068025239332,0.0115698518326863,Web/CloudOps,2
+car_parts/M/short,TempoPFN,1.3818172451286372,1.3818172451286372,0.4672690957004653,0.8407515421652917,0.8293590893689794,1.893282740388816,25.816739632958576,1.175507228871281,2.8187457507850047,1.1204633588204531,1.016162704205562,Sales,1
+covid_deaths/D/short,TempoPFN,290323.0616541353,290323.0616541353,95.88298186873432,36.82875379020991,0.0973305795059633,0.3802577731558227,838.2070203544016,538.8163524375772,0.2027322652277888,0.0360764368473681,0.0296493204600527,Healthcare,1
+electricity/15T/long,TempoPFN,679367.7835053491,679367.7835053491,90.37124592952328,1.3619071064235215,0.2042962727803589,0.1999499070070441,10.487232652692931,824.2376984252473,1.3003359982860876,0.1425717174992594,0.1137251456887457,Energy,1
+electricity/15T/medium,TempoPFN,410301.82361416105,410301.82361416105,74.23413740058417,1.1823607410007584,0.1802587787943053,0.1831106137567377,9.124865260310653,640.5480650303778,1.1037288021425813,0.1279128921376865,0.1022478853910184,Energy,1
+electricity/15T/short,TempoPFN,197999.487064066,197999.487064066,69.58886888658678,1.2890658846498282,0.1882592086631687,0.2060175097324122,12.440409392978095,444.97133285647294,0.8771262285479021,0.1371733808638283,0.1124443744530128,Energy,1
+electricity/D/short,TempoPFN,2460923936.675748,2460923936.675748,5265.180427927928,1.564314345775582,0.5004102473254829,0.1020489007479963,12.14133452277077,49607.70037681396,0.8152193034530106,0.0865244042438215,0.0664914998664846,Energy,1
+electricity/H/long,TempoPFN,9660838.018114114,9660838.018114114,315.4014256756757,1.3657739992140687,0.3528330126922677,0.1644909658307117,11.137350730901831,3108.1888646145867,1.225868818973913,0.1243942340819344,0.1000007258954307,Energy,1
+electricity/H/medium,TempoPFN,7117041.892843468,7117041.892843468,278.22569995777025,1.2130144034395085,0.2467986882167905,0.1500124404017645,9.945132926951258,2667.778456477125,1.0413218399707818,0.1086006587630108,0.0872520518606711,Energy,1
+electricity/H/short,TempoPFN,2001828.2401337277,2001828.2401337277,199.38613109691724,1.0786172149769229,0.2218964961368299,0.1440280778205061,11.294129947761776,1414.8597952213242,0.6679148696019,0.0941244936083527,0.0771797324638773,Energy,1
+electricity/W/short,TempoPFN,76183092615.61081,76183092615.61081,34727.13657094594,1.570365713856254,0.1694176463278896,0.0993488831601272,11.91788945218548,276012.8486422521,0.6297702574229203,0.0792358683497086,0.0606708906232695,Energy,1
+ett1/15T/long,TempoPFN,11.819432043650794,11.819432043650794,1.9247003658234128,1.2507084318317785,0.7671729308733772,0.4743360534288272,8.737143903915339,3.437940087268944,0.6552747798713814,0.3668497927592424,0.288230653138245,Energy,7
+ett1/15T/medium,TempoPFN,11.408509540860615,11.408509540860615,1.8437986488947795,1.1818547993600037,0.7281965758824728,0.4556773517772796,8.31158410608761,3.377648522398477,0.6437831670148321,0.3514298559046792,0.277877086121562,Energy,7
+ett1/15T/short,TempoPFN,6.0998437790643605,6.0998437790643605,1.2401007697695778,0.7754559371204721,0.4745998734729215,0.2709062529464219,5.124313668126548,2.469786180839216,0.466892406241743,0.2344306713155297,0.1816328117134165,Energy,7
+ett1/D/short,TempoPFN,40213.09841269841,40213.09841269841,133.03087797619048,1.6324476425289194,1.3032025049603175,0.4616568808156475,10.165232559778676,200.532038369679,0.5267040504936318,0.3494100136837688,0.2686181176947919,Energy,7
+ett1/H/long,TempoPFN,164.89584986772488,164.89584986772488,7.56153480489418,1.3859274542018474,6981196483236.346,0.4785638067874847,10.00708086784301,12.841177900322265,0.6149361975860038,0.362105524659008,0.2816536526299331,Energy,7
+ett1/H/medium,TempoPFN,158.99996279761905,158.99996279761905,7.330310639880952,1.3479604957613165,4318128927434.1104,0.442596366665559,9.590349301595682,12.609518737748044,0.6039660567793922,0.3511045032102148,0.2758386706555789,Energy,7
+ett1/H/short,TempoPFN,104.70541410900296,104.70541410900296,5.155340358189174,0.8547778511929055,0.5003422782534644,0.2702135929272071,5.451980617457977,10.23256635009043,0.4775957088062226,0.2406208128310768,0.1863945925056479,Energy,7
+ett1/W/short,TempoPFN,1183244.5,1183244.5,833.8333565848214,1.5044807548071886,0.5491453238895961,0.5423834221847527,8.346203452067822,1087.770426146988,0.4328498151953185,0.3318021943103104,0.2579454864080913,Energy,7
+ett2/15T/long,TempoPFN,13.920968501984127,13.920968501984127,2.4372794208829367,1.0266229456075762,0.162421735656956,0.1912512110170139,7.240519836720884,3.731081411867627,0.1988360821855705,0.1298870321345517,0.1019580288018046,Energy,7
+ett2/15T/medium,TempoPFN,12.987088216145834,12.987088216145834,2.3273520817832343,0.9702315616517476,0.1522595897167077,0.1778896720653481,6.742950364573618,3.6037602883857063,0.1920508963553819,0.1240287970544381,0.096317793490544,Energy,7
+ett2/15T/short,TempoPFN,8.553860618954612,8.553860618954612,1.8131163824172247,0.7804718358045571,0.1129981226167575,0.1330301091075509,5.350648546766824,2.924698380851368,0.1388458481191596,0.0860750918808183,0.0685052637873467,Energy,7
+ett2/D/short,TempoPFN,170239.08571428573,170239.08571428573,246.5058531746032,2.290326347664065,0.5867415170820932,0.181941641118808,19.77345328878349,412.6003947093189,0.2361420919863594,0.1410818036093808,0.1199711796314696,Energy,7
+ett2/H/long,TempoPFN,218.90458002645505,218.90458002645505,8.95805431547619,0.9923323101716812,0.1756859628750919,0.1666816776540073,8.879245079301075,14.795424293559648,0.202611948300768,0.1226736592226554,0.1036227735159358,Energy,7
+ett2/H/medium,TempoPFN,243.5093005952381,243.5093005952381,9.681060500372023,1.0446103612297537,0.1729210287876511,0.1670725712727777,7.986726475131007,15.60478454177558,0.2102845067618488,0.1304585158995543,0.1055031195534691,Energy,7
+ett2/H/short,TempoPFN,108.53523995535714,108.53523995535714,6.435656592959449,0.7470950747417008,0.1237944578658914,0.1119298434719161,5.231412537431336,10.418024762658089,0.132129541137475,0.0816220321911712,0.0649374365945919,Energy,7
+ett2/W/short,TempoPFN,3383120.0,3383120.0,1295.83203125,0.8951468014830303,0.1438319001879011,0.1607407323784273,9.470720058706704,1839.3259634985852,0.1542912497518785,0.1087004411060343,0.0918377144034967,Energy,7
+hierarchical_sales/D/short,TempoPFN,29.123292892853453,29.123292892853453,2.330811869009017,0.7515128310580963,0.6410289944380209,1.0602027814719133,7.300803117167325,5.396600123490108,1.655617154145591,0.7150672692276139,0.5842758796416178,Sales,1
+hierarchical_sales/W/short,TempoPFN,437.0786163847325,437.0786163847325,8.912233902236162,0.7289483130473005,0.5386128782220985,0.4609518352185516,6.64654723456629,20.906425241650773,0.9598242089373876,0.4091650177495076,0.3427086959502182,Sales,1
+hospital/M/short,TempoPFN,3146.256471371143,3146.256471371143,18.09600954747936,0.768169214106615,0.1945593078361289,0.1746747839340947,5.195230182484194,56.09150088356652,0.2036251734486402,0.0656927168071784,0.0524846811483977,Healthcare,1
+jena_weather/10T/long,TempoPFN,1802.1015893683864,1802.1015893683864,11.05077453290344,0.6480390269860509,0.8421329927032432,0.6608396658167077,6.431036388936636,42.45116711432544,0.2597659641659084,0.0676215825489287,0.0568675921221853,Nature,21
+jena_weather/10T/medium,TempoPFN,1753.326057449495,1753.326057449495,10.45327409136003,0.6205262365569862,0.800809384246282,0.6752596791783848,5.983639047813428,41.8727364456814,0.2569722819618993,0.0641515679471918,0.0526090633894538,Nature,21
+jena_weather/10T/short,TempoPFN,967.4834108746242,967.4834108746242,6.240461833893307,0.29785829799349,0.3495092146199383,0.5688907620872866,2.7136900415583747,31.104395362627194,0.1931972955145112,0.0387610926048839,0.0316042761177553,Nature,21
+jena_weather/D/short,TempoPFN,407.6343998015873,407.6343998015873,10.096466548859128,1.2762104692374598,0.7647620393136633,0.4792207233136718,8.470860420630778,20.18995789499293,0.1215765454839294,0.0607972305335396,0.0502315688594873,Nature,21
+jena_weather/H/long,TempoPFN,1450.035582010582,1450.035582010582,12.834169353505292,1.14925431188062,3.6454664207292495,0.6876010515835866,8.49444712038228,38.07933274114164,0.2292797721719729,0.0772759188138019,0.0615505784712624,Nature,21
+jena_weather/H/medium,TempoPFN,1601.863988095238,1601.863988095238,11.43108181423611,0.830294830457016,2.907100446024271,0.6853773994144349,6.671981430906583,40.02329306910213,0.2449096285447175,0.069948816959045,0.0562073146926605,Nature,21
+jena_weather/H/short,TempoPFN,1161.0002492396357,1161.0002492396357,8.45698316633054,0.5343309068765919,1.2899701263024146,0.6088007276136064,4.785975199754376,34.073453732189165,0.2088680861946653,0.0518407644501047,0.0420641895205845,Nature,21
+kdd_cup_2018/D/short,TempoPFN,2957.558623620254,2957.558623620254,21.449108262527844,1.2022252741099626,0.5317497971257803,0.4686441782744058,9.261473936192472,54.38344071149098,1.2182468123931351,0.4804827982128498,0.3781022537751671,Nature,1
+kdd_cup_2018/H/long,TempoPFN,4099.393834111588,4099.393834111588,24.37712764418711,1.0361222519400666,1.067596580941062,0.6143228628291789,7.771131914708889,64.02650883900814,1.502320679809611,0.5719859420463782,0.4457075391120335,Nature,1
+kdd_cup_2018/H/medium,TempoPFN,5187.082299587964,5187.082299587964,25.7997948872936,1.076371675935021,0.996384455388961,0.5780413204727044,8.857767467515062,72.02140167747338,1.5075383869258192,0.5400364372465801,0.4275271421385803,Nature,1
+kdd_cup_2018/H/short,TempoPFN,4423.398133663605,4423.398133663605,22.99868414981208,0.975075924949626,0.940964873703854,0.5088272138094274,7.590540688579701,66.50863202369753,1.3921461175037204,0.4814041106039762,0.3836356598458785,Nature,1
+loop_seattle/5T/long,TempoPFN,157.56740910947713,157.56740910947713,7.634268714704449,1.191248365481325,0.294509001597258,0.1619458107627965,11.066948051479228,12.552585753918478,0.2219322959299876,0.1349754398667968,0.1102272021373501,Transport,1
+loop_seattle/5T/medium,TempoPFN,150.1369470378612,150.1369470378612,7.426822445518092,1.1582245306263297,0.2787350445364528,0.1604399422057661,10.01895938551111,12.253038277825675,0.2180067277247357,0.1321384314672467,0.1067471651057607,Transport,1
+loop_seattle/5T/short,TempoPFN,56.20837674450948,56.20837674450948,4.151334465349668,0.6528856304300188,0.1189866291357144,0.0868257500989403,5.418227383070815,7.497224602778649,0.1285608499805451,0.0711862209945109,0.0566841136858997,Transport,1
+loop_seattle/D/short,TempoPFN,19.32995043617776,19.32995043617776,2.972921541920633,0.8969007376581118,0.0550185047066986,0.0549326847443657,7.26916754640511,4.396583950771071,0.0785728296906184,0.0531300802196517,0.043017058136015,Transport,1
+loop_seattle/H/long,TempoPFN,89.40608606381149,89.40608606381149,5.467254720555985,1.12318926581471,0.184156507997936,0.123748771373072,7.909106132499233,9.455479155696526,0.1689820440045555,0.0977071455143036,0.0757696938381596,Transport,1
+loop_seattle/H/medium,TempoPFN,82.05160402154283,82.05160402154283,5.249757043545214,1.067239394992798,0.1469916750156965,0.1198292414396702,7.71532453928058,9.058234045416514,0.1601032741074667,0.0927888688596669,0.0730646137379731,Transport,1
+loop_seattle/H/short,TempoPFN,79.93021155830753,79.93021155830753,4.978952884492776,1.0019999078644888,0.1463370876655166,0.1132828703040474,7.663887880517498,8.94036976630763,0.1582536394917618,0.0881325309159338,0.0697681163822325,Transport,1
+m4_daily/D/short,TempoPFN,1107302.1502247455,1107302.1502247455,200.0984529723884,4.4308070498735,0.0375582349397479,0.0325003448180839,73.85257371713408,1052.284253528839,0.162548835061418,0.0309096808387832,0.0255332842361311,Econ/Fin,1
+m4_hourly/H/short,TempoPFN,1900790.397793584,1900790.397793584,269.87303079307173,0.8295739315523254,0.1211194871128469,0.1069586238356769,5.763527621155884,1378.691552811427,0.1882218572798091,0.036843631182058,0.028529335870705,Econ/Fin,1
+m4_monthly/M/short,TempoPFN,1888038.791054398,1888038.791054398,550.6457703179253,0.9239334080470228,0.157533313108815,0.1295610419100214,7.634234953833278,1374.059238553563,0.2855766798961609,0.1144430942087997,0.0913401022550916,Econ/Fin,1
+m4_quarterly/Q/short,TempoPFN,1796236.6248541668,1796236.6248541668,559.6329735514323,1.1747941926308771,0.1171090269982814,0.1022490358727294,8.829971053307407,1340.2375255357413,0.2243321614232385,0.0936727051500237,0.0748259755560803,Econ/Fin,1
+m4_weekly/W/short,TempoPFN,583423.8894364688,583423.8894364688,281.9014021319905,2.5068974606709102,0.0621606022892198,0.0619608453649078,34.89581976015151,763.8218964107201,0.1391565014546586,0.0513580627371238,0.0419254044427488,Econ/Fin,1
+m4_yearly/A/short,TempoPFN,3788840.647688692,3788840.647688692,918.094150872091,3.421401346947248,0.1715710473134636,0.1539546917894726,30.445192708697025,1946.4944509781403,0.3121294070682495,0.1472206523889813,0.1196218198038522,Econ/Fin,1
+m_dense/D/short,TempoPFN,9482.795555555556,9482.795555555556,45.650052083333335,0.6899203610078747,0.1267966234392374,0.0999539874368826,7.043961677013339,97.3796465158688,0.1685785724555744,0.0790269926834956,0.0660205335795425,Transport,1
+m_dense/H/long,TempoPFN,75216.58469135803,75216.58469135803,131.98908950617283,1.1099402431673535,0.4539421329801232,0.3155182786007836,8.274772567214445,274.2564214222851,0.4747784079532156,0.2284926254705489,0.1835932985711755,Transport,1
+m_dense/H/medium,TempoPFN,66764.08444444444,66764.08444444444,124.88760416666666,1.0367512901513452,0.4164061887564455,0.3037215745855758,7.849365647315136,258.3874695964269,0.4495615533094931,0.2172886533776566,0.1733313995334337,Transport,1
+m_dense/H/short,TempoPFN,66010.75111111111,66010.75111111111,120.28962565104166,1.0160056830633992,0.4109345557059414,0.2858404757066923,8.963124852594527,256.9255750428733,0.4557220010023848,0.213363846290401,0.1721874459654968,Transport,1
+restaurant/D/short,TempoPFN,141.10532587289566,141.10532587289566,7.168732057134789,0.6880883048170244,0.6529455322995288,0.3943859221952507,4.677912027140814,11.878776278425976,0.5466888071812649,0.3299216590546327,0.2581917858443854,Sales,1
+saugeen/D/short,TempoPFN,1375.7590625,1375.7590625,14.092150065104166,3.1309744145587413,0.3216047922770182,0.3610168146588098,30.500399370336144,37.09122621995665,1.20139365482093,0.4564480982807553,0.3755489541578553,Nature,1
+saugeen/M/short,TempoPFN,450.8665829613095,450.8665829613095,13.476651146298362,0.7895196884776998,0.3773554620288667,0.3776235164111268,4.73649049060767,21.23361916775634,0.6374969476769328,0.4046094969866529,0.3102559834358562,Nature,1
+saugeen/W/short,TempoPFN,1229.458984375,1229.458984375,16.6321533203125,1.349179934912743,0.4265083312988281,0.4078161463980466,9.738107739351932,35.0636419154514,1.058425187524238,0.5020553780303745,0.4004677568480909,Nature,1
+solar/10T/long,TempoPFN,38.03510999594485,38.03510999594485,2.95217954414411,1.289682582185996,1.980939740009238,1.613347773805571,10.516784658746062,6.167261142188228,1.3329548105705944,0.6380663692211938,0.5039360963008123,Energy,1
+solar/10T/medium,TempoPFN,31.73545226443265,31.73545226443265,2.5354660658571637,1.1083795980449829,2.376593457700527,1.55737387319841,8.328948817077988,5.6334227841013895,1.2500789187217176,0.5626300030254013,0.4351566003689724,Energy,1
+solar/10T/short,TempoPFN,23.310370868023877,23.310370868023877,2.089681713540479,0.9104617574218964,3.088664790072517,1.572870007326998,5.75677115505075,4.828081489372758,1.4044883551066518,0.6078881723531575,0.4620934487157311,Energy,1
+solar/D/short,TempoPFN,128838.1800486618,128838.1800486618,250.6613594890511,0.9782785201693562,1.044265071376977,0.4281686406100531,5.697261783834341,358.94035723036467,0.5185465709334212,0.3621202960611567,0.2800056850496608,Energy,1
+solar/H/long,TempoPFN,875.8664436334144,875.8664436334144,13.072592888280615,0.9956236178493916,4.22992235524107,1.4246637276180487,7.401661699548643,29.59504086216835,1.0261141373748248,0.4532506793040977,0.3659397066604597,Energy,1
+solar/H/medium,TempoPFN,969.7141119221412,969.7141119221412,12.987529225593066,0.9916768572605807,4.270079450594054,1.4254524509643212,7.490816829317306,31.140233010081044,1.1217438746826656,0.4678411157474254,0.3867255305832896,Energy,1
+solar/H/short,TempoPFN,687.7860367694727,687.7860367694727,11.829851353484486,0.9153026829231484,3.298143647835289,1.4320977852338796,6.105712982423255,26.22567514420692,0.9666871244547572,0.4360522627061222,0.338930893375135,Energy,1
+solar/W/short,TempoPFN,1837604.379562044,1837604.379562044,1080.8457458941605,1.1818483501196966,0.2449082423300638,0.2096709326072068,7.4416251763844,1355.5826716073218,0.2767083080725079,0.2206276340779138,0.160040923371332,Energy,1
+sz_taxi/15T/long,TempoPFN,17.335278668091167,17.335278668091167,2.8752947771990742,0.5435739715185243,8150111586328.506,0.4184713652630097,4.380632638022632,4.163565619525069,0.3853721873400869,0.266132142205346,0.2125054942462848,Transport,1
+sz_taxi/15T/medium,TempoPFN,17.73923068576389,17.73923068576389,2.88089839576656,0.5629010694446301,12249100752073.3,0.4169614441536142,4.14479162506023,4.211796610208509,0.3919779681855994,0.2681156770449617,0.2116726820657642,Transport,1
+sz_taxi/15T/short,TempoPFN,17.57054233201694,17.57054233201694,2.8061145880283456,0.5602273611005313,959958212480.042,0.4052746826217428,4.012449587804747,4.191723074347462,0.3919440456333012,0.2623837225491499,0.2066414499332957,Transport,1
+sz_taxi/H/short,TempoPFN,7.529553437844301,7.529553437844301,1.8911764356825087,0.5763698315699721,1.1282697131491115,0.3028206124743695,4.102259224272375,2.744003177447924,0.2556208258093829,0.1761747530810148,0.1390065711740581,Transport,1
+temperature_rain/D/short,TempoPFN,186.81455040605852,186.81455040605852,5.9566782469540085,1.385070112921999,28.13298940652906,1.5239891474214418,25.51129792837534,13.668011940514925,1.60908637363345,0.7012585181375972,0.572396651054853,Nature,1
+us_births/D/short,TempoPFN,188935.12,188935.12,266.77953125,0.3928744382386066,0.0256256341934204,0.0252893901831033,3.346242220924745,434.6666768916154,0.040746473841947,0.0250084162636546,0.0202561474459704,Healthcare,1
+us_births/M/short,TempoPFN,53081706.66666666,53081706.66666666,5884.473958333333,0.6657763993191178,0.0181355277697245,0.018380189816312,3.8133237509773754,7285.719365077594,0.0226293946745239,0.0182771359947467,0.0136913144696876,Healthcare,1
+us_births/W/short,TempoPFN,1710668.857142857,1710668.857142857,1002.7292131696428,0.9115505789348626,0.0135757678321429,0.0135867246503487,5.849062097782842,1307.9254019793548,0.0177549590839956,0.0136119507467408,0.0106687511144078,Healthcare,1

gift_eval/submission/config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "model": "TempoPFN",
+    "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://github.com/automl/TempoPFN/tree/main"
+}

pyproject.toml ADDED Viewed

	@@ -0,0 +1,57 @@

+[project]
+name = "TempoPFN"
+version = "0.1.0"
+description = "Univariate Time Series Forecasting Using Linear RNNs"
+authors = [
+]
+readme = "README.md"
+license = { file = "LICENSE" }
+requires-python = ">=3.10,<3.13"
+dependencies = [
+    "torch>=2.5.0",
+    "torchmetrics",
+    "triton==3.2.0",
+    "numpy",
+    "gpytorch",
+    "flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@main",
+    "scikit-learn",
+]
+classifiers = [
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python",
+    "Topic :: Software Development",
+    "Topic :: Scientific/Engineering",
+    "Operating System :: POSIX",
+    "Operating System :: Unix",
+    "Operating System :: MacOS",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+[project.optional-dependencies]
+dev = [
+    "wandb",
+    "ujson",
+    "build",
+    "pre-commit",
+    "pytest>=8",
+    "ruff",
+    "mypy",
+    "commitizen",
+    "black",
+    "matplotlib",
+    "gluonts",
+    "pyo",
+    "statsmodels"
+]
+[build-system]
+requires = ["setuptools>=68.2.2", "wheel>=0.41.2"]
+build-backend = "setuptools.build_meta"
+package-dir = {"" = "src"}

src/__init__.py ADDED Viewed

File without changes

src/data/__init__.py ADDED Viewed

File without changes

src/data/augmentations.py ADDED Viewed

	@@ -0,0 +1,1318 @@

+import logging
+import math
+from collections import Counter
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+import numpy as np
+import torch
+import torch.nn as nn
+from joblib import Parallel, delayed
+from torch.quasirandom import SobolEngine
+import torch.nn.functional as F
+from src.gift_eval.data import Dataset
+logger = logging.getLogger(__name__)
def find_consecutive_nan_lengths(series: np.ndarray) -> list[int]:
    """Return the length of every run of consecutive NaNs in ``series``.

    Args:
        series: Numeric array or array-like (e.g. a list). Inputs with more
            than one dimension are flattened in C order and treated as one
            long sequence, so a run may continue across a row boundary.

    Returns:
        Run lengths in order of appearance; an empty list if there are no NaNs.
    """
    # asarray + ravel accepts lists and 0-d inputs as well as ndarrays.
    values = np.asarray(series).ravel()
    is_nan = np.isnan(values)
    # Pad with False on both sides so that runs touching either end of the
    # array still produce a rising and a falling edge.
    padded = np.concatenate(([False], is_nan, [False])).astype(np.int8)
    edges = np.diff(padded)
    run_starts = np.flatnonzero(edges == 1)
    run_ends = np.flatnonzero(edges == -1)
    return (run_ends - run_starts).tolist()
def analyze_datasets_for_augmentation(gift_eval_path_str: str) -> dict:
    """
    Scan every dataset below ``gift_eval_path_str`` and collect NaN statistics.

    Dataset names are discovered from the directory layout: a top-level
    directory with sub-directories contributes one ``"<dir>/<subdir>"`` name per
    sub-directory, otherwise it contributes its own name. Hidden entries and
    plain files are ignored. Each name is loaded through ``Dataset`` with
    ``term="short"`` and its training series are inspected. A dataset that
    raises while being processed is logged and skipped.

    Returns:
        dict with
        - ``"p_series_has_nan"``: fraction of inspected series containing a NaN,
        - ``"nan_ratio_distribution"``: one NaN ratio per series that has NaNs,
        - ``"nan_length_distribution"``: Counter of consecutive-NaN run lengths.

    Raises:
        FileNotFoundError: the given path does not exist.
        ValueError: no series at all could be inspected.
    """
    logger.info(
        "--- Starting Dataset Analysis for Augmentation (Full Distribution) ---"
    )
    root = Path(gift_eval_path_str)
    if not root.exists():
        raise FileNotFoundError(
            f"Provided raw data path for augmentation analysis does not exist: {gift_eval_path_str}"
        )

    # Discover dataset names from the folder structure.
    discovered = []
    for entry in root.iterdir():
        if entry.name.startswith(".") or not entry.is_dir():
            continue
        sub_dirs = [child for child in entry.iterdir() if child.is_dir()]
        if not sub_dirs:
            discovered.append(entry.name)
            continue
        for sub_dir in sub_dirs:
            discovered.append(f"{entry.name}/{sub_dir.name}")

    n_series = 0
    n_series_with_nan = 0
    ratios = []
    run_length_counts = Counter()
    for name in sorted(discovered):
        try:
            dataset = Dataset(name=name, term="short", to_univariate=False)
            for item in dataset.training_dataset:
                n_series += 1
                values = np.atleast_1d(item["target"])
                nan_count = np.isnan(values).sum()
                if nan_count <= 0:
                    continue
                n_series_with_nan += 1
                ratios.append(float(nan_count / values.size))
                run_length_counts.update(find_consecutive_nan_lengths(values))
        except Exception as e:
            # Best effort: one unreadable dataset must not abort the analysis.
            logger.warning(
                f"Could not process {name} for augmentation analysis: {e}"
            )

    if n_series == 0:
        raise ValueError(
            "No series were found during augmentation analysis. Check dataset path."
        )
    # n_series is guaranteed positive here, so the division is safe.
    p_series_has_nan = n_series_with_nan / n_series

    logger.info("--- Augmentation Analysis Complete ---")
    # Summary statistics
    logger.info(f"Total series analyzed: {n_series}")
    logger.info(f"Series with NaNs: {n_series_with_nan} ({p_series_has_nan:.4f})")
    logger.info(f"NaN ratio distribution: {Counter(ratios)}")
    logger.info(f"Consecutive NaN lengths distribution: {run_length_counts}")
    logger.info("--- End of Dataset Analysis for Augmentation ---")
    return {
        "p_series_has_nan": p_series_has_nan,
        "nan_ratio_distribution": ratios,
        "nan_length_distribution": run_length_counts,
    }
class NanAugmenter:
    """
    Applies realistic NaN augmentation by generating and caching NaN patterns on-demand
    during the first transform call for a given number of channels.

    Patterns are boolean masks of shape ``(max_length, num_channels)``. They are
    cached in memory, persisted to disk with ``torch.save`` and re-used on later
    runs. At transform time a pattern is cut to the batch's history length.
    """

    def __init__(
        self,
        p_series_has_nan: float,
        nan_ratio_distribution: List[float],
        nan_length_distribution: Counter,
        num_patterns: int = 100000,
        n_jobs: int = -1,
        nan_patterns_path: Optional[str] = None,
    ):
        """
        Initializes the augmenter. New NaN patterns are not generated at this stage,
        but pattern files already present on disk are loaded into the cache.

        Args:
            p_series_has_nan (float): Probability that a series in a batch will be augmented.
            nan_ratio_distribution (List[float]): A list of NaN ratios observed in the dataset.
            nan_length_distribution (Counter): A Counter of consecutive NaN block lengths.
            num_patterns (int): The number of unique NaN patterns to generate per data shape.
            n_jobs (int): The number of CPU cores to use for parallel pattern generation (-1 for all cores).
            nan_patterns_path (Optional[str]): Path of a pattern file. Its parent directory
                is used to look up and store pattern files; defaults to ``./data``.
        """
        self.p_series_has_nan = p_series_has_nan
        self.nan_ratio_distribution = nan_ratio_distribution
        self.num_patterns = num_patterns
        self.n_jobs = n_jobs
        self.max_length = 2048
        self.nan_patterns_path = nan_patterns_path
        # Cache to store patterns: Dict[(max_length, num_channels) -> pattern_tensor]
        self.pattern_cache: Dict[Tuple[int, ...], torch.BoolTensor] = {}

        total_blocks = (
            sum(nan_length_distribution.values()) if nan_length_distribution else 0
        )
        self._has_block_distribution = total_blocks > 0
        # Always defined so that later attribute access cannot fail.
        self.dist_lengths: List[int] = []
        self.dist_probs: List[float] = []
        if self._has_block_distribution:
            # Keys are cast to int (they may arrive as strings).
            self.dist_lengths = [int(length) for length in nan_length_distribution]
            self.dist_probs = [
                count / total_blocks for count in nan_length_distribution.values()
            ]
        else:
            logger.warning("NaN length distribution is empty. Augmentation disabled.")
        if not self.nan_ratio_distribution:
            logger.warning("NaN ratio distribution is empty. Augmentation disabled.")
        # Try to load existing patterns from disk
        self._load_existing_patterns()

    def _load_existing_patterns(self) -> None:
        """Load existing NaN patterns from disk into the cache if they exist."""
        candidate_files: List[Path] = []
        if self.nan_patterns_path is not None:
            explicit_path = Path(self.nan_patterns_path).resolve()
            # If the explicit path exists, use it directly
            if explicit_path.is_file():
                candidate_files.append(explicit_path)
            # Also search the directory of the explicit path for matching files
            search_dir = explicit_path.parent
            search_dir.mkdir(exist_ok=True, parents=True)
        else:
            # Default to the ./data directory
            search_dir = Path("data")
            search_dir.mkdir(exist_ok=True)
        candidate_files.extend(
            search_dir.glob(f"nan_patterns_{self.max_length}_*.pt")
        )

        # De-duplicate candidate files while preserving order
        seen: set[str] = set()
        unique_candidates: List[Path] = []
        for candidate in candidate_files:
            key = str(candidate.resolve())
            if key not in seen:
                seen.add(key)
                unique_candidates.append(candidate)

        for pattern_file in unique_candidates:
            try:
                # File names look like nan_patterns_<max_length>_<num_channels>.pt
                parts = pattern_file.stem.split("_")
                if len(parts) < 4:
                    continue
                num_channels = int(parts[-1])
                # NOTE(security): torch.load unpickles the file; only point
                # nan_patterns_path at trusted locations.
                patterns = torch.load(pattern_file, map_location="cpu")
                cache_key = (self.max_length, num_channels)
                self.pattern_cache[cache_key] = patterns
                logger.info(
                    f"Loaded {patterns.shape[0]} patterns for shape {cache_key} from {pattern_file}"
                )
            except (ValueError, RuntimeError, FileNotFoundError) as e:
                logger.warning(f"Failed to load patterns from {pattern_file}: {e}")

    def _get_pattern_file_path(self, num_channels: int) -> Path:
        """Resolve the target file path for storing/loading patterns for a given channel count."""
        # If user provided a file path, use its directory as the base directory
        if self.nan_patterns_path is not None:
            base_dir = Path(self.nan_patterns_path).resolve().parent
        else:
            base_dir = Path("data").resolve()
        base_dir.mkdir(exist_ok=True, parents=True)
        return base_dir / f"nan_patterns_{self.max_length}_{num_channels}.pt"

    def _generate_nan_mask(self, series_shape: Tuple[int, ...]) -> np.ndarray:
        """
        Generates a single boolean NaN mask for a given series shape.

        A NaN ratio is sampled from the observed ratios, then non-overlapping
        blocks with lengths drawn from the observed block-length distribution
        are placed on the flattened series until the target count is reached or
        the attempt budget is used up.
        """
        series_size = int(np.prod(series_shape))
        sampled_ratio = np.random.choice(self.nan_ratio_distribution)
        # Never ask for more NaNs than there are elements, so a block can
        # always fit into the series.
        n_nans_to_add = min(int(round(series_size * sampled_ratio)), series_size)
        if n_nans_to_add <= 0:
            return np.zeros(series_shape, dtype=bool)

        mask_flat = np.zeros(series_size, dtype=bool)
        nans_added = 0
        max_attempts = n_nans_to_add * 2
        attempts = 0
        while nans_added < n_nans_to_add and attempts < max_attempts:
            attempts += 1
            block_length = int(
                np.random.choice(self.dist_lengths, p=self.dist_probs)
            )
            # Truncate the last block so the target count is not exceeded.
            block_length = min(block_length, n_nans_to_add - nans_added)
            if block_length <= 0:
                break
            # Sliding-window count of already-masked cells; a window with a
            # count of zero is a free slot for the new block.
            nan_counts_in_window = np.convolve(
                mask_flat, np.ones(block_length), mode="valid"
            )
            valid_starts = np.where(nan_counts_in_window == 0)[0]
            if valid_starts.size == 0:
                continue
            start_pos = np.random.choice(valid_starts)
            mask_flat[start_pos : start_pos + block_length] = True
            nans_added += block_length
        return mask_flat.reshape(series_shape)

    def _pregenerate_patterns(self, series_shape: Tuple[int, ...]) -> torch.BoolTensor:
        """Uses joblib to parallelize the generation of NaN masks for a given shape."""
        if not self._has_block_distribution or not self.nan_ratio_distribution:
            # Augmentation is disabled: an empty pattern set makes transform a no-op.
            return torch.empty(0, *series_shape, dtype=torch.bool)
        logger.info(
            f"Generating {self.num_patterns} NaN patterns for shape {series_shape}..."
        )
        with Parallel(n_jobs=self.n_jobs, backend="loky") as parallel:
            masks_list = parallel(
                delayed(self._generate_nan_mask)(series_shape)
                for _ in range(self.num_patterns)
            )
        logger.info(f"Pattern generation complete for shape {series_shape}.")
        return torch.from_numpy(np.stack(masks_list)).bool()

    def _ensure_patterns(self, num_channels: int) -> torch.Tensor:
        """Return the cached patterns for ``num_channels``, loading or generating them if needed."""
        cache_key = (self.max_length, num_channels)
        if cache_key in self.pattern_cache:
            return self.pattern_cache[cache_key]

        target_file = self._get_pattern_file_path(num_channels)
        if target_file.exists():
            try:
                # NOTE(security): torch.load unpickles the file; see above.
                patterns = torch.load(target_file, map_location="cpu")
            except (RuntimeError, FileNotFoundError) as e:
                # Fall back to generating if loading fails
                logger.warning(f"Failed to load patterns from {target_file}: {e}")
            else:
                self.pattern_cache[cache_key] = patterns
                logger.info(
                    f"Loaded NaN patterns from {target_file} for shape {cache_key}"
                )
                return patterns

        patterns = self._pregenerate_patterns(cache_key)
        torch.save(patterns, target_file)
        self.pattern_cache[cache_key] = patterns
        logger.info(
            f"Generated and saved {patterns.shape[0]} NaN patterns to {target_file}"
        )
        return patterns

    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
        """
        Applies NaN patterns to a batch, generating them on-demand if the shape is new.

        Args:
            time_series_batch: Tensor of shape ``(batch, history_length, num_channels)``.

        Returns:
            The input tensor itself when nothing is augmented, otherwise a new
            tensor in which the selected series carry NaNs. The input is never
            modified in place.
        """
        if self.p_series_has_nan == 0:
            return time_series_batch
        history_length, num_channels = time_series_batch.shape[1:]
        assert history_length <= self.max_length, (
            f"History length {history_length} exceeds maximum allowed {self.max_length}."
        )
        # 1. Fetch (or create) the patterns and cut them to the history length
        patterns = self._ensure_patterns(num_channels)[:, :history_length, :]
        # Early exit if patterns are empty (e.g., generation failed or was disabled)
        if patterns.numel() == 0:
            return time_series_batch

        batch_size = time_series_batch.shape[0]
        device = time_series_batch.device
        # 2. Vectorized decision on which series to augment
        augment_mask = torch.rand(batch_size, device=device) < self.p_series_has_nan
        indices_to_augment = torch.where(augment_mask)[0]
        num_to_augment = indices_to_augment.numel()
        if num_to_augment == 0:
            return time_series_batch

        # 3. Randomly sample patterns for each series being augmented. The
        #    indices must live on the same device as the cached patterns (CPU),
        #    otherwise indexing fails for batches on an accelerator.
        pattern_indices = torch.randint(
            0, patterns.shape[0], (num_to_augment,), device=patterns.device
        )
        # 4. Select patterns and apply them in a single vectorized operation
        selected_patterns = patterns[pattern_indices].to(device)
        augmented_batch = time_series_batch.clone()
        augmented_batch[indices_to_augment] = augmented_batch[
            indices_to_augment
        ].masked_fill(selected_patterns, float("nan"))
        return augmented_batch
class CensorAugmenter:
    """
    Applies censor augmentation by clipping values from above or from below.

    For every series one of three modes is drawn uniformly at random:
    leave the series unchanged, clip from above at a random empirical quantile,
    or clip from below at a random empirical quantile.
    """

    def __init__(self):
        """Initializes the CensorAugmenter."""
        pass

    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
        """
        Applies a vectorized censor augmentation to a batch of time series.

        Args:
            time_series_batch: Tensor of shape ``(batch, seq_len, 1)``.

        Returns:
            A new tensor of the same shape. NaN entries stay NaN and do not
            influence the clipping thresholds of the remaining values.
        """
        batch_size, seq_len, num_channels = time_series_batch.shape
        assert num_channels == 1
        series = time_series_batch.squeeze(-1)
        with torch.no_grad():
            device = series.device
            # Step 1: Choose an op mode for each series
            # (0 = unchanged, 1 = clip from above, 2 = clip from below)
            op_mode = torch.randint(0, 3, (batch_size, 1), device=device)

            # Step 2: Calculate potential thresholds for all series
            q1 = torch.rand(batch_size, device=device)
            q2 = torch.rand(batch_size, device=device)
            q_low = torch.minimum(q1, q2)
            q_high = torch.maximum(q1, q2)
            sorted_series = torch.sort(series, dim=1).values
            # The sort places NaNs after all numbers, so the valid values
            # occupy the first `n_valid` slots. Indexing quantiles over that
            # prefix keeps a NaN from being picked as a threshold, which would
            # turn the whole series into NaN.
            n_valid = (~torch.isnan(series)).sum(dim=1)
            last_valid = (n_valid - 1).clamp(min=0)
            indices_low = (q_low * last_valid).long()
            indices_high = (q_high * last_valid).long()
            c_low = torch.gather(sorted_series, 1, indices_low.unsqueeze(1))
            c_high = torch.gather(sorted_series, 1, indices_high.unsqueeze(1))

            # Step 3: Compute results for all possible clipping operations
            clip_above = torch.minimum(series, c_high)
            clip_below = torch.maximum(series, c_low)

            # Step 4: Select the final result based on the op_mode
            augmented_batch = torch.where(
                op_mode == 1,
                clip_above,
                torch.where(op_mode == 2, clip_below, series),
            )
        return augmented_batch.unsqueeze(-1)
class QuantizationAugmenter:
    """
    Applies non-equidistant quantization using a Sobol sequence to generate
    uniformly distributed levels. This implementation is fully vectorized.

    Every augmented series is snapped to ``n`` levels: its own minimum, its own
    maximum and ``n - 2`` quasi-random levels in between, where ``n`` is drawn
    per series from ``level_range``.
    """

    def __init__(
        self,
        p_quantize: float,
        level_range: Tuple[int, int],
        seed: Optional[int] = None,
    ):
        """
        Initializes the augmenter.

        Args:
            p_quantize (float): Probability of applying quantization to a series.
            level_range (Tuple[int, int]): Inclusive range [min, max] to sample the
                                           number of quantization levels from.
            seed (Optional[int]): Seed for the Sobol sequence generator for reproducibility.
        """
        assert 0.0 <= p_quantize <= 1.0, "Probability must be between 0 and 1."
        assert level_range[0] >= 2, "Minimum number of levels must be at least 2."
        assert level_range[0] <= level_range[1], (
            "Min levels cannot be greater than max."
        )
        self.p_quantize = p_quantize
        self.level_range = level_range
        # Initialize a SobolEngine. The dimension is the max number of random
        # levels we might need to generate for a single series.
        max_intermediate_levels = self.level_range[1] - 2
        if max_intermediate_levels > 0:
            # SobolEngine must be created on CPU
            self.sobol_engine = SobolEngine(
                dimension=max_intermediate_levels, scramble=True, seed=seed
            )
        else:
            # Only two levels (min and max) are ever used; no engine needed.
            self.sobol_engine = None

    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
        """
        Applies augmentation in a fully vectorized way on the batch's device.

        Args:
            time_series_batch: Tensor of shape ``(batch, length, 1)`` or
                ``(batch, length)``.

        Returns:
            The input tensor itself when nothing is augmented, otherwise a new
            tensor of the same shape. Flat series and NaN entries are passed
            through unchanged.
        """
        if self.p_quantize == 0:
            return time_series_batch

        # Handle input shape (batch, length, 1)
        is_3d = time_series_batch.dim() == 3 and time_series_batch.shape[2] == 1
        series_2d = time_series_batch.squeeze(-1) if is_3d else time_series_batch
        n_series, _ = series_2d.shape
        device = series_2d.device

        # 1. Decide which series to augment
        augment_mask = torch.rand(n_series, device=device) < self.p_quantize
        n_augment = int(augment_mask.sum().item())
        if n_augment == 0:
            return time_series_batch
        series = series_2d[augment_mask]

        # 2. Determine a variable n_levels for EACH series
        min_l, max_l = self.level_range
        n_levels = torch.randint(min_l, max_l + 1, size=(n_augment,), device=device)
        max_levels_in_batch = int(n_levels.max().item())

        # 3. Find min/max for each series, ignoring NaNs so that one missing
        #    value does not wipe out the whole series
        if series.is_floating_point():
            nan_mask = torch.isnan(series)
            min_vals = torch.amin(
                series.masked_fill(nan_mask, float("inf")), dim=1, keepdim=True
            )
            max_vals = torch.amax(
                series.masked_fill(nan_mask, float("-inf")), dim=1, keepdim=True
            )
        else:
            nan_mask = torch.zeros_like(series, dtype=torch.bool)
            min_vals = torch.amin(series, dim=1, keepdim=True)
            max_vals = torch.amax(series, dim=1, keepdim=True)
        value_range = max_vals - min_vals
        is_flat = value_range == 0

        # 4. Generate quasi-random intermediate levels using the Sobol sequence
        num_intermediate_levels = max_levels_in_batch - 2
        if num_intermediate_levels > 0:
            # Draw points from the Sobol engine (on CPU) and move to target device.
            # We only need the first `num_intermediate_levels` dimensions.
            quasi_rand_points = self.sobol_engine.draw(n_augment)[
                :, :num_intermediate_levels
            ].to(device)
            intermediate = min_vals + value_range * quasi_rand_points
            # A series with n levels uses only its first n - 2 intermediate
            # columns. The unused columns are overwritten with the series
            # maximum: a duplicate level never changes the nearest-level
            # result, and min and max stay available for every series.
            column = torch.arange(num_intermediate_levels, device=device).unsqueeze(0)
            unused = column >= (n_levels.unsqueeze(1) - 2)
            intermediate = torch.where(
                unused, max_vals.to(intermediate.dtype), intermediate
            )
        else:
            # max_levels_in_batch is 2: only min and max are used
            intermediate = torch.empty(
                n_augment, 0, device=device, dtype=min_vals.dtype
            )
        level_values = torch.cat(
            [
                min_vals.to(intermediate.dtype),
                intermediate,
                max_vals.to(intermediate.dtype),
            ],
            dim=1,
        )
        level_values, _ = torch.sort(level_values, dim=1)

        # 5. Find the closest level for every value
        diff = torch.abs(series.unsqueeze(2) - level_values.unsqueeze(1))
        closest_level_indices = torch.argmin(diff, dim=2)

        # 6. Gather the results from the level values
        quantized_subset = torch.gather(level_values, 1, closest_level_indices)

        # 7. Flat series and NaN entries keep their original values
        final_subset = torch.where(is_flat | nan_mask, series, quantized_subset)

        # 8. Place augmented data back into a copy of the original batch
        augmented_batch_2d = series_2d.clone()
        augmented_batch_2d[augment_mask] = final_subset

        # Restore original shape before returning
        return augmented_batch_2d.unsqueeze(-1) if is_3d else augmented_batch_2d
class MixUpAugmenter:
    """
    Applies mixup augmentation by creating a weighted average of multiple time series.
    This version includes an option for time-dependent mixup using Simplex Path
    Interpolation, creating a smooth transition between different mixing weights.
    """

    def __init__(
        self,
        max_n_series_to_combine: int = 10,
        p_combine: float = 0.4,
        p_time_dependent: float = 0.5,
        randomize_k_per_series: bool = True,
        dirichlet_alpha_range: Tuple[float, float] = (0.1, 5.0),
    ):
        """
        Initializes the augmenter.

        Args:
            max_n_series_to_combine (int): The maximum number of series to combine.
                The actual number k will be sampled from [2, max].
            p_combine (float): The probability of replacing a series with a combination.
            p_time_dependent (float): The probability of using the time-dependent
                simplex path method for a given mixup operation. Defaults to 0.5.
            randomize_k_per_series (bool): If True, each augmented series will be a
                combination of a different number of series (k).
                If False, one k is chosen for the whole batch.
            dirichlet_alpha_range (Tuple[float, float]): The [min, max] range to sample the
                Dirichlet 'alpha' from. A smaller alpha (e.g., 0.2) creates mixes
                dominated by one series. A larger alpha (e.g., 5.0) creates
                more uniform weights.
        """
        assert max_n_series_to_combine >= 2, "Must combine at least 2 series."
        assert 0.0 <= p_combine <= 1.0, "p_combine must be between 0 and 1."
        assert 0.0 <= p_time_dependent <= 1.0, (
            "p_time_dependent must be between 0 and 1."
        )
        assert (
            dirichlet_alpha_range[0] > 0
            and dirichlet_alpha_range[0] <= dirichlet_alpha_range[1]
        )
        self.max_k = max_n_series_to_combine
        self.p_combine = p_combine
        self.p_time_dependent = p_time_dependent
        self.randomize_k = randomize_k_per_series
        self.alpha_range = dirichlet_alpha_range

    def _sample_alpha(self) -> float:
        """Sample a Dirichlet concentration log-uniformly from ``alpha_range``."""
        log_alpha_min = math.log10(self.alpha_range[0])
        log_alpha_max = math.log10(self.alpha_range[1])
        log_alpha = log_alpha_min + np.random.rand() * (log_alpha_max - log_alpha_min)
        return float(10**log_alpha)

    def _sample_k(self) -> int:
        """Sample the number of series to combine uniformly from [2, max_k]."""
        return int(torch.randint(2, self.max_k + 1, (1,)).item())

    def _static_mix(
        self,
        source_series: torch.Tensor,
        alpha: float,
        return_weights: bool = False,
    ):
        """
        Mixes k source series using a single, static set of Dirichlet weights.

        Args:
            source_series: Tensor of shape ``(k, length, channels)``.
            alpha: Symmetric Dirichlet concentration.
            return_weights: Also return the sampled weights of shape ``(k,)``.

        Returns:
            The mixed series of shape ``(1, length, channels)``, optionally
            together with the weights.
        """
        k = int(source_series.shape[0])
        device = source_series.device
        concentration = torch.full((k,), float(alpha), device=device)
        weights = torch.distributions.Dirichlet(concentration).sample()
        mixed_series = (source_series * weights.view(k, 1, 1)).sum(
            dim=0, keepdim=True
        )
        if return_weights:
            return mixed_series, weights
        return mixed_series

    def _simplex_path_mix(
        self,
        source_series: torch.Tensor,
        alpha: float,
        return_weights: bool = False,
    ):
        """
        Mixes k series using time-varying weights interpolated along a simplex path.

        Args:
            source_series: Tensor of shape ``(k, length, channels)``.
            alpha: Symmetric Dirichlet concentration for both path endpoints.
            return_weights: Also return the weights of shape ``(k, length)``.

        Returns:
            The mixed series of shape ``(1, length, channels)``, optionally
            together with the time-varying weights.
        """
        k, length, _ = source_series.shape
        device = source_series.device
        # 1. Sample two endpoint weight vectors from the Dirichlet distribution
        concentration = torch.full((k,), float(alpha), device=device)
        dirichlet_dist = torch.distributions.Dirichlet(concentration)
        w_start = dirichlet_dist.sample()
        w_end = dirichlet_dist.sample()
        # 2. Create a linear ramp from 0 to 1
        ramp = torch.linspace(0, 1, length, device=device).unsqueeze(0)
        # 3. Interpolate between the endpoint weights over time.
        #    Endpoints are [k, 1], the ramp is [1, length] -> weights [k, length].
        #    A convex combination of two simplex points stays on the simplex.
        time_varying_weights = w_start.unsqueeze(1) * (1 - ramp) + w_end.unsqueeze(
            1
        ) * ramp
        # 4. Apply the time-varying weights (broadcast over channels)
        mixed_series = (source_series * time_varying_weights.unsqueeze(-1)).sum(
            dim=0, keepdim=True
        )
        if return_weights:
            return mixed_series, time_varying_weights
        return mixed_series

    def transform(
        self, time_series_batch: torch.Tensor, return_debug_info: bool = False
    ):
        """
        Applies the mixup augmentation, randomly choosing between static and
        time-dependent mixing methods.

        Args:
            time_series_batch: Tensor of shape ``(batch, length, channels)``.
            return_debug_info: If True, also return a dict keyed by the index of
                each replaced series with its source indices, weights, alpha,
                k and mix type.

        Returns:
            The augmented batch, or ``(augmented_batch, debug_info)`` when
            ``return_debug_info`` is True. The input tensor itself is returned
            when mixing is disabled, when the batch has at most ``max_k``
            series, or when no series is selected. The input is never modified.
        """

        def _passthrough():
            # Nothing to augment: hand the batch back untouched.
            return (time_series_batch, {}) if return_debug_info else time_series_batch

        with torch.no_grad():
            if self.p_combine == 0:
                return _passthrough()
            batch_size, _, _ = time_series_batch.shape
            device = time_series_batch.device
            # Each target needs up to max_k sources other than itself.
            if batch_size <= self.max_k:
                return _passthrough()

            # 1. Decide which series to replace
            augment_mask = torch.rand(batch_size, device=device) < self.p_combine
            indices_to_replace = torch.where(augment_mask)[0]
            n_augment = indices_to_replace.numel()
            if n_augment == 0:
                return _passthrough()

            # 2. Determine k for each series to augment
            if self.randomize_k:
                k_values = torch.randint(2, self.max_k + 1, (n_augment,), device=device)
            else:
                k_values = torch.full((n_augment,), self._sample_k(), device=device)

            # 3. Augment series one by one. Indices and k values are moved to
            #    Python ints once, rather than one `.item()` call per iteration.
            new_series_list = []
            debug_info = {}
            all_batch_indices = torch.arange(batch_size, device=device)
            for target_idx, current_k in zip(
                indices_to_replace.tolist(), k_values.tolist()
            ):
                # Sample source indices, excluding the target itself
                candidates = all_batch_indices[all_batch_indices != target_idx]
                perm = torch.randperm(candidates.shape[0], device=device)
                source_indices = candidates[perm[:current_k]]
                source_series = time_series_batch[source_indices]
                alpha = self._sample_alpha()

                # Randomly choose between static and time-dependent mixup
                if torch.rand(1).item() < self.p_time_dependent:
                    mix_type = "simplex"
                    mixed_series, weights = self._simplex_path_mix(
                        source_series, alpha=alpha, return_weights=True
                    )
                else:
                    mix_type = "static"
                    mixed_series, weights = self._static_mix(
                        source_series, alpha=alpha, return_weights=True
                    )
                new_series_list.append(mixed_series)
                if return_debug_info:
                    debug_info[target_idx] = {
                        "source_indices": source_indices.cpu().numpy(),
                        "weights": weights.cpu().numpy(),
                        "alpha": alpha,
                        "k": current_k,
                        "mix_type": mix_type,
                    }

            # 4. Place augmented series back into a clone of the original batch.
            #    The clone is created under no_grad, so no detach is needed.
            augmented_batch = time_series_batch.clone()
            augmented_batch[indices_to_replace] = torch.cat(new_series_list, dim=0)
            if return_debug_info:
                return augmented_batch, debug_info
            return augmented_batch
+class TimeFlipAugmenter:
+    """
+    Applies time-reversal augmentation to a random subset of time series in a batch.
+    """
+    def __init__(self, p_flip: float = 0.5):
+        """
+        Initializes the TimeFlipAugmenter.
+        Args:
+            p_flip (float): The probability of flipping a single time series in the batch.
+                            Defaults to 0.5.
+        """
+        assert 0.0 <= p_flip <= 1.0, "Probability must be between 0 and 1."
+        self.p_flip = p_flip
+    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
+        """
+        Applies time-reversal augmentation to a batch of time series.
+        Args:
+            time_series_batch (torch.Tensor): The input batch of time series with
+                                              shape (batch_size, seq_len, num_channels).
+        Returns:
+            torch.Tensor: The batch with some series potentially flipped.
+        """
+        with torch.no_grad():
+            if self.p_flip == 0:
+                return time_series_batch
+            batch_size = time_series_batch.shape[0]
+            device = time_series_batch.device
+            # 1. Decide which series in the batch to flip
+            flip_mask = torch.rand(batch_size, device=device) < self.p_flip
+            indices_to_flip = torch.where(flip_mask)[0]
+            if indices_to_flip.numel() == 0:
+                return time_series_batch
+            # 2. Select the series to be flipped
+            series_to_flip = time_series_batch[indices_to_flip]
+            # 3. Flip them along the time dimension (dim=1)
+            flipped_series = torch.flip(series_to_flip, dims=[1])
+            # 4. Create a copy of the batch and place the flipped series into it
+            augmented_batch = time_series_batch.clone()
+            augmented_batch[indices_to_flip] = flipped_series
+            return augmented_batch
+class YFlipAugmenter:
+    """
+    Applies y-reversal augmentation to a random subset of time series in a batch.
+    """
+    def __init__(self, p_flip: float = 0.5):
+        """
+        Initializes the TimeFlipAugmenter.
+        Args:
+            p_flip (float): The probability of flipping a single time series in the batch.
+                            Defaults to 0.5.
+        """
+        assert 0.0 <= p_flip <= 1.0, "Probability must be between 0 and 1."
+        self.p_flip = p_flip
+    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
+        """
+        Applies time-reversal augmentation to a batch of time series.
+        Args:
+            time_series_batch (torch.Tensor): The input batch of time series with
+                                              shape (batch_size, seq_len, num_channels).
+        Returns:
+            torch.Tensor: The batch with some series potentially flipped.
+        """
+        with torch.no_grad():
+            if self.p_flip == 0:
+                return time_series_batch
+            batch_size = time_series_batch.shape[0]
+            device = time_series_batch.device
+            # 1. Decide which series in the batch to flip
+            flip_mask = torch.rand(batch_size, device=device) < self.p_flip
+            indices_to_flip = torch.where(flip_mask)[0]
+            if indices_to_flip.numel() == 0:
+                return time_series_batch
+            # 2. Select the series to be flipped
+            series_to_flip = time_series_batch[indices_to_flip]
+            # 3. Flip them along the time dimension (dim=1)
+            flipped_series = -series_to_flip
+            # 4. Create a copy of the batch and place the flipped series into it
+            augmented_batch = time_series_batch.clone()
+            augmented_batch[indices_to_flip] = flipped_series
+            return augmented_batch
+class DifferentialAugmenter:
+    """
+    Applies calculus-inspired augmentations. This version includes up to the
+    fourth derivative and uses nn.Conv1d with built-in 'reflect' padding for
+    cleaner and more efficient convolutions.
+    The Gaussian kernel size and sigma for the initial smoothing are randomly
+    sampled at every transform() call from user-defined ranges.
+    """
+    def __init__(
+        self,
+        p_transform: float,
+        gaussian_kernel_size_range: Tuple[int, int] = (5, 51),
+        gaussian_sigma_range: Tuple[float, float] = (2.0, 20.0),
+    ):
+        """
+        Initializes the augmenter.
+        Args:
+            p_transform (float): The probability of applying an augmentation to any given
+                                 time series in a batch.
+            gaussian_kernel_size_range (Tuple[int, int]): The [min, max] inclusive range
+                                                           for the Gaussian kernel size.
+                                                           Sizes will be forced to be odd.
+            gaussian_sigma_range (Tuple[float, float]): The [min, max] inclusive range
+                                                        for the Gaussian sigma.
+        """
+        self.p_transform = p_transform
+        self.kernel_size_range = gaussian_kernel_size_range
+        self.sigma_range = gaussian_sigma_range
+        # Validate ranges
+        if not (
+            self.kernel_size_range[0] <= self.kernel_size_range[1]
+            and self.kernel_size_range[0] >= 3
+        ):
+            raise ValueError(
+                "Invalid kernel size range. Ensure min <= max and min >= 3."
+            )
+        if not (self.sigma_range[0] <= self.sigma_range[1] and self.sigma_range[0] > 0):
+            raise ValueError("Invalid sigma range. Ensure min <= max and min > 0.")
+        # Cache for fixed-kernel convolution layers (Sobel, Laplace, etc.)
+        self.conv_cache: Dict[Tuple[int, torch.device], Dict[str, nn.Module]] = {}
+    def _create_fixed_kernel_layers(
+        self, num_channels: int, device: torch.device
+    ) -> dict:
+        """
+        Creates and configures nn.Conv1d layers for fixed-kernel derivative operations.
+        These layers are cached to improve performance.
+        """
+        sobel_conv = nn.Conv1d(
+            in_channels=num_channels,
+            out_channels=num_channels,
+            kernel_size=3,
+            padding="same",
+            padding_mode="reflect",
+            groups=num_channels,
+            bias=False,
+            device=device,
+        )
+        laplace_conv = nn.Conv1d(
+            in_channels=num_channels,
+            out_channels=num_channels,
+            kernel_size=3,
+            padding="same",
+            padding_mode="reflect",
+            groups=num_channels,
+            bias=False,
+            device=device,
+        )
+        d3_conv = nn.Conv1d(
+            in_channels=num_channels,
+            out_channels=num_channels,
+            kernel_size=5,
+            padding="same",
+            padding_mode="reflect",
+            groups=num_channels,
+            bias=False,
+            device=device,
+        )
+        d4_conv = nn.Conv1d(
+            in_channels=num_channels,
+            out_channels=num_channels,
+            kernel_size=5,
+            padding="same",
+            padding_mode="reflect",
+            groups=num_channels,
+            bias=False,
+            device=device,
+        )
+        sobel_kernel = (
+            torch.tensor([-1, 0, 1], device=device, dtype=torch.float32)
+            .view(1, 1, -1)
+            .repeat(num_channels, 1, 1)
+        )
+        laplace_kernel = (
+            torch.tensor([1, -2, 1], device=device, dtype=torch.float32)
+            .view(1, 1, -1)
+            .repeat(num_channels, 1, 1)
+        )
+        d3_kernel = (
+            torch.tensor([-1, 2, 0, -2, 1], device=device, dtype=torch.float32)
+            .view(1, 1, -1)
+            .repeat(num_channels, 1, 1)
+        )
+        d4_kernel = (
+            torch.tensor([1, -4, 6, -4, 1], device=device, dtype=torch.float32)
+            .view(1, 1, -1)
+            .repeat(num_channels, 1, 1)
+        )
+        sobel_conv.weight.data = sobel_kernel
+        laplace_conv.weight.data = laplace_kernel
+        d3_conv.weight.data = d3_kernel
+        d4_conv.weight.data = d4_kernel
+        for layer in [sobel_conv, laplace_conv, d3_conv, d4_conv]:
+            layer.weight.requires_grad = False
+        return {
+            "sobel": sobel_conv,
+            "laplace": laplace_conv,
+            "d3": d3_conv,
+            "d4": d4_conv,
+        }
+    def _create_gaussian_layer(
+        self, kernel_size: int, sigma: float, num_channels: int, device: torch.device
+    ) -> nn.Module:
+        """Creates a single Gaussian convolution layer with the given dynamic parameters."""
+        gauss_conv = nn.Conv1d(
+            in_channels=num_channels,
+            out_channels=num_channels,
+            kernel_size=kernel_size,
+            padding="same",
+            padding_mode="reflect",
+            groups=num_channels,
+            bias=False,
+            device=device,
+        )
+        ax = torch.arange(
+            -(kernel_size // 2),
+            kernel_size // 2 + 1,
+            device=device,
+            dtype=torch.float32,
+        )
+        gauss_kernel = torch.exp(-0.5 * (ax / sigma) ** 2)
+        gauss_kernel /= gauss_kernel.sum()
+        gauss_kernel = gauss_kernel.view(1, 1, -1).repeat(num_channels, 1, 1)
+        gauss_conv.weight.data = gauss_kernel
+        gauss_conv.weight.requires_grad = False
+        return gauss_conv
+    def _rescale_signal(
+        self, processed_signal: torch.Tensor, original_signal: torch.Tensor
+    ) -> torch.Tensor:
+        """Rescales the processed signal to match the min/max range of the original."""
+        original_min = torch.amin(original_signal, dim=2, keepdim=True)
+        original_max = torch.amax(original_signal, dim=2, keepdim=True)
+        processed_min = torch.amin(processed_signal, dim=2, keepdim=True)
+        processed_max = torch.amax(processed_signal, dim=2, keepdim=True)
+        original_range = original_max - original_min
+        processed_range = processed_max - processed_min
+        epsilon = 1e-8
+        rescaled_signal = (
+            (processed_signal - processed_min) / (processed_range + epsilon)
+        ) * original_range + original_min
+        return torch.where(original_range < epsilon, original_signal, rescaled_signal)
+    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
+        """Applies a random augmentation to a subset of the batch."""
+        with torch.no_grad():
+            if self.p_transform == 0:
+                return time_series_batch
+            batch_size, seq_len, num_channels = time_series_batch.shape
+            device = time_series_batch.device
+            augment_mask = torch.rand(batch_size, device=device) < self.p_transform
+            indices_to_augment = torch.where(augment_mask)[0]
+            num_to_augment = indices_to_augment.numel()
+            if num_to_augment == 0:
+                return time_series_batch
+            # --- 🎲 Randomly sample Gaussian parameters for this call ---
+            min_k, max_k = self.kernel_size_range
+            kernel_size = torch.randint(min_k, max_k + 1, (1,)).item()
+            kernel_size = kernel_size // 2 * 2 + 1  # Ensure kernel size is odd
+            min_s, max_s = self.sigma_range
+            sigma = (min_s + (max_s - min_s) * torch.rand(1)).item()
+            # --- Get/Create Convolution Layers ---
+            gauss_conv = self._create_gaussian_layer(
+                kernel_size, sigma, num_channels, device
+            )
+            cache_key = (num_channels, device)
+            if cache_key not in self.conv_cache:
+                self.conv_cache[cache_key] = self._create_fixed_kernel_layers(
+                    num_channels, device
+                )
+            fixed_layers = self.conv_cache[cache_key]
+            # --- Apply Augmentations ---
+            subset_to_augment = time_series_batch[indices_to_augment]
+            subset_permuted = subset_to_augment.permute(0, 2, 1)
+            op_choices = torch.randint(0, 6, (num_to_augment,), device=device)
+            smoothed_subset = gauss_conv(subset_permuted)
+            sobel_on_smoothed = fixed_layers["sobel"](smoothed_subset)
+            laplace_on_smoothed = fixed_layers["laplace"](smoothed_subset)
+            d3_on_smoothed = fixed_layers["d3"](smoothed_subset)
+            d4_on_smoothed = fixed_layers["d4"](smoothed_subset)
+            gauss_result = self._rescale_signal(smoothed_subset, subset_permuted)
+            sobel_result = self._rescale_signal(sobel_on_smoothed, subset_permuted)
+            laplace_result = self._rescale_signal(laplace_on_smoothed, subset_permuted)
+            d3_result = self._rescale_signal(d3_on_smoothed, subset_permuted)
+            d4_result = self._rescale_signal(d4_on_smoothed, subset_permuted)
+            use_right_integral = torch.rand(num_to_augment, 1, 1, device=device) > 0.5
+            flipped_subset = torch.flip(subset_permuted, dims=[2])
+            right_integral = torch.flip(torch.cumsum(flipped_subset, dim=2), dims=[2])
+            left_integral = torch.cumsum(subset_permuted, dim=2)
+            integral_result = torch.where(
+                use_right_integral, right_integral, left_integral
+            )
+            integral_result_normalized = self._rescale_signal(
+                integral_result, subset_permuted
+            )
+            # --- Assemble the results based on op_choices ---
+            op_choices_view = op_choices.view(-1, 1, 1)
+            augmented_subset = torch.where(
+                op_choices_view == 0, gauss_result, subset_permuted
+            )
+            augmented_subset = torch.where(
+                op_choices_view == 1, sobel_result, augmented_subset
+            )
+            augmented_subset = torch.where(
+                op_choices_view == 2, laplace_result, augmented_subset
+            )
+            augmented_subset = torch.where(
+                op_choices_view == 3, integral_result_normalized, augmented_subset
+            )
+            augmented_subset = torch.where(
+                op_choices_view == 4, d3_result, augmented_subset
+            )
+            augmented_subset = torch.where(
+                op_choices_view == 5, d4_result, augmented_subset
+            )
+            augmented_subset_final = augmented_subset.permute(0, 2, 1)
+            augmented_batch = time_series_batch.clone()
+            augmented_batch[indices_to_augment] = augmented_subset_final
+            return augmented_batch
+class RandomConvAugmenter:
+    """
+    Applies a stack of 1-to-N random 1D convolutions to a time series batch.
+    This augmenter is inspired by the principles of ROCKET and RandConv,
+    randomizing nearly every aspect of the convolution process to create a
+    highly diverse set of transformations. This version includes multiple
+    kernel generation strategies, random padding modes, and optional non-linearities.
+    """
+    def __init__(
+        self,
+        p_transform: float = 0.5,
+        kernel_size_range: Tuple[int, int] = (3, 31),
+        dilation_range: Tuple[int, int] = (1, 8),
+        layer_range: Tuple[int, int] = (1, 3),
+        sigma_range: Tuple[float, float] = (0.5, 5.0),
+        bias_range: Tuple[float, float] = (-0.5, 0.5),
+    ):
+        """
+        Initializes the augmenter.
+        Args:
+            p_transform (float): Probability of applying the augmentation to a series.
+            kernel_size_range (Tuple[int, int]): [min, max] range for kernel sizes.
+                                                 Must be odd numbers.
+            dilation_range (Tuple[int, int]): [min, max] range for dilation factors.
+            layer_range (Tuple[int, int]): [min, max] range for the number of
+                                           stacked convolution layers.
+            sigma_range (Tuple[float, float]): [min, max] range for the sigma of
+                                               Gaussian kernels.
+            bias_range (Tuple[float, float]): [min, max] range for the bias term.
+        """
+        assert kernel_size_range[0] % 2 == 1 and kernel_size_range[1] % 2 == 1, (
+            "Kernel sizes must be odd."
+        )
+        self.p_transform = p_transform
+        self.kernel_size_range = kernel_size_range
+        self.dilation_range = dilation_range
+        self.layer_range = layer_range
+        self.sigma_range = sigma_range
+        self.bias_range = bias_range
+        self.padding_modes = ["reflect", "replicate", "circular"]
+    def _rescale_signal(
+        self, processed_signal: torch.Tensor, original_signal: torch.Tensor
+    ) -> torch.Tensor:
+        """Rescales the processed signal to match the min/max range of the original."""
+        original_min = torch.amin(original_signal, dim=-1, keepdim=True)
+        original_max = torch.amax(original_signal, dim=-1, keepdim=True)
+        processed_min = torch.amin(processed_signal, dim=-1, keepdim=True)
+        processed_max = torch.amax(processed_signal, dim=-1, keepdim=True)
+        original_range = original_max - original_min
+        processed_range = processed_max - processed_min
+        epsilon = 1e-8
+        is_flat = processed_range < epsilon
+        rescaled_signal = (
+            (processed_signal - processed_min) / (processed_range + epsilon)
+        ) * original_range + original_min
+        original_mean = torch.mean(original_signal, dim=-1, keepdim=True)
+        flat_rescaled = original_mean.expand_as(original_signal)
+        return torch.where(is_flat, flat_rescaled, rescaled_signal)
+    def _apply_random_conv_stack(self, series: torch.Tensor) -> torch.Tensor:
+        """
+        Applies a randomly configured stack of convolutions to a single time series.
+        Args:
+            series (torch.Tensor): A single time series of shape (1, num_channels, seq_len).
+        Returns:
+            torch.Tensor: The augmented time series.
+        """
+        num_channels = series.shape[1]
+        device = series.device
+        num_layers = torch.randint(
+            self.layer_range[0], self.layer_range[1] + 1, (1,)
+        ).item()
+        processed_series = series
+        for i in range(num_layers):
+            # 1. Sample kernel size
+            k_min, k_max = self.kernel_size_range
+            kernel_size = torch.randint(k_min // 2, k_max // 2 + 1, (1,)).item() * 2 + 1
+            # 2. Sample dilation
+            d_min, d_max = self.dilation_range
+            dilation = torch.randint(d_min, d_max + 1, (1,)).item()
+            # 3. Sample bias
+            b_min, b_max = self.bias_range
+            bias_val = (b_min + (b_max - b_min) * torch.rand(1)).item()
+            # 4. Sample padding mode
+            padding_mode = np.random.choice(self.padding_modes)
+            conv_layer = nn.Conv1d(
+                in_channels=num_channels,
+                out_channels=num_channels,
+                kernel_size=kernel_size,
+                dilation=dilation,
+                padding="same",  # Let PyTorch handle padding calculation
+                padding_mode=padding_mode,
+                groups=num_channels,
+                bias=True,
+                device=device,
+            )
+            # 5. Sample kernel weights from a wider variety of types
+            weight_type = torch.randint(0, 4, (1,)).item()
+            if weight_type == 0:  # Gaussian kernel
+                s_min, s_max = self.sigma_range
+                sigma = (s_min + (s_max - s_min) * torch.rand(1)).item()
+                ax = torch.arange(
+                    -(kernel_size // 2),
+                    kernel_size // 2 + 1,
+                    device=device,
+                    dtype=torch.float32,
+                )
+                kernel = torch.exp(-0.5 * (ax / sigma) ** 2)
+            elif weight_type == 1:  # Standard normal kernel
+                kernel = torch.randn(kernel_size, device=device)
+            elif weight_type == 2:  # Polynomial kernel
+                coeffs = torch.randn(3, device=device)  # a, b, c for ax^2+bx+c
+                x_vals = torch.linspace(-1, 1, kernel_size, device=device)
+                kernel = coeffs[0] * x_vals**2 + coeffs[1] * x_vals + coeffs[2]
+            else:  # Noisy Sobel kernel
+                # Ensure kernel is large enough for a Sobel filter
+                actual_kernel_size = 3 if kernel_size < 3 else kernel_size
+                sobel_base = torch.tensor(
+                    [-1, 0, 1], dtype=torch.float32, device=device
+                )
+                noise = torch.randn(3, device=device) * 0.1
+                noisy_sobel = sobel_base + noise
+                # Pad if the random kernel size is larger than 3
+                pad_total = actual_kernel_size - 3
+                pad_left = pad_total // 2
+                pad_right = pad_total - pad_left
+                kernel = F.pad(noisy_sobel, (pad_left, pad_right), "constant", 0)
+            # 6. Probabilistic normalization
+            if torch.rand(1).item() < 0.8:  # 80% chance to normalize
+                kernel /= torch.sum(torch.abs(kernel)) + 1e-8
+            kernel = kernel.view(1, 1, -1).repeat(num_channels, 1, 1)
+            conv_layer.weight.data = kernel
+            conv_layer.bias.data.fill_(bias_val)
+            conv_layer.weight.requires_grad = False
+            conv_layer.bias.requires_grad = False
+            # Apply convolution
+            processed_series = conv_layer(processed_series)
+            # 7. Optional non-linearity (not on the last layer)
+            if i < num_layers - 1:
+                activation_type = torch.randint(0, 3, (1,)).item()
+                if activation_type == 1:
+                    processed_series = F.relu(processed_series)
+                elif activation_type == 2:
+                    processed_series = torch.tanh(processed_series)
+                # if 0, do nothing (linear)
+        return processed_series
+    def transform(self, time_series_batch: torch.Tensor) -> torch.Tensor:
+        """Applies a random augmentation to a subset of the batch."""
+        with torch.no_grad():
+            if self.p_transform == 0:
+                return time_series_batch
+            batch_size, seq_len, num_channels = time_series_batch.shape
+            device = time_series_batch.device
+            augment_mask = torch.rand(batch_size, device=device) < self.p_transform
+            indices_to_augment = torch.where(augment_mask)[0]
+            num_to_augment = indices_to_augment.numel()
+            if num_to_augment == 0:
+                return time_series_batch
+            subset_to_augment = time_series_batch[indices_to_augment]
+            subset_permuted = subset_to_augment.permute(0, 2, 1)
+            augmented_subset_list = []
+            for i in range(num_to_augment):
+                original_series = subset_permuted[i : i + 1]
+                augmented_series = self._apply_random_conv_stack(original_series)
+                rescaled_series = self._rescale_signal(
+                    augmented_series.squeeze(0), original_series.squeeze(0)
+                )
+                augmented_subset_list.append(rescaled_series.unsqueeze(0))
+            if augmented_subset_list:
+                augmented_subset = torch.cat(augmented_subset_list, dim=0)
+                augmented_subset_final = augmented_subset.permute(0, 2, 1)
+                augmented_batch = time_series_batch.clone()
+                augmented_batch[indices_to_augment] = augmented_subset_final
+                return augmented_batch
+            else:
+                return time_series_batch

src/data/batch_composer.py ADDED Viewed

	@@ -0,0 +1,705 @@

+import json
+import logging
+import random
+from typing import Dict, Optional, Tuple
+import numpy as np
+import pandas as pd
+import torch
+from src.data.augmentations import (
+    NanAugmenter,
+)
+from src.data.constants import DEFAULT_NAN_STATS_PATH, LENGTH_CHOICES, LENGTH_WEIGHTS
+from src.data.containers import BatchTimeSeriesContainer
+from src.data.datasets import CyclicalBatchDataset
+from src.data.frequency import Frequency
+from src.data.scalers import MeanScaler, MedianScaler, MinMaxScaler, RobustScaler
+from src.data.utils import sample_future_length
+# NOTE(review): basicConfig is called at import time, so importing this module
+# touches the root logger configuration - confirm this side effect is intended.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+class BatchComposer:
+    """
+    Composes batches from saved generator data according to specified proportions.
+    Manages multiple CyclicalBatchDataset instances and creates uniform or mixed batches.
+    """
+    def __init__(
+        self,
+        base_data_dir: str,
+        generator_proportions: Optional[Dict[str, float]] = None,
+        mixed_batches: bool = True,
+        device: Optional[torch.device] = None,
+        augmentations: Optional[Dict[str, bool]] = None,
+        augmentation_probabilities: Optional[Dict[str, float]] = None,
+        nan_stats_path: Optional[str] = None,
+        nan_patterns_path: Optional[str] = None,
+        global_seed: int = 42,
+        chosen_scaler_name: Optional[str] = None,
+        rank: int = 0,
+        world_size: int = 1,
+    ):
+        """
+        Initialize the BatchComposer.
+        Args:
+            base_data_dir: Base directory containing generator subdirectories
+            generator_proportions: Dict mapping generator names to proportions
+            mixed_batches: If True, create mixed batches; if False, uniform batches
+            device: Device to load tensors to
+            augmentations: Dict mapping augmentation names to booleans
+            augmentation_probabilities: Dict mapping augmentation names to probabilities
+            global_seed: Global random seed
+            chosen_scaler_name: Name of the scaler that used in training
+            rank: Rank of current process for distributed data loading
+            world_size: Total number of processes for distributed data loading
+        """
+        self.base_data_dir = base_data_dir
+        self.mixed_batches = mixed_batches
+        self.device = device
+        self.global_seed = global_seed
+        self.nan_stats_path = nan_stats_path
+        self.nan_patterns_path = nan_patterns_path
+        self.rank = rank
+        self.world_size = world_size
+        self.augmentation_probabilities = augmentation_probabilities or {
+            "noise_augmentation": 0.3,
+            "scaler_augmentation": 0.5,
+        }
+        # Optional preferred scaler name provided by training config
+        self.chosen_scaler_name = (
+            chosen_scaler_name.lower() if chosen_scaler_name is not None else None
+        )
+        # Setup random state
+        self.rng = np.random.default_rng(global_seed)
+        random.seed(global_seed)
+        torch.manual_seed(global_seed)
+        # Setup augmentations
+        self._setup_augmentations(augmentations)
+        # Setup generator proportions
+        self._setup_proportions(generator_proportions)
+        # Initialize datasets
+        self.datasets = self._initialize_datasets()
+        logger.info(
+            f"Initialized BatchComposer with {len(self.datasets)} generators, "
+            f"mixed_batches={mixed_batches}, proportions={self.generator_proportions}, "
+            f"augmentations={self.augmentations}, "
+            f"augmentation_probabilities={self.augmentation_probabilities}"
+        )
+    def _setup_augmentations(self, augmentations: Optional[Dict[str, bool]]):
+        """Setup only the augmentations that should remain online (NaN)."""
+        default_augmentations = {
+            "nan_augmentation": False,
+            "scaler_augmentation": False,
+            "length_shortening": False,
+        }
+        self.augmentations = augmentations or default_augmentations
+        # Initialize NaN augmenter if needed
+        self.nan_augmenter = None
+        if self.augmentations.get("nan_augmentation", False):
+            stats_path_to_use = self.nan_stats_path or DEFAULT_NAN_STATS_PATH
+            stats = json.load(open(stats_path_to_use, "r"))
+            self.nan_augmenter = NanAugmenter(
+                p_series_has_nan=stats["p_series_has_nan"],
+                nan_ratio_distribution=stats["nan_ratio_distribution"],
+                nan_length_distribution=stats["nan_length_distribution"],
+                nan_patterns_path=self.nan_patterns_path,
+            )
+    def _should_apply_scaler_augmentation(self) -> bool:
+        """
+        Decide whether to apply scaler augmentation for a single series based on
+        the boolean toggle and probability from the configuration.
+        """
+        if not self.augmentations.get("scaler_augmentation", False):
+            return False
+        probability = float(
+            self.augmentation_probabilities.get("scaler_augmentation", 0.0)
+        )
+        probability = max(0.0, min(1.0, probability))
+        return bool(self.rng.random() < probability)
+    def _choose_random_scaler(self) -> Optional[object]:
+        """
+        Choose a random scaler for augmentation, explicitly avoiding the one that
+        is already selected in the training configuration (if any).
+        Returns an instance of the selected scaler or None when no valid option exists.
+        """
+        chosen: Optional[str] = None
+        if self.chosen_scaler_name is not None:
+            chosen = self.chosen_scaler_name.strip().lower()
+        candidates = ["custom_robust", "minmax", "median", "mean"]
+        # Remove the chosen scaler from the candidates
+        if chosen in candidates:
+            candidates = [c for c in candidates if c != chosen]
+        if not candidates:
+            return None
+        pick = str(self.rng.choice(candidates))
+        if pick == "custom_robust":
+            return RobustScaler()
+        if pick == "minmax":
+            return MinMaxScaler()
+        if pick == "median":
+            return MedianScaler()
+        if pick == "mean":
+            return MeanScaler()
+        return None
+    def _setup_proportions(self, generator_proportions):
+        """Setup default or custom generator proportions."""
+        default_proportions = {
+            "forecast_pfn": 1.0,
+            "gp": 1.0,
+            "kernel": 1.0,
+            "sinewave": 1.0,
+            "sawtooth": 1.0,
+            "step": 0.1,
+            "anomaly": 1.0,
+            "spike": 2.0,
+            "cauker_univariate": 2.0,
+            "cauker_multivariate": 0.00,
+            "lmc": 0.00,  # multivariate
+            "ou_process": 1.0,
+            "audio_financial_volatility": 0.1,
+            "audio_multi_scale_fractal": 0.1,
+            "audio_network_topology": 0.5,
+            "audio_stochastic_rhythm": 1.0,
+            "augmented_per_sample_2048": 3.0,
+            "augmented_temp_batch_2048": 3.0,
+        }
+        self.generator_proportions = generator_proportions or default_proportions
+        # Normalize proportions
+        total = sum(self.generator_proportions.values())
+        if total <= 0:
+            raise ValueError("Total generator proportions must be positive")
+        self.generator_proportions = {
+            k: v / total for k, v in self.generator_proportions.items()
+        }
+    def _initialize_datasets(self) -> Dict[str, CyclicalBatchDataset]:
+        """Initialize CyclicalBatchDataset for each generator with proportion > 0."""
+        datasets = {}
+        for generator_name, proportion in self.generator_proportions.items():
+            # Only initialize datasets for generators with positive proportion
+            if proportion <= 0:
+                logger.info(f"Skipping {generator_name} (proportion = {proportion})")
+                continue
+            batches_dir = f"{self.base_data_dir}/{generator_name}"
+            try:
+                dataset = CyclicalBatchDataset(
+                    batches_dir=batches_dir,
+                    generator_type=generator_name,
+                    device=None,
+                    prefetch_next=True,
+                    prefetch_threshold=32,
+                    rank=self.rank,
+                    world_size=self.world_size,
+                )
+                datasets[generator_name] = dataset
+                logger.info(
+                    f"Loaded dataset for {generator_name} (proportion = {proportion})"
+                )
+            except Exception as e:
+                logger.warning(f"Failed to load dataset for {generator_name}: {e}")
+                continue
+        if not datasets:
+            raise ValueError(
+                f"No valid datasets found in {self.base_data_dir} or all generators have proportion <= 0"
+            )
+        return datasets
+    def _convert_sample_to_tensors(
+        self, sample: dict, future_length: Optional[int] = None
+    ) -> Tuple[torch.Tensor, np.datetime64, Frequency]:
+        """
+        Convert a sample dict to tensors and metadata.
+        Args:
+            sample: Sample dict from CyclicalBatchDataset
+            future_length: Desired future length (if None, use default split)
+        Returns:
+            Tuple of (history_values, future_values, start, frequency)
+        """
+        # Handle both old and new data formats
+        num_channels = sample.get("num_channels", 1)
+        values_data = sample["values"]
+        generator_type = sample.get("generator_type", "unknown")
+        if num_channels == 1:
+            # Univariate data
+            if isinstance(values_data[0], list):
+                # New format: [[channel_values]]
+                values = torch.tensor(values_data[0], dtype=torch.float32)
+                logger.debug(
+                    f"{generator_type}: Using new univariate format, shape: {values.shape}"
+                )
+            else:
+                # Old format: [values]
+                values = torch.tensor(values_data, dtype=torch.float32)
+            values = values.unsqueeze(0).unsqueeze(-1)  # Shape: [1, seq_len, 1]
+        else:
+            # Multivariate data (LMC) - new format: [[ch1_values], [ch2_values], ...]
+            channel_tensors = []
+            for channel_values in values_data:
+                channel_tensor = torch.tensor(channel_values, dtype=torch.float32)
+                channel_tensors.append(channel_tensor)
+            # Stack channels: [1, seq_len, num_channels]
+            values = torch.stack(channel_tensors, dim=-1).unsqueeze(0)
+            logger.debug(
+                f"{generator_type}: Using multivariate format, {num_channels} channels, shape: {values.shape}"
+            )
+        # Handle frequency conversion
+        freq_str = sample["frequency"]
+        try:
+            frequency = Frequency(freq_str)
+        except ValueError:
+            # Map common frequency strings to Frequency enum
+            freq_mapping = {
+                "h": Frequency.H,
+                "D": Frequency.D,
+                "W": Frequency.W,
+                "M": Frequency.M,
+                "Q": Frequency.Q,
+                "A": Frequency.A,
+                "Y": Frequency.A,  # Annual
+                "1min": Frequency.T1,
+                "5min": Frequency.T5,
+                "10min": Frequency.T10,
+                "15min": Frequency.T15,
+                "30min": Frequency.T30,
+                "s": Frequency.S,
+            }
+            frequency = freq_mapping.get(freq_str, Frequency.H)  # Default to hourly
+        # Handle start timestamp
+        if isinstance(sample["start"], pd.Timestamp):
+            start = sample["start"].to_numpy()
+        else:
+            start = np.datetime64(sample["start"])
+        return values, start, frequency
+    def _effective_proportions_for_length(
+        self, total_length_for_batch: int
+    ) -> Dict[str, float]:
+        """
+        Build a simple, length-aware proportion map for the current batch.
+        Rules:
+        - For generators named 'augmented{L}', keep only the one matching the
+          chosen length L; zero out others.
+        - Keep non-augmented generators as-is.
+        - Drop generators that are unavailable (not loaded) or zero-weight.
+        - If nothing remains, fall back to 'augmented{L}' if available, else any dataset.
+        - Normalize the final map to sum to 1.
+        """
+        def augmented_length_from_name(name: str) -> Optional[int]:
+            if not name.startswith("augmented"):
+                return None
+            suffix = name[len("augmented") :]
+            if not suffix:
+                return None
+            try:
+                return int(suffix)
+            except ValueError:
+                return None
+        # 1) Adjust proportions with the length-aware rule
+        adjusted: Dict[str, float] = {}
+        for name, proportion in self.generator_proportions.items():
+            aug_len = augmented_length_from_name(name)
+            if aug_len is None:
+                adjusted[name] = proportion
+            else:
+                adjusted[name] = (
+                    proportion if aug_len == total_length_for_batch else 0.0
+                )
+        # 2) Keep only available, positive-weight datasets
+        adjusted = {
+            name: p for name, p in adjusted.items() if name in self.datasets and p > 0.0
+        }
+        # 3) Fallback if empty
+        if not adjusted:
+            preferred = f"augmented{total_length_for_batch}"
+            if preferred in self.datasets:
+                adjusted = {preferred: 1.0}
+            elif self.datasets:
+                # Choose any available dataset deterministically (first key)
+                first_key = next(iter(self.datasets.keys()))
+                adjusted = {first_key: 1.0}
+            else:
+                raise ValueError("No datasets available to create batch")
+        # 4) Normalize
+        total = sum(adjusted.values())
+        return {name: p / total for name, p in adjusted.items()}
+    def _compute_sample_counts_for_batch(
+        self, proportions: Dict[str, float], batch_size: int
+    ) -> Dict[str, int]:
+        """
+        Convert a proportion map into integer sample counts that sum to batch_size.
+        Strategy: allocate floor(batch_size * p) to each generator in order, and let the
+        last generator absorb any remainder to ensure the total matches exactly.
+        """
+        counts: Dict[str, int] = {}
+        remaining = batch_size
+        names = list(proportions.keys())
+        values = list(proportions.values())
+        for index, (name, p) in enumerate(zip(names, values)):
+            if index == len(names) - 1:
+                counts[name] = remaining
+            else:
+                n = int(batch_size * p)
+                counts[name] = n
+                remaining -= n
+        return counts
+    def _calculate_generator_samples(self, batch_size: int) -> Dict[str, int]:
+        """
+        Calculate the number of samples each generator should contribute.
+        Args:
+            batch_size: Total batch size
+        Returns:
+            Dict mapping generator names to sample counts
+        """
+        generator_samples = {}
+        remaining_samples = batch_size
+        generators = list(self.generator_proportions.keys())
+        proportions = list(self.generator_proportions.values())
+        # Calculate base samples for each generator
+        for i, (generator, proportion) in enumerate(zip(generators, proportions)):
+            if generator not in self.datasets:
+                continue
+            if i == len(generators) - 1:  # Last generator gets remaining samples
+                samples = remaining_samples
+            else:
+                samples = int(batch_size * proportion)
+                remaining_samples -= samples
+            generator_samples[generator] = samples
+        return generator_samples
+    def create_batch(
+        self,
+        batch_size: int = 128,
+        seed: Optional[int] = None,
+        future_length: Optional[int] = None,
+    ) -> Tuple[BatchTimeSeriesContainer, str]:
+        """
+        Create a batch of the specified size.
+        Args:
+            batch_size: Size of the batch to create
+            seed: Random seed for this batch
+            future_length: Fixed future length to use. If None, samples from gift_eval range
+        Returns:
+            Tuple of (batch_container, generator_info)
+        """
+        if seed is not None:
+            batch_rng = np.random.default_rng(seed)
+            random.seed(seed)
+        else:
+            batch_rng = self.rng
+        if self.mixed_batches:
+            return self._create_mixed_batch(batch_size, future_length)
+        else:
+            return self._create_uniform_batch(batch_size, batch_rng, future_length)
+    def _create_mixed_batch(
+        self, batch_size: int, future_length: Optional[int] = None
+    ) -> Tuple[BatchTimeSeriesContainer, str]:
+        """
+        Create a mixed batch with samples from multiple generators, rejecting NaNs.
+        Steps: pick a total series length for the batch, derive the
+        history/prediction split, work out how many samples each generator
+        contributes, collect NaN-free samples (cropping or subsampling series
+        longer than the target), optionally re-scale them, then concatenate,
+        split into history/future and optionally inject NaNs into the history.
+        Args:
+            batch_size: Number of series requested for the batch.
+            future_length: Prediction length to use. If None it is obtained from
+                sample_future_length for the chosen total length.
+        Returns:
+            Tuple of (batch_container, "MixedBatch").
+        Raises:
+            RuntimeError: If no valid sample could be collected from any generator.
+        """
+        # Choose total length for this batch; respect length_shortening flag.
+        # When disabled, always use the maximum to avoid shortening.
+        if self.augmentations.get("length_shortening", False):
+            lengths = list(LENGTH_WEIGHTS.keys())
+            probs = list(LENGTH_WEIGHTS.values())
+            total_length_for_batch = int(self.rng.choice(lengths, p=probs))
+        else:
+            total_length_for_batch = int(max(LENGTH_CHOICES))
+        if future_length is None:
+            prediction_length = int(
+                sample_future_length(
+                    range="gift_eval", total_length=total_length_for_batch
+                )
+            )
+        else:
+            prediction_length = future_length
+        # NOTE(review): nothing here guards against prediction_length >=
+        # total_length_for_batch, which would make history_length <= 0.
+        history_length = total_length_for_batch - prediction_length
+        # Calculate samples per generator using simple, per-batch length-aware proportions
+        effective_props = self._effective_proportions_for_length(total_length_for_batch)
+        generator_samples = self._compute_sample_counts_for_batch(
+            effective_props, batch_size
+        )
+        all_values = []
+        all_starts = []
+        all_frequencies = []
+        # Number of samples actually collected per generator; populated below but
+        # not read again inside this method.
+        actual_proportions = {}
+        # Collect valid samples from each generator using batched fetches to reduce I/O overhead
+        for generator_name, num_samples in generator_samples.items():
+            if num_samples == 0 or generator_name not in self.datasets:
+                continue
+            dataset = self.datasets[generator_name]
+            # Lists to hold valid samples for the current generator
+            generator_values = []
+            generator_starts = []
+            generator_frequencies = []
+            # Loop until we have collected the required number of VALID samples
+            # (bounded by max_attempts so a generator yielding only NaN series
+            # cannot stall the loop forever).
+            max_attempts = 50
+            attempts = 0
+            while len(generator_values) < num_samples and attempts < max_attempts:
+                attempts += 1
+                # Fetch a batch larger than needed to reduce round-trips
+                need = num_samples - len(generator_values)
+                fetch_n = max(need * 2, 8)
+                samples = dataset.get_samples(fetch_n)
+                for sample in samples:
+                    if len(generator_values) >= num_samples:
+                        break
+                    values, sample_start, sample_freq = self._convert_sample_to_tensors(
+                        sample, future_length
+                    )
+                    # Skip if NaNs exist (we inject NaNs later in history only)
+                    if torch.isnan(values).any():
+                        continue
+                    # Resize to target batch length when longer
+                    # NOTE(review): series shorter than the target are passed
+                    # through unchanged; presumably the stored data is always at
+                    # least as long as the largest length choice - confirm.
+                    if total_length_for_batch < values.shape[1]:
+                        strategy = self.rng.choice(["cut", "subsample"])  # 50/50
+                        if strategy == "cut":
+                            # Contiguous window at a random offset.
+                            max_start_idx = values.shape[1] - total_length_for_batch
+                            start_idx = int(self.rng.integers(0, max_start_idx + 1))
+                            values = values[
+                                :, start_idx : start_idx + total_length_for_batch, :
+                            ]
+                        else:
+                            # Evenly spaced indices spanning the whole series.
+                            indices = np.linspace(
+                                0,
+                                values.shape[1] - 1,
+                                total_length_for_batch,
+                                dtype=int,
+                            )
+                            values = values[:, indices, :]
+                    # Optionally apply scaler augmentation according to configuration
+                    if self._should_apply_scaler_augmentation():
+                        scaler = self._choose_random_scaler()
+                        if scaler is not None:
+                            values = scaler.scale(
+                                values, scaler.compute_statistics(values)
+                            )
+                    generator_values.append(values)
+                    generator_starts.append(sample_start)
+                    generator_frequencies.append(sample_freq)
+            # A shortfall is only logged, so the final batch can contain fewer
+            # than batch_size series.
+            if len(generator_values) < num_samples:
+                logger.warning(
+                    f"Generator {generator_name}: collected {len(generator_values)}/{num_samples} after {attempts} attempts"
+                )
+            # Add the collected valid samples to the main batch lists
+            if generator_values:
+                all_values.extend(generator_values)
+                all_starts.extend(generator_starts)
+                all_frequencies.extend(generator_frequencies)
+                actual_proportions[generator_name] = len(generator_values)
+        if not all_values:
+            raise RuntimeError(
+                "No valid samples could be collected from any generator."
+            )
+        # Each element is [1, length, channels]; concatenate along the batch axis.
+        combined_values = torch.cat(all_values, dim=0)
+        # Split into history and future
+        combined_history = combined_values[:, :history_length, :]
+        combined_future = combined_values[
+            :, history_length : history_length + prediction_length, :
+        ]
+        # NaN augmentation touches the history only; the future stays clean.
+        if self.nan_augmenter is not None:
+            combined_history = self.nan_augmenter.transform(combined_history)
+        # Create container
+        container = BatchTimeSeriesContainer(
+            history_values=combined_history,
+            future_values=combined_future,
+            start=all_starts,
+            frequency=all_frequencies,
+        )
+        return container, "MixedBatch"
+    def _create_uniform_batch(
+        self,
+        batch_size: int,
+        batch_rng: np.random.Generator,
+        future_length: Optional[int] = None,
+    ) -> Tuple[BatchTimeSeriesContainer, str]:
+        """Create a uniform batch with samples from a single generator."""
+        # Select generator based on proportions
+        generators = list(self.datasets.keys())
+        proportions = [self.generator_proportions[gen] for gen in generators]
+        selected_generator = batch_rng.choice(generators, p=proportions)
+        # Sample future length
+        if future_length is None:
+            future_length = sample_future_length(range="gift_eval")
+        # Get samples from selected generator
+        dataset = self.datasets[selected_generator]
+        samples = dataset.get_samples(batch_size)
+        all_history_values = []
+        all_future_values = []
+        all_starts = []
+        all_frequencies = []
+        for sample in samples:
+            values, sample_start, sample_freq = self._convert_sample_to_tensors(
+                sample, future_length
+            )
+            total_length = values.shape[1]
+            history_length = max(1, total_length - future_length)
+            # Optionally apply scaler augmentation according to configuration
+            if self._should_apply_scaler_augmentation():
+                scaler = self._choose_random_scaler()
+                if scaler is not None:
+                    values = scaler.scale(values, scaler.compute_statistics(values))
+            # Reshape to [1, seq_len, 1] for single sample
+            hist_vals = values[:, :history_length, :]
+            fut_vals = values[:, history_length : history_length + future_length, :]
+            all_history_values.append(hist_vals)
+            all_future_values.append(fut_vals)
+            all_starts.append(sample_start)
+            all_frequencies.append(sample_freq)
+        # Combine samples
+        combined_history = torch.cat(all_history_values, dim=0)
+        combined_future = torch.cat(all_future_values, dim=0)
+        # Create container
+        container = BatchTimeSeriesContainer(
+            history_values=combined_history,
+            future_values=combined_future,
+            start=all_starts,
+            frequency=all_frequencies,
+        )
+        return container, selected_generator
+    def get_dataset_info(self) -> Dict[str, dict]:
+        """Get information about all datasets."""
+        info = {}
+        for name, dataset in self.datasets.items():
+            info[name] = dataset.get_info()
+        return info
+    def get_generator_info(self) -> Dict[str, any]:
+        """Get information about the composer configuration."""
+        return {
+            "mixed_batches": self.mixed_batches,
+            "generator_proportions": self.generator_proportions,
+            "active_generators": list(self.datasets.keys()),
+            "total_generators": len(self.datasets),
+            "augmentations": self.augmentations,
+            "augmentation_probabilities": self.augmentation_probabilities,
+            "nan_augmenter_enabled": self.nan_augmenter is not None,
+        }
+class ComposedDataset(torch.utils.data.Dataset):
+    """
+    PyTorch Dataset wrapper around BatchComposer for training pipeline integration.
+    """
+    def __init__(
+        self,
+        batch_composer: BatchComposer,
+        num_batches_per_epoch: int = 100,
+        batch_size: int = 128,
+    ):
+        """
+        Initialize the dataset.
+        Args:
+            batch_composer: The BatchComposer instance
+            num_batches_per_epoch: Number of batches to generate per epoch
+            batch_size: Size of each batch
+        """
+        self.batch_composer = batch_composer
+        self.num_batches_per_epoch = num_batches_per_epoch
+        self.batch_size = batch_size
+    def __len__(self) -> int:
+        return self.num_batches_per_epoch
+    def __getitem__(self, idx: int) -> BatchTimeSeriesContainer:
+        """
+        Get a batch by index.
+        Args:
+            idx: Batch index (used as seed for reproducibility)
+        Returns:
+            BatchTimeSeriesContainer
+        """
+        # Use index as seed for reproducible batches
+        batch, _ = self.batch_composer.create_batch(
+            batch_size=self.batch_size, seed=self.batch_composer.global_seed + idx
+        )
+        return batch

src/data/constants.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from datetime import date
+from typing import Dict
+import numpy as np
+DEFAULT_START_DATE = date(1700, 1, 1)
+DEFAULT_END_DATE = date(2200, 1, 1)
+BASE_START_DATE = np.datetime64(DEFAULT_START_DATE)
+BASE_END_DATE = np.datetime64(DEFAULT_END_DATE)
+# Maximum years to prevent timestamp overflow
+MAX_YEARS = 500
+LENGTH_CHOICES = [128, 256, 512, 1024, 1536, 2048]
+DEFAULT_NAN_STATS_PATH: str = "./data/nan_stats.json"
+LENGTH_WEIGHTS: Dict[int, float] = {
+    128: 0.05,
+    256: 0.10,
+    512: 0.10,
+    1024: 0.10,
+    1536: 0.15,
+    2048: 0.50,
+}

src/data/containers.py ADDED Viewed

	@@ -0,0 +1,272 @@

+from dataclasses import dataclass
+from typing import List, Optional
+import numpy as np
+import torch
+from src.data.frequency import Frequency
+@dataclass
+class BatchTimeSeriesContainer:
+    """
+    Container for a batch of multivariate time series data and their associated features.
+    Attributes:
+        history_values: Tensor of historical observations.
+            Shape: [batch_size, seq_len, num_channels]
+        future_values: Tensor of future observations to predict.
+            Shape: [batch_size, pred_len, num_channels]
+        start: Timestamp of the first history value.
+            Type: List[np.datetime64]
+        frequency: Frequency of the time series.
+            Type: List[Frequency]
+        history_mask: Optional boolean/float tensor indicating missing entries in history_values across channels.
+            Shape: [batch_size, seq_len]
+        future_mask: Optional boolean/float tensor indicating missing entries in future_values across channels.
+            Shape: [batch_size, pred_len]
+    """
+    history_values: torch.Tensor
+    future_values: torch.Tensor
+    start: List[np.datetime64]
+    frequency: List[Frequency]
+    history_mask: Optional[torch.Tensor] = None
+    future_mask: Optional[torch.Tensor] = None
+    def __post_init__(self):
+        """Validate all tensor shapes and consistency."""
+        # --- Tensor Type Checks ---
+        if not isinstance(self.history_values, torch.Tensor):
+            raise TypeError("history_values must be a torch.Tensor")
+        if not isinstance(self.future_values, torch.Tensor):
+            raise TypeError("future_values must be a torch.Tensor")
+        if not isinstance(self.start, list) or not all(
+            isinstance(x, np.datetime64) for x in self.start
+        ):
+            raise TypeError("start must be a List[np.datetime64]")
+        if not isinstance(self.frequency, list) or not all(
+            isinstance(x, Frequency) for x in self.frequency
+        ):
+            raise TypeError("frequency must be a List[Frequency]")
+        batch_size, seq_len, num_channels = self.history_values.shape
+        pred_len = self.future_values.shape[1]
+        # --- Core Shape Checks ---
+        if self.future_values.shape[0] != batch_size:
+            raise ValueError("Batch size mismatch between history and future_values")
+        if self.future_values.shape[2] != num_channels:
+            raise ValueError("Channel size mismatch between history and future_values")
+        # --- Optional Mask Checks ---
+        if self.history_mask is not None:
+            if not isinstance(self.history_mask, torch.Tensor):
+                raise TypeError("history_mask must be a Tensor or None")
+            if self.history_mask.shape[:2] != (batch_size, seq_len):
+                raise ValueError(
+                    f"Shape mismatch in history_mask: {self.history_mask.shape[:2]} vs {(batch_size, seq_len)}"
+                )
+        if self.future_mask is not None:
+            if not isinstance(self.future_mask, torch.Tensor):
+                raise TypeError("future_mask must be a Tensor or None")
+            if not (
+                self.future_mask.shape == (batch_size, pred_len)
+                or self.future_mask.shape == self.future_values.shape
+            ):
+                raise ValueError(
+                    f"Shape mismatch in future_mask: expected {(batch_size, pred_len)} or {self.future_values.shape}, got {self.future_mask.shape}"
+                )
+    def to_device(
+        self, device: torch.device, attributes: Optional[List[str]] = None
+    ) -> None:
+        """
+        Move specified tensors to the target device in place.
+        Args:
+            device: Target device (e.g., 'cpu', 'cuda').
+            attributes: Optional list of attribute names to move. If None, move all tensors.
+        Raises:
+            ValueError: If an invalid attribute is specified or device transfer fails.
+        """
+        all_tensors = {
+            "history_values": self.history_values,
+            "future_values": self.future_values,
+            "history_mask": self.history_mask,
+            "future_mask": self.future_mask,
+        }
+        if attributes is None:
+            attributes = [k for k, v in all_tensors.items() if v is not None]
+        for attr in attributes:
+            if attr not in all_tensors:
+                raise ValueError(f"Invalid attribute: {attr}")
+            if all_tensors[attr] is not None:
+                setattr(self, attr, all_tensors[attr].to(device))
+    def to(self, device: torch.device, attributes: Optional[List[str]] = None):
+        """
+        Alias for to_device method for consistency with PyTorch conventions.
+        Args:
+            device: Target device (e.g., 'cpu', 'cuda').
+            attributes: Optional list of attribute names to move. If None, move all tensors.
+        """
+        self.to_device(device, attributes)
+        return self
+    @property
+    def batch_size(self) -> int:
+        return self.history_values.shape[0]
+    @property
+    def history_length(self) -> int:
+        return self.history_values.shape[1]
+    @property
+    def future_length(self) -> int:
+        return self.future_values.shape[1]
+    @property
+    def num_channels(self) -> int:
+        return self.history_values.shape[2]
+@dataclass
+class TimeSeriesContainer:
+    """
+    Container for batch of time series data without explicit history/future split.
+    This container is used for storing generated synthetic time series data where
+    the entire series is treated as a single entity, typically for further processing
+    or splitting into history/future components later.
+    Attributes:
+        values: np.ndarray of time series values.
+            Shape: [batch_size, seq_len, num_channels] for multivariate series
+                   [batch_size, seq_len] for univariate series
+        start: List of start timestamps for each series in the batch.
+            Type: List[np.datetime64], length should match batch_size
+        frequency: List of frequency for each series in the batch.
+            Type: List[Frequency], length should match batch_size
+    """
+    values: np.ndarray
+    start: List[np.datetime64]
+    frequency: List[Frequency]
+    def __post_init__(self):
+        """Validate all shapes and consistency."""
+        # --- Numpy Type Checks ---
+        if not isinstance(self.values, np.ndarray):
+            raise TypeError("values must be a np.ndarray")
+        if not isinstance(self.start, list) or not all(
+            isinstance(x, np.datetime64) for x in self.start
+        ):
+            raise TypeError("start must be a List[np.datetime64]")
+        if not isinstance(self.frequency, list) or not all(
+            isinstance(x, Frequency) for x in self.frequency
+        ):
+            raise TypeError("frequency must be a List[Frequency]")
+        # --- Shape and Length Consistency Checks ---
+        if len(self.values.shape) < 2 or len(self.values.shape) > 3:
+            raise ValueError(
+                f"values must have 2 or 3 dimensions [batch_size, seq_len] or [batch_size, seq_len, num_channels], got shape {self.values.shape}"
+            )
+        batch_size = self.values.shape[0]
+        if len(self.start) != batch_size:
+            raise ValueError(
+                f"Length of start ({len(self.start)}) must match batch_size ({batch_size})"
+            )
+        if len(self.frequency) != batch_size:
+            raise ValueError(
+                f"Length of frequency ({len(self.frequency)}) must match batch_size ({batch_size})"
+            )
+    @property
+    def batch_size(self) -> int:
+        return self.values.shape[0]
+    @property
+    def seq_length(self) -> int:
+        return self.values.shape[1]
+    @property
+    def num_channels(self) -> int:
+        return self.values.shape[2] if len(self.values.shape) == 3 else 1
+@dataclass
+class TimeSeriesContainer:
+    """
+    Container for batch of time series data without explicit history/future split.
+    This container is used for storing generated synthetic time series data where
+    the entire series is treated as a single entity, typically for further processing
+    or splitting into history/future components later.
+    Attributes:
+        values: np.ndarray of time series values.
+            Shape: [batch_size, seq_len, num_channels] for multivariate series
+                   [batch_size, seq_len] for univariate series
+        start: List of start timestamps for each series in the batch.
+            Type: List[np.datetime64], length should match batch_size
+        frequency: List of frequency for each series in the batch.
+            Type: List[Frequency], length should match batch_size
+    """
+    values: np.ndarray
+    start: List[np.datetime64]
+    frequency: List[Frequency]
+    def __post_init__(self):
+        """Validate all shapes and consistency."""
+        # --- Numpy Type Checks ---
+        if not isinstance(self.values, np.ndarray):
+            raise TypeError("values must be a np.ndarray")
+        if not isinstance(self.start, list) or not all(
+            isinstance(x, np.datetime64) for x in self.start
+        ):
+            raise TypeError("start must be a List[np.datetime64]")
+        if not isinstance(self.frequency, list) or not all(
+            isinstance(x, Frequency) for x in self.frequency
+        ):
+            raise TypeError("frequency must be a List[Frequency]")
+        # --- Shape and Length Consistency Checks ---
+        if len(self.values.shape) < 2 or len(self.values.shape) > 3:
+            raise ValueError(
+                f"values must have 2 or 3 dimensions [batch_size, seq_len] or [batch_size, seq_len, num_channels], got shape {self.values.shape}"
+            )
+        batch_size = self.values.shape[0]
+        if len(self.start) != batch_size:
+            raise ValueError(
+                f"Length of start ({len(self.start)}) must match batch_size ({batch_size})"
+            )
+        if len(self.frequency) != batch_size:
+            raise ValueError(
+                f"Length of frequency ({len(self.frequency)}) must match batch_size ({batch_size})"
+            )
+    @property
+    def batch_size(self) -> int:
+        return self.values.shape[0]
+    @property
+    def seq_length(self) -> int:
+        return self.values.shape[1]
+    @property
+    def num_channels(self) -> int:
+        return self.values.shape[2] if len(self.values.shape) == 3 else 1

src/data/datasets.py ADDED Viewed

	@@ -0,0 +1,267 @@

+import logging
+import os
+import random
+from typing import List, Optional
+import pyarrow.feather as feather
+import torch
+logger = logging.getLogger(__name__)
+class CyclicalBatchDataset:
+    """
+    Dataset class that loads saved batches from continuous generation script.
+    Maintains a pointer and provides cyclical access to individual samples.
+    Includes enhanced logging to track data shard cycling during training.
+    Supports per-rank file sharding for large-scale distributed training.
+    """
+    def __init__(
+        self,
+        batches_dir: str,
+        generator_type: str,
+        device: Optional[torch.device] = None,
+        prefetch_next: bool = True,
+        prefetch_threshold: int = 32,
+        rank: int = 0,
+        world_size: int = 1,
+    ):
+        """
+        Initialize the cyclical batch dataset.
+        Args:
+            batches_dir: Directory containing the batch arrow files
+            generator_type: Type of generator (for logging)
+            device: Device to load tensors to
+            prefetch_next: Whether to prefetch the next batch
+            prefetch_threshold: Number of remaining samples to trigger prefetching
+            rank: Rank of the current process (for file sharding)
+            world_size: Total number of processes (for file sharding)
+        """
+        self.batches_dir = batches_dir
+        self.generator_type = generator_type
+        self.device = device
+        self.prefetch_next = prefetch_next
+        self.prefetch_threshold = prefetch_threshold
+        self.rank = rank
+        self.world_size = world_size
+        self.batch_files = self._find_batch_files()
+        if not self.batch_files:
+            raise ValueError(f"No batch files found in {batches_dir}")
+        # --- State tracking ---
+        self.current_batch_idx = 0
+        self.current_sample_idx = 0
+        self.current_batch_data = None
+        self.next_batch_data = None
+        self.prefetching_in_progress = False
+        # --- NEW: Logging and cycle tracking ---
+        self.visited_batch_indices = set()
+        self.full_cycles_completed = 0
+        # Load first batch and update tracking
+        self._load_current_batch()
+        self.visited_batch_indices.add(self.current_batch_idx)
+        logger.info(
+            f"Initialized '{self.generator_type}' dataset with {len(self.batch_files)} batches. "
+            f"Current batch file: '{os.path.basename(self.batch_files[self.current_batch_idx])}' "
+            f"has {len(self.current_batch_data)} samples."
+        )
+    def _find_batch_files(self) -> List[str]:
+        """
+        Find and sort batch files with per-rank sharding for distributed training.
+        Each rank gets a disjoint subset of files to minimize I/O contention
+        when scaling to hundreds of GPUs.
+        """
+        import glob
+        pattern = os.path.join(self.batches_dir, "batch_*.arrow")
+        all_files = sorted(glob.glob(pattern))  # Sort for deterministic sharding
+        if not all_files:
+            return []
+        # Shard files across ranks: each rank gets every world_size-th file
+        # Example with 4 ranks: rank0=[0,4,8,...], rank1=[1,5,9,...], etc.
+        rank_files = [
+            f for i, f in enumerate(all_files) if i % self.world_size == self.rank
+        ]
+        # Shuffle only within this rank's shard for variety
+        random.shuffle(rank_files)
+        logger.info(
+            f"[Rank {self.rank}] '{self.generator_type}': Sharded {len(all_files)} files → "
+            f"{len(rank_files)} files for this rank ({len(rank_files) / len(all_files) * 100:.1f}%)"
+        )
+        return rank_files
+    def _load_batch_from_file(self, batch_file: str) -> List[dict]:
+        """Load a batch from arrow file."""
+        try:
+            table = feather.read_table(batch_file)
+            has_num_channels = "num_channels" in table.column_names
+            batch_data = []
+            for i in range(len(table)):
+                row = {
+                    "series_id": table["series_id"][i].as_py(),
+                    "values": table["values"][i].as_py(),
+                    "length": table["length"][i].as_py(),
+                    "generator_type": table["generator_type"][i].as_py(),
+                    "start": table["start"][i].as_py(),
+                    "frequency": table["frequency"][i].as_py(),
+                    "generation_timestamp": table["generation_timestamp"][i].as_py(),
+                }
+                if has_num_channels:
+                    row["num_channels"] = table["num_channels"][i].as_py()
+                else:
+                    row["num_channels"] = 1
+                batch_data.append(row)
+            return batch_data
+        except Exception as e:
+            logger.error(f"Error loading batch from {batch_file}: {e}")
+            raise
+    def _load_current_batch(self):
+        """Load the current batch into memory."""
+        if hasattr(self, "current_batch_data") and self.current_batch_data is not None:
+            del self.current_batch_data
+        batch_file = self.batch_files[self.current_batch_idx]
+        self.current_batch_data = self._load_batch_from_file(batch_file)
+        self.current_sample_idx = 0
+        logger.debug(
+            f"Loaded batch {self.current_batch_idx} for {self.generator_type} "
+            f"with {len(self.current_batch_data)} samples"
+        )
+    def _trigger_smart_prefetch(self):
+        """Trigger prefetching when batch is almost exhausted."""
+        if not self.prefetch_next or len(self.batch_files) <= 1:
+            return
+        remaining_samples = self.get_remaining_samples_in_current_batch()
+        should_prefetch = (
+            remaining_samples <= self.prefetch_threshold
+            and self.next_batch_data is None
+            and not self.prefetching_in_progress
+        )
+        if should_prefetch:
+            self._prefetch_next_batch()
+    def _prefetch_next_batch(self):
+        """Prefetch the next batch."""
+        if self.prefetching_in_progress:
+            return
+        self.prefetching_in_progress = True
+        next_batch_idx = (self.current_batch_idx + 1) % len(self.batch_files)
+        next_batch_file = self.batch_files[next_batch_idx]
+        try:
+            self.next_batch_data = self._load_batch_from_file(next_batch_file)
+            logger.debug(
+                f"Prefetched next batch {next_batch_idx} for {self.generator_type}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to prefetch batch {next_batch_idx}: {e}")
+            self.next_batch_data = None
+        finally:
+            self.prefetching_in_progress = False
+    def _advance_to_next_batch(self):
+        """Advance to the next batch and log the transition."""
+        if hasattr(self, "current_batch_data") and self.current_batch_data is not None:
+            del self.current_batch_data
+        previous_batch_idx = self.current_batch_idx
+        self.current_batch_idx = (self.current_batch_idx + 1) % len(self.batch_files)
+        if hasattr(self, "next_batch_data") and self.next_batch_data is not None:
+            self.current_batch_data = self.next_batch_data
+            self.next_batch_data = None
+        else:
+            self._load_current_batch()
+        self.current_sample_idx = 0
+        self.prefetching_in_progress = False
+        # --- NEW: Enhanced Logging Logic ---
+        self.visited_batch_indices.add(self.current_batch_idx)
+        # Calculate progress
+        total_files = len(self.batch_files)
+        visited_count = len(self.visited_batch_indices)
+        progress_percent = (visited_count / total_files) * 100
+        # Log the shard cycle event
+        logger.info(
+            f"\nDATA SHARD CYCLED for '{self.generator_type}': "
+            f"Moved from file index {previous_batch_idx} to {self.current_batch_idx}. "
+            f"Unique files visited: {visited_count}/{total_files} ({progress_percent:.1f}%)."
+        )
+        # Check if a full cycle has been completed
+        if visited_count == total_files:
+            self.full_cycles_completed += 1
+            logger.info(
+                f"🎉 FULL CYCLE #{self.full_cycles_completed} COMPLETED for '{self.generator_type}'! "
+                f"All {total_files} data files have been visited at least once. "
+                "Resetting visited set to track the next cycle."
+            )
+            # Reset for the next cycle count
+            self.visited_batch_indices.clear()
+            self.visited_batch_indices.add(self.current_batch_idx)
+    def get_sample(self) -> dict:
+        """Get the current sample and advance pointer."""
+        if not hasattr(self, "current_batch_data") or self.current_batch_data is None:
+            self._load_current_batch()
+        if self.current_batch_data is None:
+            raise RuntimeError("No batch data loaded")
+        if self.current_sample_idx >= len(self.current_batch_data):
+            self._advance_to_next_batch()
+        self._trigger_smart_prefetch()
+        sample = self.current_batch_data[self.current_sample_idx]
+        self.current_sample_idx += 1
+        return sample
+    def get_samples(self, num_samples: int) -> List[dict]:
+        """Get multiple samples."""
+        samples = []
+        for _ in range(num_samples):
+            samples.append(self.get_sample())
+        return samples
+    def get_total_samples_in_current_batch(self) -> int:
+        """Get total samples in current batch."""
+        if not hasattr(self, "current_batch_data") or self.current_batch_data is None:
+            return 0
+        return len(self.current_batch_data)
+    def get_remaining_samples_in_current_batch(self) -> int:
+        """Get remaining samples in current batch."""
+        if not hasattr(self, "current_batch_data") or self.current_batch_data is None:
+            return 0
+        return max(0, len(self.current_batch_data) - self.current_sample_idx)
+    def get_info(self) -> dict:
+        """Get extended dataset info, including cycle progress."""
+        total_files = len(self.batch_files)
+        visited_count = len(self.visited_batch_indices)
+        return {
+            "generator_type": self.generator_type,
+            "total_batch_files": total_files,
+            "current_batch_idx": self.current_batch_idx,
+            "current_sample_idx": self.current_sample_idx,
+            "current_batch_size": self.get_total_samples_in_current_batch(),
+            "remaining_in_batch": self.get_remaining_samples_in_current_batch(),
+            "unique_files_visited": visited_count,
+            "cycle_progress_percent": (visited_count / total_files) * 100
+            if total_files > 0
+            else 0,
+            "full_cycles_completed": self.full_cycles_completed,
+        }

src/data/filter.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import numpy as np
+import torch
+from scipy import signal
+from statsmodels.tsa.stattools import acf
+def lempel_ziv_complexity(binary_sequence: np.ndarray) -> int:
+    """Computes the Lempel-Ziv complexity of a binary sequence."""
+    sub_strings = set()
+    n = len(binary_sequence)
+    i = 0
+    count = 0
+    while i < n:
+        sub_str = ""
+        for j in range(i, n):
+            sub_str += str(binary_sequence[j])
+            if sub_str not in sub_strings:
+                sub_strings.add(sub_str)
+                count += 1
+                i = j + 1
+                break
+        else:
+            i += 1
+    return count
+def is_low_quality(
+    series: torch.Tensor,
+    autocorr_threshold: float = 0.2,
+    snr_threshold: float = 0.5,
+    complexity_threshold: float = 0.4,
+) -> bool:
+    """
+    Returns True if the series appears non-forecastable (noise-like):
+    - weak autocorrelation
+    - low SNR proxy
+    - high normalized Lempel-Ziv complexity
+    """
+    x = series.squeeze().detach().cpu().numpy()
+    if x.size < 20:
+        return True
+    if np.var(x) < 1e-10:
+        return True
+    x_detrended = signal.detrend(x)
+    try:
+        max_lags = min(len(x_detrended) // 4, 40)
+        if max_lags < 1:
+            autocorr_strength = 0.0
+        else:
+            acf_vals = acf(x_detrended, nlags=max_lags, fft=True)[1:]
+            autocorr_strength = float(np.max(np.abs(acf_vals)))
+    except Exception:
+        autocorr_strength = 0.0
+    win_size = max(3, min(len(x) // 10, 15))
+    signal_est = np.convolve(x, np.ones(win_size) / win_size, mode="valid")
+    noise_est = x[win_size - 1 :] - signal_est
+    var_signal = float(np.var(signal_est))
+    var_noise = float(np.var(noise_est))
+    snr_proxy = var_signal / var_noise if var_noise > 1e-8 else 1.0
+    median_val = float(np.median(x_detrended))
+    binary_seq = (x_detrended > median_val).astype(np.uint8)
+    complexity_score = lempel_ziv_complexity(binary_seq)
+    normalized_complexity = complexity_score / max(1, len(binary_seq))
+    is_random_like = (snr_proxy < snr_threshold) and (
+        normalized_complexity > complexity_threshold
+    )
+    is_uncorrelated = autocorr_strength < autocorr_threshold
+    return bool(is_uncorrelated and is_random_like)

src/data/frequency.py ADDED Viewed

	@@ -0,0 +1,538 @@

+"""
+Comprehensive frequency management module for time series forecasting.
+This module centralizes all frequency-related functionality including:
+- Frequency enum with helper methods
+- Frequency parsing and validation
+- Pandas frequency string conversion
+- Safety checks for date ranges
+- Frequency selection utilities
+- All frequency constants and mappings
+"""
+import logging
+import re
+from enum import Enum
+from typing import Dict, Optional, Tuple
+import numpy as np
+import pandas as pd
+from numpy.random import Generator
+from src.data.constants import BASE_END_DATE, BASE_START_DATE, MAX_YEARS
+logger = logging.getLogger(__name__)
+class Frequency(Enum):
+    """
+    Enhanced Frequency enum with comprehensive helper methods.
+    Each frequency includes methods for pandas conversion, safety checks,
+    and other frequency-specific operations.
+    """
+    A = "A"  # Annual
+    Q = "Q"  # Quarterly
+    M = "M"  # Monthly
+    W = "W"  # Weekly
+    D = "D"  # Daily
+    H = "h"  # Hourly
+    S = "s"  # Seconds
+    T1 = "1min"  # 1 minute
+    T5 = "5min"  # 5 minutes
+    T10 = "10min"  # 10 minutes
+    T15 = "15min"  # 15 minutes
+    T30 = "30min"  # 30 minutes
+    def to_pandas_freq(self, for_date_range: bool = True) -> str:
+        """
+        Convert to pandas frequency string.
+        Args:
+            for_date_range: If True, use strings suitable for pd.date_range().
+                           If False, use strings suitable for pd.PeriodIndex().
+        Returns:
+            Pandas frequency string
+        """
+        base, prefix, _ = FREQUENCY_MAPPING[self]
+        # Special handling for date_range vs period compatibility
+        if for_date_range:
+            # For date_range, use modern pandas frequency strings
+            if self == Frequency.M:
+                return "ME"  # Month End
+            elif self == Frequency.A:
+                return "YE"  # Year End
+            elif self == Frequency.Q:
+                return "QE"  # Quarter End
+        else:
+            # For periods, use legacy frequency strings
+            if self == Frequency.M:
+                return "M"  # Month for periods
+            elif self == Frequency.A:
+                return "Y"  # Year for periods (not YE)
+            elif self == Frequency.Q:
+                return "Q"  # Quarter for periods (not QE)
+        # Construct frequency string for other frequencies
+        if prefix:
+            return f"{prefix}{base}"
+        else:
+            return base
+    def to_pandas_offset(self) -> str:
+        """Get pandas offset string for time delta calculations."""
+        return FREQUENCY_TO_OFFSET[self]
+    def get_days_per_period(self) -> float:
+        """Get approximate days per period for this frequency."""
+        _, _, days = FREQUENCY_MAPPING[self]
+        return days
+    def get_max_safe_length(self) -> int:
+        """Get maximum safe sequence length to prevent timestamp overflow."""
+        return ALL_FREQUENCY_MAX_LENGTHS.get(self, float("inf"))
+    def is_high_frequency(self) -> bool:
+        """Check if this is a high frequency (minute/second level)."""
+        return self in [
+            Frequency.S,
+            Frequency.T1,
+            Frequency.T5,
+            Frequency.T10,
+            Frequency.T15,
+            Frequency.T30,
+        ]
+    def is_low_frequency(self) -> bool:
+        """Check if this is a low frequency (annual/quarterly/monthly)."""
+        return self in [Frequency.A, Frequency.Q, Frequency.M]
+    def get_seasonality(self) -> int:
+        """Get typical seasonality for this frequency."""
+        seasonality_map = {
+            Frequency.S: 3600,  # 1 hour of seconds
+            Frequency.T1: 60,  # 1 hour of minutes
+            Frequency.T5: 12,  # 1 hour of 5-minute intervals
+            Frequency.T10: 6,  # 1 hour of 10-minute intervals
+            Frequency.T15: 4,  # 1 hour of 15-minute intervals
+            Frequency.T30: 2,  # 1 hour of 30-minute intervals
+            Frequency.H: 24,  # 1 day of hours
+            Frequency.D: 7,  # 1 week of days
+            Frequency.W: 52,  # 1 year of weeks
+            Frequency.M: 12,  # 1 year of months
+            Frequency.Q: 4,  # 1 year of quarters
+            Frequency.A: 1,  # No clear seasonality for annual
+        }
+        return seasonality_map.get(self, 1)
+    def get_gift_eval_weight(self) -> float:
+        """Get GIFT eval dataset frequency weight."""
+        return GIFT_EVAL_FREQUENCY_WEIGHTS.get(self, 0.1)
+    def get_length_range(self) -> Tuple[int, int, int, int]:
+        """Get (min_length, max_length, optimal_start, optimal_end) for this frequency."""
+        return GIFT_EVAL_LENGTH_RANGES.get(self, (50, 1000, 100, 500))
+# ============================================================================
+# Frequency Mappings and Constants
+# ============================================================================
+# Core frequency mapping: (pandas_base, prefix, days_per_period)
+# - pandas_base / prefix: Frequency.to_pandas_freq() returns prefix + base for every
+#   frequency except A, Q and M, which it special-cases before these fields are used.
+# - days_per_period: approximate length of one period in days, returned by
+#   Frequency.get_days_per_period().
+FREQUENCY_MAPPING: Dict[Frequency, Tuple[str, str, float]] = {
+    Frequency.A: (
+        "YE",
+        "",
+        365.25,
+    ),  # Average days per year (accounting for leap years)
+    Frequency.Q: ("Q", "", 91.3125),  # 365.25/4 - average days per quarter
+    Frequency.M: ("M", "", 30.4375),  # 365.25/12 - average days per month
+    Frequency.W: ("W", "", 7),
+    Frequency.D: ("D", "", 1),
+    Frequency.H: ("h", "", 1 / 24),
+    Frequency.S: ("s", "", 1 / 86400),  # 24*60*60
+    Frequency.T1: ("min", "1", 1 / 1440),  # 24*60
+    Frequency.T5: ("min", "5", 1 / 288),  # 24*60/5
+    Frequency.T10: ("min", "10", 1 / 144),  # 24*60/10
+    Frequency.T15: ("min", "15", 1 / 96),  # 24*60/15
+    Frequency.T30: ("min", "30", 1 / 48),  # 24*60/30
+}
+# Frequency to pandas offset mapping for calculating time deltas
+FREQUENCY_TO_OFFSET: Dict[Frequency, str] = {
+    Frequency.A: "AS",  # Annual start
+    Frequency.Q: "QS",  # Quarter start
+    Frequency.M: "MS",  # Month start
+    Frequency.W: "W",  # Weekly
+    Frequency.D: "D",  # Daily
+    Frequency.H: "H",  # Hourly
+    Frequency.T1: "1T",  # 1 minute
+    Frequency.T5: "5T",  # 5 minutes
+    Frequency.T10: "10T",  # 10 minutes
+    Frequency.T15: "15T",  # 15 minutes
+    Frequency.T30: "30T",  # 30 minutes
+    Frequency.S: "S",  # Seconds
+}
+# Maximum sequence lengths to avoid pandas OutOfBoundsDatetime errors.
+# Each limit is the number of periods that fit into MAX_YEARS years
+# (using 365.2425 days and 52.1775 weeks per year), truncated to an int.
+SHORT_FREQUENCY_MAX_LENGTHS = {
+    Frequency.A: MAX_YEARS,
+    Frequency.Q: MAX_YEARS * 4,
+    Frequency.M: MAX_YEARS * 12,
+    Frequency.W: int(MAX_YEARS * 52.1775),
+    Frequency.D: int(MAX_YEARS * 365.2425),
+}
+# Same limit for the hourly and sub-hourly frequencies (periods per day: 24 hours,
+# 12/6/4/2 five-/ten-/fifteen-/thirty-minute steps per hour).
+HIGH_FREQUENCY_MAX_LENGTHS = {
+    Frequency.H: int(MAX_YEARS * 365.2425 * 24),
+    Frequency.S: int(MAX_YEARS * 365.2425 * 24 * 60 * 60),
+    Frequency.T1: int(MAX_YEARS * 365.2425 * 24 * 60),
+    Frequency.T5: int(MAX_YEARS * 365.2425 * 24 * 12),
+    Frequency.T10: int(MAX_YEARS * 365.2425 * 24 * 6),
+    Frequency.T15: int(MAX_YEARS * 365.2425 * 24 * 4),
+    Frequency.T30: int(MAX_YEARS * 365.2425 * 24 * 2),
+}
+# Combined max lengths for all frequencies; read by Frequency.get_max_safe_length()
+ALL_FREQUENCY_MAX_LENGTHS = {
+    **SHORT_FREQUENCY_MAX_LENGTHS,
+    **HIGH_FREQUENCY_MAX_LENGTHS,
+}
+# GIFT eval-based frequency weights from actual dataset analysis.
+# Relative sampling weights read by Frequency.get_gift_eval_weight(); a frequency
+# without an entry (currently Frequency.T30) falls back to that method's default of 0.1.
+GIFT_EVAL_FREQUENCY_WEIGHTS: Dict[Frequency, float] = {
+    Frequency.H: 25.0,  # Hourly - most common
+    Frequency.D: 23.4,  # Daily - second most common
+    Frequency.W: 12.9,  # Weekly - third most common
+    Frequency.T15: 9.7,  # 15-minute
+    Frequency.T5: 9.7,  # 5-minute
+    Frequency.M: 7.3,  # Monthly
+    Frequency.T10: 4.8,  # 10-minute
+    # NOTE(review): Frequency.S is plain seconds ("s"); presumably this weight comes
+    # from 10-second data in the source analysis - confirm.
+    Frequency.S: 4.8,  # 10-second
+    Frequency.T1: 1.6,  # 1-minute
+    Frequency.Q: 0.8,  # Quarterly
+    Frequency.A: 0.8,  # Annual
+}
+# GIFT eval-based length ranges derived from actual dataset analysis
+# Format: (min_length, max_length, optimal_start, optimal_end)
+# Read by Frequency.get_length_range(). select_safe_random_frequency() divides by
+# (optimal_start - min_length) and (max_length - optimal_end), so both differences
+# must stay non-zero for every entry.
+GIFT_EVAL_LENGTH_RANGES: Dict[Frequency, Tuple[int, int, int, int]] = {
+    # Low frequency ranges (based on actual GIFT eval data + logical extensions)
+    Frequency.A: (25, 100, 30, 70),
+    Frequency.Q: (25, 150, 50, 120),
+    Frequency.M: (40, 1000, 100, 600),
+    Frequency.W: (50, 3500, 100, 1500),
+    # Medium frequency ranges
+    Frequency.D: (150, 25000, 300, 7000),  # Daily: covers 1-year+ scenarios
+    Frequency.H: (600, 35000, 700, 17000),
+    # High frequency ranges (extended for shorter realistic scenarios)
+    Frequency.T1: (200, 2500, 1200, 1800),  # 1-minute: day to few days
+    Frequency.S: (7500, 9500, 7900, 9000),
+    Frequency.T15: (1000, 140000, 50000, 130000),
+    Frequency.T5: (200, 105000, 20000, 95000),
+    Frequency.T10: (40000, 55000, 47000, 52000),
+    Frequency.T30: (100, 50000, 10000, 40000),
+}
+# ============================================================================
+# Frequency Parsing and Validation
+# ============================================================================
+def parse_frequency(freq_str: str) -> Frequency:
+    """
+    Parse frequency string to Frequency enum, robust to variations.
+    Handles various frequency string formats:
+    - Standard: "A", "Q", "M", "W", "D", "H", "S"
+    - Pandas-style: "A-DEC", "W-SUN", "QE-MAR"
+    - Minutes: "5T", "10min", "1T"
+    - Case variations: "a", "h", "D"
+    Args:
+        freq_str: The frequency string to parse (e.g., "5T", "W-SUN", "M")
+    Returns:
+        Corresponding Frequency enum member
+    Raises:
+        ValueError: If the frequency string is not supported
+    """
+    # Handle minute-based frequencies BEFORE pandas standardization
+    # because pandas converts "5T" to just "min", losing the multiplier
+    minute_match = re.match(r"^(\d*)T$", freq_str, re.IGNORECASE) or re.match(
+        r"^(\d*)min$", freq_str, re.IGNORECASE
+    )
+    if minute_match:
+        multiplier = int(minute_match.group(1)) if minute_match.group(1) else 1
+        enum_key = f"T{multiplier}"
+        try:
+            return Frequency[enum_key]
+        except KeyError:
+            logger.warning(
+                f"Unsupported minute frequency '{freq_str}' (multiplier: {multiplier}). "
+                f"Falling back to '1min' ({Frequency.T1.value})."
+            )
+            return Frequency.T1
+    # Now standardize frequency string for other cases
+    try:
+        offset = pd.tseries.frequencies.to_offset(freq_str)
+        standardized_freq = offset.name
+    except Exception:
+        standardized_freq = freq_str
+    # Handle other frequencies by their base (e.g., 'W-SUN' -> 'W', 'A-DEC' -> 'A')
+    base_freq = standardized_freq.split("-")[0].upper()
+    freq_map = {
+        "A": Frequency.A,
+        "Y": Frequency.A,  # Alias for Annual
+        "YE": Frequency.A,  # Alias for Annual
+        "Q": Frequency.Q,
+        "QE": Frequency.Q,  # Alias for Quarterly
+        "M": Frequency.M,
+        "ME": Frequency.M,  # Alias for Monthly
+        "W": Frequency.W,
+        "D": Frequency.D,
+        "H": Frequency.H,
+        "S": Frequency.S,
+    }
+    if base_freq in freq_map:
+        return freq_map[base_freq]
+    raise NotImplementedError(f"Frequency '{standardized_freq}' is not supported.")
+def validate_frequency_safety(
+    start_date: np.datetime64, total_length: int, frequency: Frequency
+) -> bool:
+    """
+    Check if start date and frequency combination is safe for pandas datetime operations.
+    This function verifies that pd.date_range(start=start_date, periods=total_length, freq=freq_str)
+    will not raise an OutOfBoundsDatetime error, accounting for pandas' datetime bounds
+    (1677-09-21 to 2262-04-11) and realistic frequency limitations.
+    Args:
+        start_date: The proposed start date for the time series
+        total_length: Total length of the time series
+        frequency: The frequency of the time series
+    Returns:
+        True if the combination is safe, False otherwise
+    """
+    try:
+        # Get the pandas frequency string
+        freq_str = frequency.to_pandas_freq(for_date_range=True)
+        # Convert numpy datetime64 to pandas Timestamp for date_range
+        start_pd = pd.Timestamp(start_date)
+        # Check if start date is within pandas' valid datetime range
+        if start_pd < pd.Timestamp.min or start_pd > pd.Timestamp.max:
+            return False
+        # Check maximum length constraints
+        max_length = frequency.get_max_safe_length()
+        if total_length > max_length:
+            return False
+        # For low frequencies, be extra conservative
+        if frequency.is_low_frequency():
+            if frequency == Frequency.A and total_length > 500:  # Max ~500 years
+                return False
+            elif frequency == Frequency.Q and total_length > 2000:  # Max ~500 years
+                return False
+            elif frequency == Frequency.M and total_length > 6000:  # Max ~500 years
+                return False
+        # Calculate approximate end date
+        days_per_period = frequency.get_days_per_period()
+        approx_days = total_length * days_per_period
+        # For annual/quarterly frequencies, add extra safety margin
+        if frequency in [Frequency.A, Frequency.Q]:
+            approx_days *= 1.1  # 10% safety margin
+        end_date = start_pd + pd.Timedelta(days=approx_days)
+        # Check if end date is within pandas' valid datetime range
+        if end_date < pd.Timestamp.min or end_date > pd.Timestamp.max:
+            return False
+        # Try to create the date range as final validation
+        pd.date_range(start=start_pd, periods=total_length, freq=freq_str)
+        return True
+    except (pd.errors.OutOfBoundsDatetime, OverflowError, ValueError):
+        return False
+# ============================================================================
+# Frequency Selection Utilities
+# ============================================================================
+def select_safe_random_frequency(total_length: int, rng: Generator) -> Frequency:
+    """
+    Select a random frequency suitable for a given total length of a time series,
+    based on actual GIFT eval dataset patterns and distributions.
+    The selection logic:
+    1. Filters frequencies that can handle the given total_length
+    2. Applies base weights derived from actual GIFT eval frequency distribution
+    3. Strongly boosts frequencies that are in their optimal length ranges
+    4. Handles edge cases gracefully with fallbacks
+    Args:
+        total_length: The total length of the time series (history + future)
+        rng: A numpy random number generator instance
+    Returns:
+        A randomly selected frequency that matches GIFT eval patterns
+    """
+    # Find valid frequencies and calculate weighted scores
+    valid_frequencies = []
+    frequency_scores = []
+    for freq in Frequency:
+        # Check basic timestamp overflow limits
+        max_allowed = freq.get_max_safe_length()
+        if total_length > max_allowed:
+            continue
+        # Check if frequency has defined ranges
+        min_len, max_len, optimal_start, optimal_end = freq.get_length_range()
+        # Must be within the frequency's realistic range
+        if total_length < min_len or total_length > max_len:
+            continue
+        valid_frequencies.append(freq)
+        # Calculate fitness score based on GIFT eval patterns
+        base_weight = freq.get_gift_eval_weight()
+        # Enhanced length-based fitness scoring
+        if optimal_start <= total_length <= optimal_end:
+            # In optimal range - very strong preference
+            length_multiplier = 5.0
+        else:
+            # Outside optimal but within valid range - calculate penalty
+            if total_length < optimal_start:
+                # Below optimal range
+                distance_ratio = (optimal_start - total_length) / (
+                    optimal_start - min_len
+                )
+            else:
+                # Above optimal range
+                distance_ratio = (total_length - optimal_end) / (max_len - optimal_end)
+            # Apply graduated penalty: closer to optimal = higher score
+            length_multiplier = 0.3 + 1.2 * (1.0 - distance_ratio)  # Range: 0.3-1.5
+        final_score = base_weight * length_multiplier
+        frequency_scores.append(final_score)
+    # Handle edge cases with smart fallbacks
+    if not valid_frequencies:
+        # Fallback strategy based on typical length patterns
+        if total_length <= 100:
+            # Very short series - prefer low frequencies
+            fallback_order = [
+                Frequency.A,
+                Frequency.Q,
+                Frequency.M,
+                Frequency.W,
+                Frequency.D,
+            ]
+        elif total_length <= 1000:
+            # Medium short series - prefer daily/weekly
+            fallback_order = [Frequency.D, Frequency.W, Frequency.H, Frequency.M]
+        else:
+            # Longer series - prefer higher frequencies
+            fallback_order = [Frequency.H, Frequency.D, Frequency.T15, Frequency.T5]
+        for fallback_freq in fallback_order:
+            max_allowed = fallback_freq.get_max_safe_length()
+            if total_length <= max_allowed:
+                return fallback_freq
+        # Last resort
+        return Frequency.D
+    if len(valid_frequencies) == 1:
+        return valid_frequencies[0]
+    # Select based on weighted probabilities
+    scores = np.array(frequency_scores)
+    probabilities = scores / scores.sum()
+    return rng.choice(valid_frequencies, p=probabilities)
+def select_safe_start_date(
+    total_length: int,
+    frequency: Frequency,
+    rng: Generator = np.random.default_rng(),
+    max_retries: int = 10,
+) -> np.datetime64:
+    """
+    Select a random start date such that the entire time series (history + future)
+    ends no later than BASE_END_DATE.
+    Args:
+        total_length: Total length of the time series (history + future)
+        frequency: Time series frequency
+        rng: Random number generator instance. The default generator is built once,
+            when this function is defined, and is shared by every call that omits
+            the argument; pass an explicit generator for reproducible sampling.
+        max_retries: Maximum number of sampling attempts
+    Returns:
+        A sampled start date accepted by validate_frequency_safety, or
+        BASE_START_DATE when none of the max_retries samples is accepted
+    Raises:
+        ValueError: If the required time span exceeds the available date window
+    """
+    days_per_period = frequency.get_days_per_period()
+    # Calculate approximate duration in days
+    total_days = total_length * days_per_period
+    # Round the span UP to whole days: truncating a fractional span would place
+    # latest_safe_start up to one day too late and let the series end past BASE_END_DATE
+    span_days = int(np.ceil(total_days))
+    # Define safe bounds: ensure end date doesn't exceed BASE_END_DATE
+    latest_safe_start = BASE_END_DATE - np.timedelta64(span_days, "D")
+    earliest_safe_start = BASE_START_DATE
+    # Check if the required time span exceeds the available window
+    if latest_safe_start < earliest_safe_start:
+        available_days = (
+            (BASE_END_DATE - BASE_START_DATE).astype("timedelta64[D]").astype(int)
+        )
+        available_years = available_days / 365.25
+        required_years = total_days / 365.25
+        raise ValueError(
+            f"Required time span ({required_years:.1f} years, {total_days:.0f} days) "
+            f"exceeds available date window ({available_years:.1f} years, {available_days} days). "
+            f"Reduce total_length ({total_length}) or extend the date window."
+        )
+    # Convert to nanoseconds for random sampling
+    earliest_ns = earliest_safe_start.astype("datetime64[ns]").astype(np.int64)
+    latest_ns = latest_safe_start.astype("datetime64[ns]").astype(np.int64)
+    for _ in range(max_retries):
+        # Uniformly sample a start date within bounds (upper bound is exclusive, hence +1)
+        random_ns = rng.integers(earliest_ns, latest_ns + 1)
+        start_date = np.datetime64(int(random_ns), "ns")
+        # Verify safety
+        if validate_frequency_safety(start_date, total_length, frequency):
+            return start_date
+    # No sample was accepted: fall back to the earliest date of the window.
+    # NOTE(review): this fallback is returned without being validated itself.
+    return BASE_START_DATE

src/data/loaders.py ADDED Viewed

	@@ -0,0 +1,661 @@

+import logging
+import random
+from typing import Dict, Iterator, List, Optional
+import numpy as np
+import pandas as pd
+import torch
+from src.data.batch_composer import BatchComposer, ComposedDataset
+from src.data.containers import BatchTimeSeriesContainer
+from src.data.frequency import parse_frequency
+from src.gift_eval.constants import ALL_DATASETS
+from src.gift_eval.data import Dataset as GiftEvalDataset
+logger = logging.getLogger(__name__)
+class GiftEvalDataLoader:
+    """
+    Data loader for GIFT-eval datasets, converting them to BatchTimeSeriesContainer format.
+    Supports both training and validation modes.
+    """
+    TERMS = ["short", "medium", "long"]
+    def __init__(
+        self,
+        mode: str = "train",
+        batch_size: int = 32,
+        device: Optional[torch.device] = None,
+        shuffle: bool = True,
+        to_univariate: bool = False,
+        max_context_length: Optional[int] = None,
+        max_windows: int = 20,
+        skip_datasets_with_nans: bool = False,
+        datasets_to_use: Optional[List[str]] = None,
+        dataset_storage_path: Optional[str] = None,
+    ):
+        """
+        Set up the loader, load the requested datasets and pre-build the first epoch.
+        Args:
+            mode: "train" selects each dataset's training split; any other value
+                selects the validation split
+            batch_size: Number of series per batch
+            device: Device batches are moved to when served (None leaves them as built)
+            shuffle: Whether batch order is shuffled (only applied in "train" mode)
+            to_univariate: Whether multivariate datasets are split into univariate series
+            max_context_length: Optional cap on the total window length (context + forecast)
+            max_windows: Number of windows requested from each dataset
+            skip_datasets_with_nans: Whether entries containing NaN values are dropped
+            datasets_to_use: Optional subset of dataset names; None or empty uses ALL_DATASETS
+            dataset_storage_path: Path on disk where the GIFT-eval datasets are stored
+        """
+        # Plain configuration first; none of it depends on the dataset selection
+        self.mode = mode
+        self.batch_size = batch_size
+        self.device = device
+        self.shuffle = shuffle
+        self.to_univariate = to_univariate
+        self.max_context_length = max_context_length
+        self.max_windows = max_windows
+        self.skip_datasets_with_nans = skip_datasets_with_nans
+        self.dataset_storage_path = dataset_storage_path
+        self.terms = self.TERMS
+        # Resolve which datasets to load
+        subset_requested = datasets_to_use is not None and len(datasets_to_use) > 0
+        if subset_requested:
+            invalid_datasets = [ds for ds in datasets_to_use if ds not in ALL_DATASETS]
+            if invalid_datasets:
+                # Unknown names are reported and dropped instead of failing
+                logger.warning(f"Invalid datasets requested: {invalid_datasets}")
+                logger.warning(f"Available datasets: {ALL_DATASETS}")
+                self.dataset_names = [
+                    ds for ds in datasets_to_use if ds in ALL_DATASETS
+                ]
+            else:
+                self.dataset_names = datasets_to_use
+            logger.info(
+                f"Using subset of datasets: {len(self.dataset_names)}/{len(ALL_DATASETS)} datasets"
+            )
+            logger.info(f"Selected datasets: {self.dataset_names}")
+        else:
+            self.dataset_names = ALL_DATASETS
+            logger.info(
+                f"Using all available datasets: {len(self.dataset_names)} datasets"
+            )
+        # Load everything, then build the iterator state and the first epoch
+        self._load_datasets()
+        self._current_idx = 0
+        self._epoch_data = []
+        self._prepare_epoch_data()
+    def _load_datasets(self) -> None:
+        """
+        Instantiate one GiftEvalDataset per (dataset name, term) pair.
+        Results are stored in self.datasets and self.dataset_prediction_lengths under
+        the key "<name>_<term>". A dataset that raises while loading is logged and
+        skipped; terms of it that were already loaded stay registered.
+        """
+        self.datasets = {}
+        self.dataset_prediction_lengths = {}
+        for dataset_name in self.dataset_names:
+            # Names starting with "m4_" are always loaded with a single window
+            max_windows = 1 if dataset_name.startswith("m4_") else self.max_windows
+            common_kwargs = dict(
+                name=dataset_name,
+                max_windows=max_windows,
+                storage_path=self.dataset_storage_path,
+            )
+            try:
+                # Probe the multivariate view of the first term to learn target_dim
+                probe = GiftEvalDataset(
+                    term=self.terms[0], to_univariate=False, **common_kwargs
+                )
+                # Univariate conversion only makes sense for multi-channel targets
+                to_univariate = self.to_univariate and probe.target_dim > 1
+                for term in self.terms:
+                    dataset_key = f"{dataset_name}_{term}"
+                    dataset = GiftEvalDataset(
+                        term=term, to_univariate=to_univariate, **common_kwargs
+                    )
+                    self.datasets[dataset_key] = dataset
+                    self.dataset_prediction_lengths[dataset_key] = (
+                        dataset.prediction_length
+                    )
+                    logger.info(
+                        f"Loaded {dataset_key} - prediction_length: {dataset.prediction_length}, "
+                        f"frequency: {dataset.freq}, target_dim: {dataset.target_dim}, "
+                        f"min_length: {dataset._min_series_length}, windows: {dataset.windows}"
+                    )
+            except Exception as e:
+                logger.warning(f"Failed to load dataset {dataset_name}: {str(e)}")
+    def _contains_nan(self, data_entry: dict) -> bool:
+        """
+        Report whether the "target" of a data entry contains NaN values.
+        Args:
+            data_entry: Entry dict; its "target" value is inspected
+        Returns:
+            False when there is no target or no NaN is found; True when a NaN is
+            found or when the target cannot be converted to a float32 array
+            (such entries are treated as unusable)
+        """
+        target = data_entry.get("target")
+        if target is None:
+            return False
+        # Convert to numeric numpy array for robust NaN checking
+        try:
+            target_np = np.asarray(target, dtype=np.float32)
+        except Exception:
+            logger.warning(
+                "NaN check: failed to coerce target to float32; skipping entry"
+            )
+            return True
+        # .any() yields numpy.bool_; cast so the result matches the annotated bool
+        return bool(np.isnan(target_np).any())
+    def _convert_to_container(
+        self, data_entries: List[dict], prediction_length: int, dataset_freq: str
+    ) -> BatchTimeSeriesContainer:
+        """
+        Pack a list of data entries into one BatchTimeSeriesContainer.
+        Each entry's "target" is cut down to its last max_context_length points (when
+        that limit is set); the final prediction_length points form the future window
+        and everything before them forms the history. Histories start at index 0 and
+        are NaN-padded at the end up to the longest history in the batch.
+        Args:
+            data_entries: Entries providing a "target" array and a "start" timestamp
+            prediction_length: Number of trailing time steps used as the future window
+            dataset_freq: Frequency string, converted with parse_frequency
+        Returns:
+            The batch container; history_mask is only attached in "train" mode
+        Raises:
+            ValueError: If an entry has fewer than prediction_length + 1 usable points
+        """
+        num_entries = len(data_entries)
+        context_cap = self.max_context_length
+        # Convert every target once to a 2-D [channels, time] float32 array and
+        # record how many trailing points of it may be used
+        series_list = []
+        window_lengths = []
+        for entry in data_entries:
+            series = np.asarray(entry["target"], dtype=np.float32)
+            if series.ndim == 1:
+                series = series.reshape(1, -1)
+            _, raw_len = series.shape
+            series_list.append(series)
+            window_lengths.append(
+                raw_len if context_cap is None else min(raw_len, context_cap)
+            )
+        # Longest history in the batch decides the padded history length
+        longest_history = max(
+            (max(0, window_len - prediction_length) for window_len in window_lengths),
+            default=0,
+        )
+        # Channel count is taken from the first entry
+        num_channels = series_list[0].shape[0]
+        history_values = np.full(
+            (num_entries, longest_history, num_channels), np.nan, dtype=np.float32
+        )
+        future_values = np.full(
+            (num_entries, prediction_length, num_channels), np.nan, dtype=np.float32
+        )
+        history_mask = np.zeros((num_entries, longest_history), dtype=bool)
+        for row, (series, window_len) in enumerate(zip(series_list, window_lengths)):
+            if window_len < prediction_length + 1:
+                # Not enough length to build (history + future). Signal to caller.
+                raise ValueError(
+                    "Entry too short after max_context_length truncation to form history+future window"
+                )
+            window = series[:, -window_len:]
+            split = window_len - prediction_length
+            # Arrays are stored time-first, so transpose [C, T] slices to [T, C]
+            history_values[row, :split, :] = window[:, :split].T
+            future_values[row, :, :] = window[:, split : split + prediction_length].T
+            history_mask[row, :split] = True
+        # The first entry's start timestamp is reused for the whole batch
+        first_start = data_entries[0]["start"]
+        if hasattr(first_start, "to_timestamp"):
+            start_numpy = first_start.to_timestamp().to_numpy()
+        else:
+            start_numpy = pd.Timestamp(first_start).to_numpy()
+        frequency_enum = parse_frequency(dataset_freq)
+        mask_tensor = (
+            torch.tensor(history_mask, dtype=torch.bool)
+            if self.mode == "train"
+            else None
+        )
+        return BatchTimeSeriesContainer(
+            history_values=torch.tensor(history_values, dtype=torch.float32),
+            future_values=torch.tensor(future_values, dtype=torch.float32),
+            start=[start_numpy] * num_entries,
+            frequency=[frequency_enum] * num_entries,
+            history_mask=mask_tensor,
+        )
+    def _prepare_epoch_data(self) -> None:
+        """
+        Build the (dataset_key, batch) pairs served during one epoch.
+        For every loaded dataset the entries of the split selected by self.mode are
+        filtered, grouped into chunks of self.batch_size and converted with
+        _convert_to_container. Entries whose usable length (after max_context_length
+        truncation) is below prediction_length + 1 are dropped in every mode, so a
+        single short series does not cause its whole batch to be discarded.
+        Failures are logged and skipped rather than raised.
+        """
+        self._epoch_data = []
+        for dataset_key, dataset in self.datasets.items():
+            try:
+                # Get appropriate dataset based on mode
+                if self.mode == "train":
+                    data = dataset.training_dataset
+                else:
+                    data = dataset.validation_dataset
+                dataset_freq = dataset.freq
+                prediction_length = self.dataset_prediction_lengths[dataset_key]
+                # _convert_to_container needs one history point plus the full future window
+                min_required_len = prediction_length + 1
+                valid_entries = []
+                num_too_short = 0
+                for entry in data:
+                    # Skip if contains NaN and configured to do so
+                    if self.skip_datasets_with_nans and self._contains_nan(entry):
+                        continue
+                    target = np.asarray(entry["target"])
+                    seq_len = len(target) if target.ndim == 1 else target.shape[1]
+                    # Mirror the truncation applied later in _convert_to_container
+                    if self.max_context_length is not None:
+                        seq_len = min(seq_len, self.max_context_length)
+                    if seq_len < min_required_len:
+                        num_too_short += 1
+                        continue
+                    valid_entries.append(entry)
+                if num_too_short:
+                    logger.info(
+                        f"Skipped {num_too_short} entries of {dataset_key} with fewer "
+                        f"than {min_required_len} usable points"
+                    )
+                if not valid_entries:
+                    logger.warning(f"No valid entries found for {dataset_key}")
+                    continue
+                # Create batches
+                for i in range(0, len(valid_entries), self.batch_size):
+                    batch_entries = valid_entries[i : i + self.batch_size]
+                    try:
+                        batch_container = self._convert_to_container(
+                            batch_entries, prediction_length, dataset_freq
+                        )
+                    except Exception as e:
+                        logger.warning(
+                            f"Failed to create batch for {dataset_key}: {str(e)}"
+                        )
+                        continue
+                    self._epoch_data.append((dataset_key, batch_container))
+            except Exception as e:
+                logger.warning(
+                    f"Failed to process dataset {dataset_key}: {str(e)}. "
+                    f"Dataset may be too short for the required offset."
+                )
+                continue
+        # Shuffle if in training mode
+        if self.mode == "train" and self.shuffle:
+            random.shuffle(self._epoch_data)
+        logger.info(f"Prepared {len(self._epoch_data)} batches for {self.mode} mode")
+    def __iter__(self) -> Iterator[BatchTimeSeriesContainer]:
+        """
+        Begin a new epoch and return the loader itself as the iterator.
+        In "train" mode with shuffling enabled the prepared batches are reordered
+        in place; the read cursor is always rewound to the first batch.
+        """
+        if self.mode == "train" and self.shuffle:
+            random.shuffle(self._epoch_data)
+        self._current_idx = 0
+        return self
+    def __next__(self) -> BatchTimeSeriesContainer:
+        """
+        Return the batch under the cursor and advance the cursor by one.
+        Raises:
+            StopIteration: If no batches were prepared or the epoch is exhausted
+        """
+        batches = self._epoch_data
+        if not batches:
+            raise StopIteration("No valid data available")
+        cursor = self._current_idx
+        if cursor >= len(batches):
+            raise StopIteration
+        # Stored items are (dataset_key, batch); only the batch is served
+        _, batch = batches[cursor]
+        self._current_idx = cursor + 1
+        if self.device is not None:
+            batch.to_device(self.device)
+        return batch
+    def __len__(self) -> int:
+        """Number of batches prepared for the current epoch."""
+        prepared_batches = self._epoch_data
+        return len(prepared_batches)
+class CyclicGiftEvalDataLoader:
+    """
+    Wrapper for GiftEvalDataLoader that provides cycling behavior for training.
+    One pass over this wrapper yields exactly num_iterations_per_epoch batches,
+    restarting the base loader whenever it runs out of data.
+    """
+    def __init__(self, base_loader: GiftEvalDataLoader, num_iterations_per_epoch: int):
+        """
+        Initialize the cyclic data loader.
+        Args:
+            base_loader: The underlying GiftEvalDataLoader
+            num_iterations_per_epoch: Number of iterations to run per epoch
+        """
+        self.base_loader = base_loader
+        self.num_iterations_per_epoch = num_iterations_per_epoch
+        self.dataset_names = base_loader.dataset_names
+        self.device = base_loader.device
+        # Iteration state exists from construction on, so next() also works
+        # when it is called before iter()
+        self._current_iteration = 0
+        self._base_iter = None
+    def __iter__(self) -> Iterator[BatchTimeSeriesContainer]:
+        """Start a new epoch of exactly num_iterations_per_epoch iterations."""
+        self._current_iteration = 0
+        self._base_iter = iter(self.base_loader)
+        return self
+    def __next__(self) -> BatchTimeSeriesContainer:
+        """
+        Get next batch, cycling through base loader as needed.
+        Raises:
+            StopIteration: After num_iterations_per_epoch batches, or when the
+                base loader yields nothing even right after a restart
+        """
+        if self._current_iteration >= self.num_iterations_per_epoch:
+            raise StopIteration
+        if self._base_iter is None:
+            # next() was called without a preceding iter(): start lazily
+            self._base_iter = iter(self.base_loader)
+        try:
+            batch = next(self._base_iter)
+        except StopIteration:
+            # Restart the base iterator when exhausted
+            self._base_iter = iter(self.base_loader)
+            batch = next(self._base_iter)
+        self._current_iteration += 1
+        return batch
+    def __len__(self) -> int:
+        """Return the configured number of iterations per epoch."""
+        return self.num_iterations_per_epoch
+def _collate_single_batch(batch):
+    """Return the only element of a size-1 DataLoader batch unchanged."""
+    # Each dataset item is already a BatchTimeSeriesContainer holding batch_size
+    # samples and the DataLoader uses batch_size=1, so the first item is the batch
+    return batch[0]
+def create_synthetic_dataloader(
+    base_data_dir: str,
+    batch_size: int = 128,
+    num_batches_per_epoch: int = 1000,
+    generator_proportions: Optional[Dict[str, float]] = None,
+    mixed_batches: bool = True,
+    augmentations: Optional[Dict[str, bool]] = None,
+    augmentation_probabilities: Optional[Dict[str, float]] = None,
+    device: Optional[torch.device] = None,
+    num_workers: int = 0,
+    pin_memory: bool = True,
+    global_seed: int = 42,
+    nan_stats_path: Optional[str] = None,
+    nan_patterns_path: Optional[str] = None,
+    chosen_scaler_name: Optional[str] = None,
+) -> torch.utils.data.DataLoader:
+    """
+    Create a PyTorch DataLoader for training with saved generator batches.
+    Args:
+        base_data_dir: Base directory containing generator subdirectories
+        batch_size: Size of each training batch
+        num_batches_per_epoch: Number of batches per epoch
+        generator_proportions: Dict mapping generator names to proportions
+        mixed_batches: Whether to create mixed or uniform batches
+        augmentations: Dict mapping augmentation names to booleans
+        augmentation_probabilities: Dict mapping augmentation names to probabilities
+        device: Target device
+        num_workers: Number of DataLoader workers
+        pin_memory: Whether to pin memory
+        global_seed: Global random seed
+        nan_stats_path: Path to nan stats file
+        nan_patterns_path: Path to nan patterns file (forwarded to BatchComposer)
+        chosen_scaler_name: Name of the scaler used in training
+    Returns:
+        PyTorch DataLoader that yields one BatchTimeSeriesContainer per step
+    """
+    # Create batch composer
+    composer = BatchComposer(
+        base_data_dir=base_data_dir,
+        generator_proportions=generator_proportions,
+        mixed_batches=mixed_batches,
+        device=device,
+        augmentations=augmentations,
+        augmentation_probabilities=augmentation_probabilities,
+        global_seed=global_seed,
+        nan_stats_path=nan_stats_path,
+        nan_patterns_path=nan_patterns_path,
+        chosen_scaler_name=chosen_scaler_name,
+    )
+    # Create dataset
+    dataset = ComposedDataset(
+        batch_composer=composer,
+        num_batches_per_epoch=num_batches_per_epoch,
+        batch_size=batch_size,
+    )
+    # The collate function is a module-level function rather than a closure so it
+    # can be pickled for worker processes (num_workers > 0 with the spawn start method)
+    dataloader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=1,  # Each dataset item is already a complete batch
+        shuffle=False,
+        num_workers=num_workers,
+        pin_memory=pin_memory,
+        collate_fn=_collate_single_batch,
+        drop_last=False,
+    )
+    logger.info(
+        f"Created DataLoader with {len(dataset)} batches per epoch, "
+        f"batch_size={batch_size}, mixed_batches={mixed_batches}"
+    )
+    return dataloader
+class SyntheticValidationDataset(torch.utils.data.Dataset):
+    """
+    Map-style dataset holding a fixed set of synthetic validation batches.
+    All batches are composed once, in the constructor, with a BatchComposer and
+    seeds derived from global_seed, and are then served by index.
+    """
+    def __init__(
+        self,
+        base_data_dir: str,
+        batch_size: int = 128,
+        num_batches: int = 2,
+        future_length: int = 512,
+        generator_proportions: Optional[Dict[str, float]] = None,
+        augmentations: Optional[Dict[str, bool]] = None,
+        augmentation_probabilities: Optional[Dict[str, float]] = None,
+        device: Optional[torch.device] = None,
+        global_seed: int = 42,
+        chosen_scaler_name: Optional[str] = None,
+        nan_stats_path: Optional[str] = None,
+        nan_patterns_path: Optional[str] = None,
+        rank: int = 0,
+        world_size: int = 1,
+    ):
+        """
+        Compose and store the validation batches.
+        Args:
+            base_data_dir: Base directory containing generator subdirectories
+            batch_size: Size of each validation batch
+            num_batches: Number of validation batches to pre-generate
+            future_length: Future length passed to create_batch for every batch
+            generator_proportions: Dict mapping generator names to proportions
+            augmentations: Dict mapping augmentation names to booleans;
+                "length_shortening" is always overridden to False
+            augmentation_probabilities: Dict mapping augmentation names to probabilities
+            device: Device batches are moved to when they are fetched
+            global_seed: Global random seed (offset by 999999 for validation)
+            chosen_scaler_name: Scaler name forwarded to BatchComposer
+            nan_stats_path: Forwarded to BatchComposer
+            nan_patterns_path: Forwarded to BatchComposer
+            rank: Forwarded to BatchComposer
+            world_size: Forwarded to BatchComposer
+        """
+        self.batch_size = batch_size
+        self.num_batches = num_batches
+        self.device = device
+        # Validation seeds live in their own range, offset from the global seed
+        validation_seed = global_seed + 999999
+        # Copy the caller's settings and switch length shortening off
+        val_augmentations = dict(augmentations) if augmentations else {}
+        val_augmentations["length_shortening"] = False
+        self.batch_composer = BatchComposer(
+            base_data_dir=base_data_dir,
+            generator_proportions=generator_proportions,
+            mixed_batches=True,  # Use mixed batches for validation
+            device=device,
+            global_seed=validation_seed,
+            augmentations=val_augmentations,
+            augmentation_probabilities=augmentation_probabilities,
+            nan_stats_path=nan_stats_path,
+            nan_patterns_path=nan_patterns_path,
+            chosen_scaler_name=chosen_scaler_name,
+            rank=rank,
+            world_size=world_size,
+        )
+        # One fixed seed per batch keeps validation reproducible
+        self.validation_batches = []
+        for batch_number in range(num_batches):
+            batch, _ = self.batch_composer.create_batch(
+                batch_size=batch_size,
+                future_length=future_length,
+                seed=validation_seed + batch_number,
+            )
+            self.validation_batches.append(batch)
+        logger.info(
+            f"Created {num_batches} fixed validation batches with batch_size={batch_size}"
+        )
+    def __len__(self) -> int:
+        """Number of validation batches requested at construction."""
+        return self.num_batches
+    def __getitem__(self, idx: int) -> BatchTimeSeriesContainer:
+        """
+        Fetch a pre-generated validation batch.
+        Args:
+            idx: Batch index
+        Returns:
+            The stored BatchTimeSeriesContainer, moved to self.device when one is set
+        Raises:
+            IndexError: If idx is not smaller than the number of stored batches
+        """
+        stored = self.validation_batches
+        if idx >= len(stored):
+            raise IndexError(f"Batch index {idx} out of range")
+        batch = stored[idx]
+        if self.device is not None:
+            batch.to_device(self.device)
+        return batch
+def create_synthetic_dataset(
+    base_data_dir: str,
+    batch_size: int = 128,
+    num_batches_per_epoch: int = 1000,
+    generator_proportions: Optional[Dict[str, float]] = None,
+    mixed_batches: bool = True,
+    augmentations: Optional[Dict[str, bool]] = None,
+    augmentation_probabilities: Optional[Dict[str, float]] = None,
+    global_seed: int = 42,
+    nan_stats_path: Optional[str] = None,
+    nan_patterns_path: Optional[str] = None,
+    chosen_scaler_name: Optional[str] = None,
+    rank: int = 0,
+    world_size: int = 1,
+) -> ComposedDataset:
+    """
+    Build the ComposedDataset used for training from saved generator batches.
+    Args:
+        base_data_dir: Base directory containing generator subdirectories.
+        batch_size: Size of each training batch.
+        num_batches_per_epoch: Number of batches per epoch.
+        generator_proportions: Dict mapping generator names to proportions.
+        mixed_batches: Whether to create mixed or uniform batches.
+        augmentations: Dict mapping augmentation names to booleans.
+        augmentation_probabilities: Dict mapping augmentation names to probabilities.
+        global_seed: Global random seed.
+        nan_stats_path: Path to nan stats file.
+        nan_patterns_path: Forwarded to BatchComposer.
+        chosen_scaler_name: Name of the scaler to use.
+        rank: Forwarded to BatchComposer.
+        world_size: Forwarded to BatchComposer.
+    Returns:
+        A ComposedDataset instance.
+    """
+    # The composer is built without a device; device placement is left to the training loop
+    dataset = ComposedDataset(
+        batch_composer=BatchComposer(
+            base_data_dir=base_data_dir,
+            generator_proportions=generator_proportions,
+            mixed_batches=mixed_batches,
+            device=None,
+            augmentations=augmentations,
+            augmentation_probabilities=augmentation_probabilities,
+            global_seed=global_seed,
+            nan_stats_path=nan_stats_path,
+            nan_patterns_path=nan_patterns_path,
+            chosen_scaler_name=chosen_scaler_name,
+            rank=rank,
+            world_size=world_size,
+        ),
+        num_batches_per_epoch=num_batches_per_epoch,
+        batch_size=batch_size,
+    )
+    logger.info(
+        f"Created ComposedDataset with {len(dataset)} batches per epoch, "
+        f"batch_size={batch_size}, mixed_batches={mixed_batches}"
+    )
+    return dataset

src/data/scalers.py ADDED Viewed

	@@ -0,0 +1,360 @@

+from abc import ABC, abstractmethod
+from typing import Dict, Optional
+import torch
+class BaseScaler(ABC):
+    """
+    Interface shared by all time series scalers.
+    A scaler derives statistics from the history window, then uses those
+    statistics to scale data and to map scaled data back to the original scale.
+    """
+    @abstractmethod
+    def compute_statistics(
+        self, history_values: torch.Tensor, history_mask: Optional[torch.Tensor] = None
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Derive the scaling statistics from historical data.
+        Args:
+            history_values: Historical values to compute the statistics from
+            history_mask: Optional mask selecting the history entries to use
+        Returns:
+            Dict of named statistic tensors, consumed by scale / inverse_scale
+        """
+    @abstractmethod
+    def scale(
+        self, data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Transform data into the scaled space.
+        Args:
+            data: Values to scale
+            statistics: Statistics returned by compute_statistics
+        Returns:
+            The scaled tensor
+        """
+    @abstractmethod
+    def inverse_scale(
+        self, scaled_data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Map scaled data back to the original scale.
+        Args:
+            scaled_data: Values in the scaled space
+            statistics: Statistics returned by compute_statistics
+        Returns:
+            The tensor in the original scale
+        """
+class RobustScaler(BaseScaler):
+    """
+    Scaler that centres every channel on its median and divides by its
+    interquartile range (IQR), both estimated from the history window.
+    """
+    def __init__(self, epsilon: float = 1e-6, min_scale: float = 1e-3):
+        """
+        Args:
+            epsilon: Positive constant added to the IQR before dividing
+            min_scale: Positive lower bound applied to the scale
+        Raises:
+            ValueError: If epsilon or min_scale is not positive
+        """
+        if epsilon <= 0:
+            raise ValueError("epsilon must be positive")
+        if min_scale <= 0:
+            raise ValueError("min_scale must be positive")
+        self.epsilon = epsilon
+        self.min_scale = min_scale
+    def _scale_denominator(self, iqr: torch.Tensor) -> torch.Tensor:
+        """Return iqr + epsilon, bounded below by min_scale."""
+        return torch.max(
+            iqr + self.epsilon, torch.tensor(self.min_scale, device=iqr.device)
+        )
+    def compute_statistics(
+        self, history_values: torch.Tensor, history_mask: Optional[torch.Tensor] = None
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Compute the per-series, per-channel median and IQR of the history.
+        Only entries selected by history_mask (when given) that are finite are used.
+        Channels without any usable value keep median 0 and IQR 1; channels with a
+        single usable value get IQR min_scale; otherwise the IQR is bounded below
+        by min_scale. If the quantile computation raises, the standard deviation
+        is used as the spread instead.
+        Args:
+            history_values: Tensor of shape [batch_size, seq_len, num_channels]
+            history_mask: Optional tensor indexed as [batch, time]
+        Returns:
+            {"median": ..., "iqr": ...}, each of shape [batch_size, 1, num_channels]
+        """
+        batch_size, _, num_channels = history_values.shape
+        device = history_values.device
+        medians = torch.zeros(batch_size, 1, num_channels, device=device)
+        iqrs = torch.ones(batch_size, 1, num_channels, device=device)
+        scale_floor = torch.tensor(self.min_scale, device=device)
+        for b in range(batch_size):
+            for c in range(num_channels):
+                values = history_values[b, :, c]
+                if history_mask is not None:
+                    values = values[history_mask[b, :].bool()]
+                # Drop NaN / inf; an empty selection stays empty here
+                values = values[torch.isfinite(values)]
+                if len(values) == 0:
+                    continue
+                medians[b, 0, c] = torch.median(values)
+                if len(values) == 1:
+                    # A single point has no spread
+                    iqrs[b, 0, c] = self.min_scale
+                    continue
+                try:
+                    spread = torch.quantile(values, 0.75) - torch.quantile(values, 0.25)
+                except Exception:
+                    spread = torch.std(values)
+                iqrs[b, 0, c] = torch.max(spread, scale_floor)
+        return {"median": medians, "iqr": iqrs}
+    def scale(
+        self, data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply robust scaling, (data - median) / denominator, where the denominator
+        is iqr + epsilon bounded below by min_scale; the result is clamped
+        to [-50, 50].
+        """
+        centred = data - statistics["median"]
+        denominator = self._scale_denominator(statistics["iqr"])
+        return torch.clamp(centred / denominator, -50.0, 50.0)
+    def inverse_scale(
+        self, scaled_data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Undo robust scaling: scaled_data * denominator + median.
+        For 4-D input the statistics get an extra trailing axis so they broadcast.
+        """
+        median = statistics["median"]
+        denominator = self._scale_denominator(statistics["iqr"])
+        if scaled_data.ndim == 4:
+            median = median.unsqueeze(-1)
+            denominator = denominator.unsqueeze(-1)
+        return scaled_data * denominator + median
+class MinMaxScaler(BaseScaler):
+    """
+    Min-Max scaler that normalizes data to the range [-1, 1].
+    """
+    def __init__(self, epsilon: float = 1e-8):
+        if epsilon <= 0:
+            raise ValueError("epsilon must be positive")
+        self.epsilon = epsilon
+    def compute_statistics(
+        self, history_values: torch.Tensor, history_mask: Optional[torch.Tensor] = None
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Compute min and max statistics from historical data.
+        """
+        batch_size, seq_len, num_channels = history_values.shape
+        device = history_values.device
+        mins = torch.zeros(batch_size, 1, num_channels, device=device)
+        maxs = torch.ones(batch_size, 1, num_channels, device=device)
+        for b in range(batch_size):
+            for c in range(num_channels):
+                channel_data = history_values[b, :, c]
+                if history_mask is not None:
+                    mask = history_mask[b, :].bool()
+                    valid_data = channel_data[mask]
+                else:
+                    valid_data = channel_data
+                if len(valid_data) == 0:
+                    continue
+                min_val = torch.min(valid_data)
+                max_val = torch.max(valid_data)
+                mins[b, 0, c] = min_val
+                maxs[b, 0, c] = max_val
+                if torch.abs(max_val - min_val) < self.epsilon:
+                    maxs[b, 0, c] = min_val + 1.0
+        return {"min": mins, "max": maxs}
+    def scale(
+        self, data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply min-max scaling to range [-1, 1].
+        """
+        min_val = statistics["min"]
+        max_val = statistics["max"]
+        normalized = (data - min_val) / (max_val - min_val + self.epsilon)
+        return normalized * 2.0 - 1.0
+    def inverse_scale(
+        self, scaled_data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply inverse min-max scaling, now compatible with 3D or 4D tensors.
+        """
+        min_val = statistics["min"]
+        max_val = statistics["max"]
+        if scaled_data.ndim == 4:
+            min_val = min_val.unsqueeze(-1)
+            max_val = max_val.unsqueeze(-1)
+        normalized = (scaled_data + 1.0) / 2.0
+        return normalized * (max_val - min_val + self.epsilon) + min_val
+class MeanScaler(BaseScaler):
+    """
+    A scaler that centers the data by subtracting the channel-wise mean.
+    This scaler only performs centering and does not affect the scale of the data.
+    """
+    def compute_statistics(
+            self, history_values: torch.Tensor, history_mask: Optional[torch.Tensor] = None
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Compute the mean for each channel from historical data.
+        """
+        batch_size, seq_len, num_channels = history_values.shape
+        device = history_values.device
+        # Initialize a tensor to store the mean for each channel in each batch item
+        means = torch.zeros(batch_size, 1, num_channels, device=device)
+        for b in range(batch_size):
+            for c in range(num_channels):
+                channel_data = history_values[b, :, c]
+                # Use the mask to select only valid (observed) data points
+                if history_mask is not None:
+                    mask = history_mask[b, :].bool()
+                    valid_data = channel_data[mask]
+                else:
+                    valid_data = channel_data
+                # Skip if there's no valid data for this channel
+                if len(valid_data) == 0:
+                    continue
+                # Filter out non-finite values like NaN or Inf before computing
+                valid_data = valid_data[torch.isfinite(valid_data)]
+                if len(valid_data) == 0:
+                    continue
+                # Compute the mean and store it
+                means[b, 0, c] = torch.mean(valid_data)
+        return {"mean": means}
+    def scale(
+            self, data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply mean centering: data - mean.
+        """
+        mean = statistics["mean"]
+        return data - mean
+    def inverse_scale(
+            self, scaled_data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply inverse mean centering: scaled_data + mean.
+        Handles both 3D (e.g., training input) and 4D (e.g., model output samples) tensors.
+        """
+        mean = statistics["mean"]
+        # Adjust shape for 4D tensors (batch, seq_len, channels, samples)
+        if scaled_data.ndim == 4:
+            mean = mean.unsqueeze(-1)
+        return scaled_data + mean
+class MedianScaler(BaseScaler):
+    """
+    A scaler that centers the data by subtracting the channel-wise median.
+    This scaler only performs centering and does not affect the scale of the data.
+    It is more robust to outliers than the MeanScaler.
+    """
+    def compute_statistics(
+            self, history_values: torch.Tensor, history_mask: Optional[torch.Tensor] = None
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Compute the median for each channel from historical data.
+        """
+        batch_size, seq_len, num_channels = history_values.shape
+        device = history_values.device
+        # Initialize a tensor to store the median for each channel in each batch item
+        medians = torch.zeros(batch_size, 1, num_channels, device=device)
+        for b in range(batch_size):
+            for c in range(num_channels):
+                channel_data = history_values[b, :, c]
+                # Use the mask to select only valid (observed) data points
+                if history_mask is not None:
+                    mask = history_mask[b, :].bool()
+                    valid_data = channel_data[mask]
+                else:
+                    valid_data = channel_data
+                # Skip if there's no valid data for this channel
+                if len(valid_data) == 0:
+                    continue
+                # Filter out non-finite values like NaN or Inf before computing
+                valid_data = valid_data[torch.isfinite(valid_data)]
+                if len(valid_data) == 0:
+                    continue
+                # Compute the median and store it
+                medians[b, 0, c] = torch.median(valid_data)
+        return {"median": medians}
+    def scale(
+            self, data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply median centering: data - median.
+        """
+        median = statistics["median"]
+        return data - median
+    def inverse_scale(
+            self, scaled_data: torch.Tensor, statistics: Dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        """
+        Apply inverse median centering: scaled_data + median.
+        Handles both 3D (e.g., training input) and 4D (e.g., model output samples) tensors.
+        """
+        median = statistics["median"]
+        # Adjust shape for 4D tensors (batch, seq_len, channels, samples)
+        if scaled_data.ndim == 4:
+            median = median.unsqueeze(-1)
+        return scaled_data + median

src/data/time_features.py ADDED Viewed

	@@ -0,0 +1,564 @@

+import logging
+from typing import Any, Dict, List, Optional
+import numpy as np
+import pandas as pd
+import scipy.fft as fft
+import torch
+from gluonts.time_feature import time_features_from_frequency_str
+from gluonts.time_feature._base import (
+    day_of_month,
+    day_of_month_index,
+    day_of_week,
+    day_of_week_index,
+    day_of_year,
+    hour_of_day,
+    hour_of_day_index,
+    minute_of_hour,
+    minute_of_hour_index,
+    month_of_year,
+    month_of_year_index,
+    second_of_minute,
+    second_of_minute_index,
+    week_of_year,
+    week_of_year_index,
+)
+from gluonts.time_feature.holiday import (
+    BLACK_FRIDAY,
+    CHRISTMAS_DAY,
+    CHRISTMAS_EVE,
+    CYBER_MONDAY,
+    EASTER_MONDAY,
+    EASTER_SUNDAY,
+    GOOD_FRIDAY,
+    INDEPENDENCE_DAY,
+    LABOR_DAY,
+    MEMORIAL_DAY,
+    NEW_YEARS_DAY,
+    NEW_YEARS_EVE,
+    THANKSGIVING,
+    SpecialDateFeatureSet,
+    exponential_kernel,
+    squared_exponential_kernel,
+)
+from gluonts.time_feature.seasonality import get_seasonality
+from scipy.signal import find_peaks
+from src.data.constants import BASE_END_DATE, BASE_START_DATE
+from src.data.frequency import (
+    Frequency,
+    validate_frequency_safety,
+)
+from src.utils.utils import device
+# Configure logging
+# NOTE(review): basicConfig() runs when this module is imported and sets up the
+# root logger at DEBUG level (it is a no-op if the root logger already has
+# handlers); confirm this is intended for a library module.
+logging.basicConfig(
+    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+# Module-level logger used by the functions in this file.
+logger = logging.getLogger(__name__)
+# Enhanced feature sets for different frequencies
+# Keys are the categories returned by TimeFeatureGenerator._get_feature_category.
+# Each category holds two lists of GluonTS time-feature functions:
+#   "normalized" - used as returned by GluonTS;
+#   "index"      - raw index features, rescaled by their maximum in
+#                  TimeFeatureGenerator._compute_enhanced_features.
+ENHANCED_TIME_FEATURES = {
+    # High-frequency features (seconds, minutes)
+    "high_freq": {
+        "normalized": [
+            second_of_minute,
+            minute_of_hour,
+            hour_of_day,
+            day_of_week,
+            day_of_month,
+        ],
+        "index": [
+            second_of_minute_index,
+            minute_of_hour_index,
+            hour_of_day_index,
+            day_of_week_index,
+        ],
+    },
+    # Medium-frequency features (hourly, daily)
+    "medium_freq": {
+        "normalized": [
+            hour_of_day,
+            day_of_week,
+            day_of_month,
+            day_of_year,
+            month_of_year,
+        ],
+        "index": [
+            hour_of_day_index,
+            day_of_week_index,
+            day_of_month_index,
+            week_of_year_index,
+        ],
+    },
+    # Low-frequency features (weekly, monthly)
+    "low_freq": {
+        "normalized": [day_of_week, day_of_month, month_of_year, week_of_year],
+        "index": [day_of_week_index, month_of_year_index, week_of_year_index],
+    },
+}
+# Holiday features for different markets/regions
+# Keys are the accepted values of TimeFeatureGenerator's ``holiday_set`` argument;
+# each value is the list of GluonTS holiday definitions handed to
+# SpecialDateFeatureSet (one feature per holiday).
+HOLIDAY_FEATURE_SETS = {
+    "us_business": [
+        NEW_YEARS_DAY,
+        MEMORIAL_DAY,
+        INDEPENDENCE_DAY,
+        LABOR_DAY,
+        THANKSGIVING,
+        CHRISTMAS_EVE,
+        CHRISTMAS_DAY,
+        NEW_YEARS_EVE,
+    ],
+    "us_retail": [
+        NEW_YEARS_DAY,
+        EASTER_SUNDAY,
+        MEMORIAL_DAY,
+        INDEPENDENCE_DAY,
+        LABOR_DAY,
+        THANKSGIVING,
+        BLACK_FRIDAY,
+        CYBER_MONDAY,
+        CHRISTMAS_EVE,
+        CHRISTMAS_DAY,
+        NEW_YEARS_EVE,
+    ],
+    "christian": [
+        NEW_YEARS_DAY,
+        GOOD_FRIDAY,
+        EASTER_SUNDAY,
+        EASTER_MONDAY,
+        CHRISTMAS_EVE,
+        CHRISTMAS_DAY,
+        NEW_YEARS_EVE,
+    ],
+}
+class TimeFeatureGenerator:
+    """
+    Enhanced time feature generator that leverages full GluonTS capabilities.
+    """
+    def __init__(
+        self,
+        use_enhanced_features: bool = True,
+        use_holiday_features: bool = True,
+        holiday_set: str = "us_business",
+        holiday_kernel: str = "exponential",
+        holiday_kernel_alpha: float = 1.0,
+        use_index_features: bool = True,
+        k_max: int = 15,
+        include_seasonality_info: bool = True,
+        use_auto_seasonality: bool = False,  # New parameter
+        max_seasonal_periods: int = 3,  # New parameter
+    ):
+        """
+        Initialize enhanced time feature generator.
+        Parameters
+        ----------
+        use_enhanced_features : bool
+            Whether to use frequency-specific enhanced features
+        use_holiday_features : bool
+            Whether to include holiday features
+        holiday_set : str
+            Which holiday set to use ('us_business', 'us_retail', 'christian')
+        holiday_kernel : str
+            Holiday kernel type ('indicator', 'exponential', 'squared_exponential')
+        holiday_kernel_alpha : float
+            Kernel parameter for exponential kernels
+        use_index_features : bool
+            Whether to include index-based features alongside normalized ones
+        k_max : int
+            Maximum number of time features to pad to
+        include_seasonality_info : bool
+            Whether to include seasonality information as features
+        use_auto_seasonality : bool
+            Whether to use automatic FFT-based seasonality detection
+        max_seasonal_periods : int
+            Maximum number of seasonal periods to detect automatically
+        """
+        self.use_enhanced_features = use_enhanced_features
+        self.use_holiday_features = use_holiday_features
+        self.holiday_set = holiday_set
+        self.use_index_features = use_index_features
+        self.k_max = k_max
+        self.include_seasonality_info = include_seasonality_info
+        self.use_auto_seasonality = use_auto_seasonality
+        self.max_seasonal_periods = max_seasonal_periods
+        # Initialize holiday feature set
+        self.holiday_feature_set = None
+        if use_holiday_features and holiday_set in HOLIDAY_FEATURE_SETS:
+            kernel_func = self._get_holiday_kernel(holiday_kernel, holiday_kernel_alpha)
+            self.holiday_feature_set = SpecialDateFeatureSet(
+                HOLIDAY_FEATURE_SETS[holiday_set], kernel_func
+            )
+    def _get_holiday_kernel(self, kernel_type: str, alpha: float):
+        """Get holiday kernel function."""
+        if kernel_type == "exponential":
+            return exponential_kernel(alpha)
+        elif kernel_type == "squared_exponential":
+            return squared_exponential_kernel(alpha)
+        else:
+            # Default indicator kernel
+            return lambda x: float(x == 0)
+    def _get_feature_category(self, freq_str: str) -> str:
+        """Determine feature category based on frequency."""
+        if freq_str in ["s", "1min", "5min", "10min", "15min"]:
+            return "high_freq"
+        elif freq_str in ["h", "D"]:
+            return "medium_freq"
+        else:
+            return "low_freq"
+    def _compute_enhanced_features(
+        self, period_index: pd.PeriodIndex, freq_str: str
+    ) -> np.ndarray:
+        """Compute enhanced time features based on frequency."""
+        if not self.use_enhanced_features:
+            return np.array([]).reshape(len(period_index), 0)
+        category = self._get_feature_category(freq_str)
+        feature_config = ENHANCED_TIME_FEATURES[category]
+        features = []
+        # Add normalized features
+        for feat_func in feature_config["normalized"]:
+            try:
+                feat_values = feat_func(period_index)
+                features.append(feat_values)
+            except Exception:
+                continue
+        # Add index features if enabled
+        if self.use_index_features:
+            for feat_func in feature_config["index"]:
+                try:
+                    feat_values = feat_func(period_index)
+                    # Normalize index features to [0, 1] range
+                    if feat_values.max() > 0:
+                        feat_values = feat_values / feat_values.max()
+                    features.append(feat_values)
+                except Exception:
+                    continue
+        if features:
+            return np.stack(features, axis=-1)
+        else:
+            return np.array([]).reshape(len(period_index), 0)
+    def _compute_holiday_features(self, date_range: pd.DatetimeIndex) -> np.ndarray:
+        """Compute holiday features."""
+        if not self.use_holiday_features or self.holiday_feature_set is None:
+            return np.array([]).reshape(len(date_range), 0)
+        try:
+            holiday_features = self.holiday_feature_set(date_range)
+            return holiday_features.T  # Transpose to get [time, features] shape
+        except Exception:
+            return np.array([]).reshape(len(date_range), 0)
+    def _detect_auto_seasonality(self, time_series_values: np.ndarray) -> list:
+        """
+        Detect seasonal periods automatically using FFT analysis.
+        Parameters
+        ----------
+        time_series_values : np.ndarray
+            Time series values for seasonality detection
+        Returns
+        -------
+        list
+            List of detected seasonal periods
+        """
+        if not self.use_auto_seasonality or len(time_series_values) < 10:
+            return []
+        try:
+            # Remove NaN values
+            values = time_series_values[~np.isnan(time_series_values)]
+            if len(values) < 10:
+                return []
+            # Simple linear detrending
+            x = np.arange(len(values))
+            coeffs = np.polyfit(x, values, 1)
+            trend = np.polyval(coeffs, x)
+            detrended = values - trend
+            # Apply Hann window to reduce spectral leakage
+            window = np.hanning(len(detrended))
+            windowed = detrended * window
+            # Zero padding for better frequency resolution
+            padded_length = len(windowed) * 2
+            padded_values = np.zeros(padded_length)
+            padded_values[: len(windowed)] = windowed
+            # Compute FFT
+            fft_values = fft.rfft(padded_values)
+            fft_magnitudes = np.abs(fft_values)
+            freqs = np.fft.rfftfreq(padded_length)
+            # Exclude DC component
+            fft_magnitudes[0] = 0.0
+            # Find peaks with threshold (5% of max magnitude)
+            threshold = 0.05 * np.max(fft_magnitudes)
+            peak_indices, _ = find_peaks(fft_magnitudes, height=threshold)
+            if len(peak_indices) == 0:
+                return []
+            # Sort by magnitude and take top periods
+            sorted_indices = peak_indices[
+                np.argsort(fft_magnitudes[peak_indices])[::-1]
+            ]
+            top_indices = sorted_indices[: self.max_seasonal_periods]
+            # Convert frequencies to periods
+            periods = []
+            for idx in top_indices:
+                if freqs[idx] > 0:
+                    period = 1.0 / freqs[idx]
+                    # Scale back to original length and round
+                    period = round(period / 2)  # Account for zero padding
+                    if 2 <= period <= len(values) // 2:  # Reasonable period range
+                        periods.append(period)
+            return list(set(periods))  # Remove duplicates
+        except Exception:
+            return []
+    def _compute_seasonality_features(
+        self,
+        period_index: pd.PeriodIndex,
+        freq_str: str,
+        time_series_values: np.ndarray = None,
+    ) -> np.ndarray:
+        """Compute seasonality-aware features."""
+        if not self.include_seasonality_info:
+            return np.array([]).reshape(len(period_index), 0)
+        all_seasonal_features = []
+        # Original frequency-based seasonality
+        try:
+            seasonality = get_seasonality(freq_str)
+            if seasonality > 1:
+                positions = np.arange(len(period_index))
+                sin_feat = np.sin(2 * np.pi * positions / seasonality)
+                cos_feat = np.cos(2 * np.pi * positions / seasonality)
+                all_seasonal_features.extend([sin_feat, cos_feat])
+        except Exception:
+            pass
+        # Automatic seasonality detection
+        if self.use_auto_seasonality and time_series_values is not None:
+            auto_periods = self._detect_auto_seasonality(time_series_values)
+            for period in auto_periods:
+                try:
+                    positions = np.arange(len(period_index))
+                    sin_feat = np.sin(2 * np.pi * positions / period)
+                    cos_feat = np.cos(2 * np.pi * positions / period)
+                    all_seasonal_features.extend([sin_feat, cos_feat])
+                except Exception:
+                    continue
+        if all_seasonal_features:
+            return np.stack(all_seasonal_features, axis=-1)
+        else:
+            return np.array([]).reshape(len(period_index), 0)
+    def compute_features(
+        self,
+        period_index: pd.PeriodIndex,
+        date_range: pd.DatetimeIndex,
+        freq_str: str,
+        time_series_values: np.ndarray = None,
+    ) -> np.ndarray:
+        """
+        Compute all time features for given period index.
+        Parameters
+        ----------
+        period_index : pd.PeriodIndex
+            Period index for computing features
+        date_range : pd.DatetimeIndex
+            Corresponding datetime index for holiday features
+        freq_str : str
+            Frequency string
+        time_series_values : np.ndarray, optional
+            Time series values for automatic seasonality detection
+        Returns
+        -------
+        np.ndarray
+            Time features array of shape [time_steps, num_features]
+        """
+        all_features = []
+        # Standard GluonTS features
+        try:
+            standard_features = time_features_from_frequency_str(freq_str)
+            if standard_features:
+                std_feat = np.stack(
+                    [feat(period_index) for feat in standard_features], axis=-1
+                )
+                all_features.append(std_feat)
+        except Exception:
+            pass
+        # Enhanced features
+        enhanced_feat = self._compute_enhanced_features(period_index, freq_str)
+        if enhanced_feat.shape[1] > 0:
+            all_features.append(enhanced_feat)
+        # Holiday features
+        holiday_feat = self._compute_holiday_features(date_range)
+        if holiday_feat.shape[1] > 0:
+            all_features.append(holiday_feat)
+        # Seasonality features (including auto-detected)
+        seasonality_feat = self._compute_seasonality_features(
+            period_index, freq_str, time_series_values
+        )
+        if seasonality_feat.shape[1] > 0:
+            all_features.append(seasonality_feat)
+        if all_features:
+            combined_features = np.concatenate(all_features, axis=-1)
+        else:
+            combined_features = np.zeros((len(period_index), 1))
+        return combined_features
+def compute_batch_time_features(
+    start: List[np.datetime64],
+    history_length: int,
+    future_length: int,
+    batch_size: int,
+    frequency: List[Frequency],
+    K_max: int = 6,
+    time_feature_config: Optional[Dict[str, Any]] = None,
+):
+    """
+    Compute time features from start timestamps and frequency.
+    Parameters
+    ----------
+    start : array-like, shape (batch_size,)
+        Start timestamps for each batch item.
+    history_length : int
+        Length of history sequence.
+    future_length : int
+        Length of target sequence.
+    batch_size : int
+        Batch size.
+    frequency : array-like, shape (batch_size,)
+        Frequency of the time series.
+    K_max : int, optional
+        Maximum number of time features to pad to (default: 6).
+    time_feature_config : dict, optional
+        Configuration for enhanced time features.
+    Returns
+    -------
+    tuple
+        (history_time_features, target_time_features) where each is a torch.Tensor
+        of shape (batch_size, length, K_max).
+    """
+    # Initialize enhanced feature generator
+    feature_config = time_feature_config or {}
+    feature_generator = TimeFeatureGenerator(**feature_config)
+    # Generate timestamps and features
+    history_features_list = []
+    future_features_list = []
+    total_length = history_length + future_length
+    for i in range(batch_size):
+        frequency_i = frequency[i]
+        freq_str = frequency_i.to_pandas_freq(for_date_range=True)
+        period_freq_str = frequency_i.to_pandas_freq(for_date_range=False)
+        # Validate start timestamp is within safe bounds
+        start_ts = pd.Timestamp(start[i])
+        if not validate_frequency_safety(start_ts, total_length, frequency_i):
+            logger.debug(
+                f"Start date {start_ts} not safe for total_length={total_length}, frequency={frequency_i}. "
+                f"Using BASE_START_DATE instead."
+            )
+            start_ts = BASE_START_DATE
+        # Create history range with bounds checking
+        history_range = pd.date_range(
+            start=start_ts, periods=history_length, freq=freq_str
+        )
+        # Check if history range goes beyond safe bounds
+        if history_range[-1] > BASE_END_DATE:
+            safe_start = BASE_END_DATE - pd.tseries.frequencies.to_offset(freq_str) * (
+                history_length + future_length
+            )
+            if safe_start < BASE_START_DATE:
+                safe_start = BASE_START_DATE
+            history_range = pd.date_range(
+                start=safe_start, periods=history_length, freq=freq_str
+            )
+        future_start = history_range[-1] + pd.tseries.frequencies.to_offset(freq_str)
+        future_range = pd.date_range(
+            start=future_start, periods=future_length, freq=freq_str
+        )
+        # Convert to period indices
+        history_period_idx = history_range.to_period(period_freq_str)
+        future_period_idx = future_range.to_period(period_freq_str)
+        # Compute enhanced features
+        history_features = feature_generator.compute_features(
+            history_period_idx, history_range, freq_str
+        )
+        future_features = feature_generator.compute_features(
+            future_period_idx, future_range, freq_str
+        )
+        # Pad or truncate to K_max
+        history_features = _pad_or_truncate_features(history_features, K_max)
+        future_features = _pad_or_truncate_features(future_features, K_max)
+        history_features_list.append(history_features)
+        future_features_list.append(future_features)
+    # Stack into batch tensors
+    history_time_features = np.stack(history_features_list, axis=0)
+    future_time_features = np.stack(future_features_list, axis=0)
+    return (
+        torch.from_numpy(history_time_features).float().to(device),
+        torch.from_numpy(future_time_features).float().to(device),
+    )
+def _pad_or_truncate_features(features: np.ndarray, K_max: int) -> np.ndarray:
+    """Pad with zeros or truncate features to K_max dimensions."""
+    seq_len, num_features = features.shape
+    if num_features < K_max:
+        # Pad with zeros
+        padding = np.zeros((seq_len, K_max - num_features))
+        features = np.concatenate([features, padding], axis=-1)
+    elif num_features > K_max:
+        # Truncate to K_max (keep most important features first)
+        features = features[:, :K_max]
+    return features

src/data/utils.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import random
+from typing import Optional, Tuple, Union
+def sample_future_length(
+    range: Union[Tuple[int, int], str] = "gift_eval",
+    total_length: Optional[int] = None,
+) -> int:
+    """
+    Sample a forecast length.
+    - If `range` is a tuple, uniformly sample in [min, max]. When `total_length` is
+      provided, enforce a cap so the result is at most floor(0.45 * total_length).
+    - If `range` is "gift_eval", sample from a pre-defined weighted set. When
+      `total_length` is provided, filter out candidates greater than
+      floor(0.45 * total_length) before sampling.
+    """
+    # Compute the cap when total_length is provided
+    cap: Optional[int] = None
+    if total_length is not None:
+        cap = max(1, int(0.45 * int(total_length)))
+    if isinstance(range, tuple):
+        min_len, max_len = range
+        if cap is not None:
+            effective_max_len = min(max_len, cap)
+            # Ensure valid bounds
+            if min_len > effective_max_len:
+                return effective_max_len
+            return random.randint(min_len, effective_max_len)
+        return random.randint(min_len, max_len)
+    elif range == "gift_eval":
+        # Gift eval forecast lengths with their frequencies
+        GIFT_EVAL_FORECAST_LENGTHS = {
+            48: 5,
+            720: 38,
+            480: 38,
+            30: 3,
+            300: 16,
+            8: 2,
+            120: 3,
+            450: 8,
+            80: 8,
+            12: 2,
+            900: 10,
+            180: 3,
+            600: 10,
+            60: 3,
+            210: 3,
+            195: 3,
+            140: 3,
+            130: 3,
+            14: 1,
+            18: 1,
+            13: 1,
+            6: 1,
+        }
+        lengths = list(GIFT_EVAL_FORECAST_LENGTHS.keys())
+        weights = list(GIFT_EVAL_FORECAST_LENGTHS.values())
+        if cap is not None:
+            filtered = [
+                (length_candidate, weight)
+                for length_candidate, weight in zip(lengths, weights)
+                if length_candidate <= cap
+            ]
+            if filtered:
+                lengths, weights = zip(*filtered)
+                lengths = list(lengths)
+                weights = list(weights)
+        return random.choices(lengths, weights=weights)[0]
+    else:
+        raise ValueError(f"Invalid range: {range}")

src/gift_eval/__init__.py ADDED Viewed

File without changes

src/gift_eval/aggregate_results.py ADDED Viewed

	@@ -0,0 +1,160 @@

+import argparse
+import glob
+import logging
+from pathlib import Path
+from typing import List
+import pandas as pd
+from src.gift_eval.constants import (
+    ALL_DATASETS,
+    DATASET_PROPERTIES,
+    MED_LONG_DATASETS,
+    PRETTY_NAMES,
+)
+# Module-level logger; this module does not configure handlers or levels itself.
+logger = logging.getLogger(__name__)
+def get_all_datasets_full_name() -> List[str]:
+    """Get all possible dataset full names for validation."""
+    terms = ["short", "medium", "long"]
+    datasets_full_names: List[str] = []
+    for name in ALL_DATASETS:
+        for term in terms:
+            if term in ["medium", "long"] and name not in MED_LONG_DATASETS.split():
+                continue
+            if "/" in name:
+                ds_key, ds_freq = name.split("/")
+                ds_key = ds_key.lower()
+                ds_key = PRETTY_NAMES.get(ds_key, ds_key)
+            else:
+                ds_key = name.lower()
+                ds_key = PRETTY_NAMES.get(ds_key, ds_key)
+                ds_freq = DATASET_PROPERTIES[ds_key]["frequency"]
+            datasets_full_names.append(f"{ds_key}/{ds_freq}/{term}")
+    return datasets_full_names
def aggregate_results(
    result_root_dir: str | Path,
) -> pd.DataFrame | None:
    """Aggregate per-dataset ``results.csv`` files into a single dataframe.

    Every ``results.csv`` found recursively under ``result_root_dir`` is read,
    the rows are concatenated and sorted by the ``dataset`` column, duplicate
    dataset rows are dropped (first occurrence kept), a completion summary
    against ``get_all_datasets_full_name()`` is logged, and the combined table
    is always written to ``<result_root_dir>/all_results.csv``.

    Args:
        result_root_dir: Root directory searched for ``results.csv`` files.

    Returns:
        The combined dataframe, or ``None`` when no result file was found or
        none of the files contained usable rows.
    """
    result_root_dir = Path(result_root_dir)
    logger.info(f"Aggregating results in: {result_root_dir}")

    # Results are written per-dataset as <result_root_dir>/<dataset_name>/results.csv.
    # Sorting the paths makes the read order, and therefore which duplicate row
    # counts as "first", independent of filesystem enumeration order.
    result_files = sorted(
        glob.glob(f"{result_root_dir}/**/results.csv", recursive=True)
    )
    if not result_files:
        logger.error("No result files found!")
        return None

    dataframes: list[pd.DataFrame] = []
    for file in result_files:
        try:
            df = pd.read_csv(file)
        except pd.errors.EmptyDataError:
            logger.warning(f"Skipping empty file: {file}")
            continue
        except Exception as e:
            # Best effort: one unreadable file must not abort the aggregation.
            logger.error(f"Error reading {file}: {str(e)}")
            continue
        if len(df) == 0:
            logger.warning(f"Empty file: {file}")
        elif "dataset" not in df.columns:
            # Without this column the rows can be neither sorted nor matched.
            logger.warning(f"Skipping file without a 'dataset' column: {file}")
        else:
            dataframes.append(df)

    if not dataframes:
        logger.warning("No valid CSV files found to combine")
        return None

    # A stable sort keeps rows that share a dataset name in read order, so the
    # keep="first" de-duplication below is deterministic.
    combined_df = pd.concat(dataframes, ignore_index=True).sort_values(
        "dataset", kind="stable"
    )
    duplicated_mask = combined_df["dataset"].duplicated()
    if duplicated_mask.any():
        duplicate_datasets = combined_df.loc[duplicated_mask, "dataset"].tolist()
        logger.warning(f"Warning: Duplicate datasets found: {duplicate_datasets}")
        combined_df = combined_df.drop_duplicates(subset=["dataset"], keep="first")
        logger.info(
            f"Removed duplicates, {len(combined_df)} unique datasets remaining"
        )
    logger.info(f"Combined results: {len(combined_df)} datasets")

    # Compare the expected experiments with the completed ones; sets keep the
    # membership tests O(1) while the lists preserve the reporting order.
    all_datasets_full_name = get_all_datasets_full_name()
    expected = set(all_datasets_full_name)
    completed_experiments_clean = [
        exp for exp in combined_df["dataset"].tolist() if exp in expected
    ]
    completed = set(completed_experiments_clean)
    missing_or_failed_experiments = [
        exp for exp in all_datasets_full_name if exp not in completed
    ]

    logger.info("=== EXPERIMENT SUMMARY ===")
    logger.info(f"Total expected datasets: {len(all_datasets_full_name)}")
    logger.info(f"Completed experiments: {len(completed_experiments_clean)}")
    logger.info(f"Missing/failed experiments: {len(missing_or_failed_experiments)}")
    logger.info("Completed experiments:")
    for i, exp in enumerate(completed_experiments_clean):
        logger.info(f"  {i + 1:3d}: {exp}")
    if missing_or_failed_experiments:
        logger.info("Missing or failed experiments:")
        for i, exp in enumerate(missing_or_failed_experiments):
            logger.info(f"  {i + 1:3d}: {exp}")

    # Guard against an empty expectation list instead of dividing by zero.
    if all_datasets_full_name:
        completion_rate = (
            len(completed_experiments_clean) / len(all_datasets_full_name) * 100
        )
    else:
        completion_rate = 0.0
    logger.info(f"Completion rate: {completion_rate:.1f}%")

    output_file = result_root_dir / "all_results.csv"
    combined_df.to_csv(output_file, index=False)
    logger.info(f"Combined results saved to: {output_file}")
    return combined_df
if __name__ == "__main__":
    # Command-line entry point: combine every results.csv found below
    # --result_root_dir into a single all_results.csv.
    cli = argparse.ArgumentParser(
        description="Aggregate GIFT-Eval results from multiple CSV files"
    )
    cli.add_argument(
        "--result_root_dir",
        type=str,
        required=True,
        help="Root directory containing result subdirectories",
    )
    options = cli.parse_args()
    options.result_root_dir = Path(options.result_root_dir)

    # Logging is configured only after argument parsing, as in the original flow.
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )
    logger.info(f"Searching in directory: {options.result_root_dir}")
    aggregate_results(result_root_dir=options.result_root_dir)

src/gift_eval/constants.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import json
+import logging
+import os
+from gluonts.ev.metrics import (
+    MAE,
+    MAPE,
+    MASE,
+    MSE,
+    MSIS,
+    ND,
+    NRMSE,
+    RMSE,
+    SMAPE,
+    MeanWeightedSumQuantileLoss,
+)
logger = logging.getLogger(__name__)

# Environment setup
# NOTE(review): presumably required for deterministic cuBLAS kernels - confirm.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

# Resolve the properties file next to this module so that loading does not
# depend on the current working directory of the calling process.
DATASET_PROPERTIES_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "dataset_properties.json"
)
try:
    with open(DATASET_PROPERTIES_PATH, "r") as f:
        DATASET_PROPERTIES = json.load(f)
except (OSError, ValueError) as e:
    # Missing/unreadable file (OSError) or malformed JSON (ValueError): keep
    # the module importable and let consumers fall back to their defaults.
    DATASET_PROPERTIES = {}
    logger.warning(
        f"Could not load dataset properties from {DATASET_PROPERTIES_PATH}: {e}. Domain and num_variates will fall back to defaults."
    )

# Datasets (whitespace-separated names; "<name>/<freq>" selects a frequency variant)
SHORT_DATASETS = "m4_yearly m4_quarterly m4_monthly m4_weekly m4_daily m4_hourly electricity/15T electricity/H electricity/D electricity/W solar/10T solar/H solar/D solar/W hospital covid_deaths us_births/D us_births/M us_births/W saugeenday/D saugeenday/M saugeenday/W temperature_rain_with_missing kdd_cup_2018_with_missing/H kdd_cup_2018_with_missing/D car_parts_with_missing restaurant hierarchical_sales/D hierarchical_sales/W LOOP_SEATTLE/5T LOOP_SEATTLE/H LOOP_SEATTLE/D SZ_TAXI/15T SZ_TAXI/H M_DENSE/H M_DENSE/D ett1/15T ett1/H ett1/D ett1/W ett2/15T ett2/H ett2/D ett2/W jena_weather/10T jena_weather/H jena_weather/D bitbrains_fast_storage/5T bitbrains_fast_storage/H bitbrains_rnd/5T bitbrains_rnd/H bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
MED_LONG_DATASETS = "electricity/15T electricity/H solar/10T solar/H kdd_cup_2018_with_missing/H LOOP_SEATTLE/5T LOOP_SEATTLE/H SZ_TAXI/15T M_DENSE/H ett1/15T ett1/H ett2/15T ett2/H jena_weather/10T jena_weather/H bitbrains_fast_storage/5T bitbrains_rnd/5T bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
# dict.fromkeys de-duplicates while preserving first-seen order; a plain set()
# would yield a different ordering on every run (string hash randomisation),
# making evaluation order and summaries non-reproducible.
ALL_DATASETS = list(dict.fromkeys(SHORT_DATASETS.split() + MED_LONG_DATASETS.split()))

# Evaluation terms
TERMS = ["short", "medium", "long"]

# Pretty names mapping (following GIFT eval standard)
PRETTY_NAMES = {
    "saugeenday": "saugeen",
    "temperature_rain_with_missing": "temperature_rain",
    "kdd_cup_2018_with_missing": "kdd_cup_2018",
    "car_parts_with_missing": "car_parts",
}

# GluonTS metric objects handed to the evaluator.
METRICS = [
    MSE(forecast_type="mean"),
    MSE(forecast_type=0.5),
    MAE(),
    MASE(),
    MAPE(),
    SMAPE(),
    MSIS(),
    RMSE(),
    NRMSE(),
    ND(),
    MeanWeightedSumQuantileLoss(
        quantile_levels=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    ),
]

# Metric column names looked up in the evaluation output, listed in the same
# order as METRICS.
STANDARD_METRIC_NAMES = [
    "MSE[mean]",
    "MSE[0.5]",
    "MAE[0.5]",
    "MASE[0.5]",
    "MAPE[0.5]",
    "sMAPE[0.5]",
    "MSIS",
    "RMSE[mean]",
    "NRMSE[mean]",
    "ND[0.5]",
    "mean_weighted_sum_quantile_loss",
]

src/gift_eval/data.py ADDED Viewed

	@@ -0,0 +1,234 @@

+# Copyright (c) 2023, Salesforce, Inc.
+# SPDX-License-Identifier: Apache-2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+from collections.abc import Iterable, Iterator
+from enum import Enum
+from functools import cached_property
+from pathlib import Path
+from typing import Optional
+import datasets
+import pyarrow.compute as pc
+from gluonts.dataset import DataEntry
+from gluonts.dataset.common import ProcessDataEntry
+from gluonts.dataset.split import TestData, TrainingDataset, split
+from gluonts.itertools import Map
+from gluonts.time_feature import norm_freq_str
+from gluonts.transform import Transformation
+from pandas.tseries.frequencies import to_offset
+from toolz import compose
# Fraction of the shortest series used to size the rolling test windows, and
# the default upper bound on the number of windows.
TEST_SPLIT = 0.1
MAX_WINDOW = 20

# Base ("short" term) prediction lengths keyed by normalised frequency alias.
# Used for datasets whose name contains "m4".
M4_PRED_LENGTH_MAP = {
    "A": 6,
    "Q": 8,
    "M": 18,
    "W": 13,
    "D": 14,
    "H": 48, "h": 48,
    "Y": 6,
}

# Used for every other dataset.
PRED_LENGTH_MAP = {
    "M": 12,
    "W": 8,
    "D": 30,
    "H": 48, "h": 48,
    "T": 48,
    "S": 60, "s": 60,
    "min": 48,
}

# NOTE(review): not referenced anywhere in the visible code - presumably kept
# for TFB-style benchmarks; confirm before removing.
TFB_PRED_LENGTH_MAP = {
    "A": 6, "Y": 6,
    "H": 48, "h": 48,
    "Q": 8,
    "D": 14,
    "M": 18,
    "W": 13,
    "U": 8,
    "T": 8,
    "min": 8,
    "us": 8,
}
class Term(Enum):
    """Forecast-horizon category of an evaluation run."""

    SHORT = "short"
    MEDIUM = "medium"
    LONG = "long"

    @property
    def multiplier(self) -> int:
        """Factor applied to the base prediction length for this term."""
        # Built per call on purpose: a dict assigned in the class body would
        # itself be turned into an enum member.
        factor_by_value = {"short": 1, "medium": 10, "long": 15}
        return factor_by_value[self.value]
def itemize_start(data_entry: DataEntry) -> DataEntry:
    """Replace ``data_entry["start"]`` with the result of its ``.item()`` call.

    The entry is modified in place and returned so the function can be used in
    a ``compose``/``Map`` pipeline.
    """
    # NOTE(review): presumably "start" arrives as a numpy scalar because the
    # dataset is loaded with_format("numpy") - confirm.
    raw_start = data_entry["start"]
    data_entry["start"] = raw_start.item()
    return data_entry
class MultivariateToUnivariate(Transformation):
    """Split each entry into one entry per element of a chosen field."""

    def __init__(self, field):
        # Name of the entry key whose elements are split apart (e.g. "target").
        self.field = field

    def __call__(
        self, data_it: Iterable[DataEntry], is_train: bool = False
    ) -> Iterator:
        """Yield one shallow copy of every entry per element of ``self.field``.

        Each copy holds a single element in ``self.field`` and gets
        ``"_dim<index>"`` appended to its ``item_id``. ``is_train`` is accepted
        for interface compatibility and is not used.
        """
        for entry in data_it:
            base_item_id = entry["item_id"]
            components = list(entry[self.field])
            for dim_index, component in enumerate(components):
                single = entry.copy()
                single[self.field] = component
                single["item_id"] = base_item_id + "_dim" + str(dim_index)
                yield single
class Dataset:
    """A dataset saved on disk, exposed as GluonTS train/validation/test splits.

    The Hugging Face dataset stored under ``storage_path / name`` is loaded in
    numpy format and wrapped into a GluonTS-compatible iterable. Prediction
    length and the number of rolling evaluation windows are derived from the
    data frequency, the requested term and the shortest series.
    """

    def __init__(
        self,
        name: str,
        term: Term | str = Term.SHORT,
        to_univariate: bool = False,
        storage_path: str | Path | None = None,
        max_windows: Optional[int] = None,
    ):
        """Load the dataset and build the GluonTS view.

        Args:
            name: Dataset directory name relative to ``storage_path``.
            term: Forecast horizon category (``Term`` member or its value).
            to_univariate: When true, every variate of ``"target"`` is emitted
                as its own series.
            storage_path: Directory that contains the saved datasets. Required.
            max_windows: Upper bound on the number of evaluation windows;
                ``MAX_WINDOW`` is used when ``None``.

        Raises:
            TypeError: If ``storage_path`` is ``None``.
        """
        # Fail with an actionable message instead of the opaque error raised
        # by ``Path(None)``.
        if storage_path is None:
            raise TypeError(
                "storage_path is required: pass the directory that contains "
                "the saved datasets"
            )
        storage_path = Path(storage_path)
        self.hf_dataset = datasets.load_from_disk(str(storage_path / name)).with_format(
            "numpy"
        )
        process = ProcessDataEntry(
            self.freq,
            one_dim_target=self.target_dim == 1,
        )
        # itemize_start runs first, then ProcessDataEntry (compose applies
        # right-to-left).
        self.gluonts_dataset = Map(compose(process, itemize_start), self.hf_dataset)
        if to_univariate:
            self.gluonts_dataset = MultivariateToUnivariate("target").apply(
                self.gluonts_dataset
            )
        self.term = Term(term)
        self.name = name
        self.max_windows = max_windows if max_windows is not None else MAX_WINDOW

    @cached_property
    def prediction_length(self) -> int:
        """Base prediction length for the frequency times the term multiplier.

        Raises:
            KeyError: If the normalised frequency has no entry in the
                applicable prediction-length map.
        """
        freq = norm_freq_str(to_offset(self.freq).name)
        # Strip a trailing "E" so aliases such as "ME"/"QE"/"YE" map onto the
        # "M"/"Q"/"Y" keys of the lookup tables.
        if freq.endswith("E"):
            freq = freq[:-1]
        pred_len = (
            M4_PRED_LENGTH_MAP[freq] if "m4" in self.name else PRED_LENGTH_MAP[freq]
        )
        return self.term.multiplier * pred_len

    @cached_property
    def freq(self) -> str:
        """Frequency string stored on the first entry of the dataset."""
        return self.hf_dataset[0]["freq"]

    @cached_property
    def target_dim(self) -> int:
        """Number of target variates, read from the first entry (1 if 1-D)."""
        return (
            target.shape[0]
            if len((target := self.hf_dataset[0]["target"]).shape) > 1
            else 1
        )

    @cached_property
    def past_feat_dynamic_real_dim(self) -> int:
        """Number of ``past_feat_dynamic_real`` features (0 when absent)."""
        if "past_feat_dynamic_real" not in self.hf_dataset[0]:
            return 0
        elif (
            len(
                (
                    past_feat_dynamic_real := self.hf_dataset[0][
                        "past_feat_dynamic_real"
                    ]
                ).shape
            )
            > 1
        ):
            return past_feat_dynamic_real.shape[0]
        else:
            return 1

    @cached_property
    def windows(self) -> int:
        """Number of rolling evaluation windows.

        Always 1 for datasets whose name contains "m4"; otherwise
        ``ceil(TEST_SPLIT * shortest_series / prediction_length)`` clamped to
        the range ``[1, max_windows]``.
        """
        if "m4" in self.name:
            return 1
        w = math.ceil(TEST_SPLIT * self._min_series_length / self.prediction_length)
        return min(max(1, w), self.max_windows)

    @cached_property
    def _min_series_length(self) -> int:
        """Length of the shortest target series in the dataset."""
        if self.hf_dataset[0]["target"].ndim > 1:
            # Multivariate target: measure only the first variate of each entry.
            lengths = pc.list_value_length(
                pc.list_flatten(
                    pc.list_slice(self.hf_dataset.data.column("target"), 0, 1)
                )
            )
        else:
            lengths = pc.list_value_length(self.hf_dataset.data.column("target"))
        return min(lengths.to_numpy())

    @cached_property
    def sum_series_length(self) -> int:
        """Total number of target observations (summed over all variates)."""
        if self.hf_dataset[0]["target"].ndim > 1:
            lengths = pc.list_value_length(
                pc.list_flatten(self.hf_dataset.data.column("target"))
            )
        else:
            lengths = pc.list_value_length(self.hf_dataset.data.column("target"))
        return sum(lengths.to_numpy())

    @property
    def training_dataset(self) -> TrainingDataset:
        """Data before the last ``windows + 1`` prediction lengths."""
        training_dataset, _ = split(
            self.gluonts_dataset, offset=-self.prediction_length * (self.windows + 1)
        )
        return training_dataset

    @property
    def validation_dataset(self) -> TrainingDataset:
        """Data before the last ``windows`` prediction lengths."""
        validation_dataset, _ = split(
            self.gluonts_dataset, offset=-self.prediction_length * self.windows
        )
        return validation_dataset

    @property
    def test_data(self) -> TestData:
        """Test instances: ``windows`` windows of ``prediction_length`` steps,
        spaced ``prediction_length`` apart, taken from the held-out tail."""
        _, test_template = split(
            self.gluonts_dataset, offset=-self.prediction_length * self.windows
        )
        test_data = test_template.generate_instances(
            prediction_length=self.prediction_length,
            windows=self.windows,
            distance=self.prediction_length,
        )
        return test_data

src/gift_eval/dataset_properties.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+    "m4_yearly": {
+        "domain": "Econ/Fin",
+        "frequency": "A",
+        "num_variates": 1
+    },
+    "m4_quarterly": {
+        "domain": "Econ/Fin",
+        "frequency": "Q",
+        "num_variates": 1
+    },
+    "m4_monthly": {
+        "domain": "Econ/Fin",
+        "frequency": "M",
+        "num_variates": 1
+    },
+    "m4_weekly": {
+        "domain": "Econ/Fin",
+        "frequency": "W",
+        "num_variates": 1
+    },
+    "m4_daily": {
+        "domain": "Econ/Fin",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "m4_hourly": {
+        "domain": "Econ/Fin",
+        "frequency": "H",
+        "num_variates": 1
+    },
+    "electricity": {
+        "domain": "Energy",
+        "frequency": "W",
+        "num_variates": 1
+    },
+    "ett1": {
+        "domain": "Energy",
+        "frequency": "W",
+        "num_variates": 7
+    },
+    "ett2": {
+        "domain": "Energy",
+        "frequency": "W",
+        "num_variates": 7
+    },
+    "solar": {
+        "domain": "Energy",
+        "frequency": "W",
+        "num_variates": 1
+    },
+    "hospital": {
+        "domain": "Healthcare",
+        "frequency": "M",
+        "num_variates": 1
+    },
+    "covid_deaths": {
+        "domain": "Healthcare",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "us_births": {
+        "domain": "Healthcare",
+        "frequency": "M",
+        "num_variates": 1
+    },
+    "saugeen": {
+        "domain": "Nature",
+        "frequency": "M",
+        "num_variates": 1
+    },
+    "temperature_rain": {
+        "domain": "Nature",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "kdd_cup_2018": {
+        "domain": "Nature",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "jena_weather": {
+        "domain": "Nature",
+        "frequency": "D",
+        "num_variates": 21
+    },
+    "car_parts": {
+        "domain": "Sales",
+        "frequency": "M",
+        "num_variates": 1
+    },
+    "restaurant": {
+        "domain": "Sales",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "hierarchical_sales": {
+        "domain": "Sales",
+        "frequency": "W-WED",
+        "num_variates": 1
+    },
+    "loop_seattle": {
+        "domain": "Transport",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "sz_taxi": {
+        "domain": "Transport",
+        "frequency": "H",
+        "num_variates": 1
+    },
+    "m_dense": {
+        "domain": "Transport",
+        "frequency": "D",
+        "num_variates": 1
+    },
+    "bitbrains_fast_storage": {
+        "domain": "Web/CloudOps",
+        "frequency": "H",
+        "num_variates": 2
+    },
+    "bitbrains_rnd": {
+        "domain": "Web/CloudOps",
+        "frequency": "H",
+        "num_variates": 2
+    },
+    "bizitobs_application": {
+        "domain": "Web/CloudOps",
+        "frequency": "10S",
+        "num_variates": 2
+    },
+    "bizitobs_service": {
+        "domain": "Web/CloudOps",
+        "frequency": "10S",
+        "num_variates": 2
+    },
+    "bizitobs_l2c": {
+        "domain": "Web/CloudOps",
+        "frequency": "H",
+        "num_variates": 7
+    },
+    "dd_benchmark_short": {
+        "domain": "Web/Observability",
+        "frequency": "Short",
+        "num_variates": 32
+    },
+    "dd_benchmark_long": {
+        "domain": "Web/Observability",
+        "frequency": "Long",
+        "num_variates": 32
+    }
+}

src/gift_eval/evaluate.py ADDED Viewed

	@@ -0,0 +1,529 @@

+import argparse
+import csv
+import logging
+import warnings
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+import matplotlib
+import matplotlib.pyplot as plt
+from gluonts.model.evaluation import evaluate_model
+from gluonts.time_feature import get_seasonality
+from linear_operator.utils.cholesky import NumericalWarning
+from src.gift_eval.constants import (
+    ALL_DATASETS,
+    DATASET_PROPERTIES,
+    MED_LONG_DATASETS,
+    METRICS,
+    PRETTY_NAMES,
+    STANDARD_METRIC_NAMES,
+)
+from src.gift_eval.data import Dataset
+from src.gift_eval.model_wrapper import TimeSeriesPredictor
+from src.plotting.gift_eval_utils import create_plots_for_dataset
logger = logging.getLogger(__name__)

# Warnings configuration: silence categories that would otherwise flood the
# evaluation logs. Filters are registered in the same order as before.
for _ignored_category in (NumericalWarning, FutureWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

# Keep plotting-related libraries from logging below WARNING level.
matplotlib.set_loglevel("WARNING")
for _noisy_logger_name in ("matplotlib", "matplotlib.font_manager", "PIL"):
    logging.getLogger(_noisy_logger_name).setLevel(logging.WARNING)
class WarningFilter(logging.Filter):
    """Logging filter that drops records whose message contains a given text."""

    def __init__(self, text_to_filter: str) -> None:
        super().__init__()
        # Substring that marks a record as unwanted.
        self.text_to_filter = text_to_filter

    def filter(self, record: logging.LogRecord) -> bool:
        """Return ``False`` (drop) when the formatted message contains the text."""
        message = record.getMessage()
        contains_text = self.text_to_filter in message
        return not contains_text
# Suppress the GluonTS log line emitted when a forecast carries no stored mean
# prediction; all other messages from that logger pass through.
_MEAN_NOT_STORED_TEXT = "The mean prediction is not stored in the forecast data"
gts_logger = logging.getLogger("gluonts.model.forecast")
gts_logger.addFilter(WarningFilter(_MEAN_NOT_STORED_TEXT))
@dataclass
class DatasetMetadata:
    """Descriptive metadata of one (dataset, term) evaluation configuration."""

    # "<key>/<freq>/<term>" identifier written to the results CSV.
    full_name: str
    # Lower-cased dataset name after pretty-name mapping.
    key: str
    # Frequency label used in ``full_name``.
    freq: str
    # Forecast term ("short", "medium" or "long").
    term: str
    # Seasonality passed to the metric computation.
    season_length: int
    # Number of target variates of the underlying dataset.
    target_dim: int
    # Whether the dataset was split into univariate series.
    to_univariate: bool
    # Forecast horizon in time steps.
    prediction_length: int
    # Number of rolling evaluation windows.
    windows: int
@dataclass
class EvaluationItem:
    """Evaluation outcome for one (dataset, term) configuration."""

    # Metadata of the evaluated configuration.
    dataset_metadata: DatasetMetadata
    # Metric results as returned by the evaluator, looked up by metric name.
    metrics: Dict
    # (figure, filename) pairs to be saved alongside the results.
    figures: List[Tuple[object, str]]
def construct_evaluation_data(
    dataset_name: str,
    dataset_storage_path: str,
    terms: Optional[List[str]] = None,
    max_windows: Optional[int] = None,
) -> List[Tuple[Dataset, DatasetMetadata]]:
    """Build datasets and rich metadata per term for a dataset name.

    Args:
        dataset_name: Dataset name, optionally of the form ``"<name>/<freq>"``.
        dataset_storage_path: Directory that contains the saved datasets.
        terms: Terms to build; defaults to ``["short", "medium", "long"]``.
            Medium/long terms are skipped for datasets not listed in
            ``MED_LONG_DATASETS``.
        max_windows: Optional cap on the number of evaluation windows.

    Returns:
        One ``(Dataset, DatasetMetadata)`` pair per term that was built.
    """
    if terms is None:
        terms = ["short", "medium", "long"]

    sub_datasets: List[Tuple[Dataset, DatasetMetadata]] = []

    if "/" in dataset_name:
        ds_key, ds_freq = dataset_name.split("/")
    else:
        ds_key, ds_freq = dataset_name, None
    ds_key = ds_key.lower()
    ds_key = PRETTY_NAMES.get(ds_key, ds_key)
    if ds_freq is None:
        # DATASET_PROPERTIES may be empty (failed load) or lack this key; the
        # frequency stored on the dataset itself is then used further below.
        ds_freq = DATASET_PROPERTIES.get(ds_key, {}).get("frequency")

    # Hoisted out of the loop: the answer does not depend on the term.
    supports_med_long = dataset_name in MED_LONG_DATASETS.split()

    for term in terms:
        # Skip medium/long terms for datasets that don't support them
        if term in ("medium", "long") and not supports_med_long:
            continue

        # Probe once to determine dimensionality
        probe_dataset = Dataset(
            name=dataset_name,
            term=term,
            to_univariate=False,
            storage_path=dataset_storage_path,
            max_windows=max_windows,
        )
        to_univariate = probe_dataset.target_dim > 1
        if to_univariate:
            dataset = Dataset(
                name=dataset_name,
                term=term,
                to_univariate=True,
                storage_path=dataset_storage_path,
                max_windows=max_windows,
            )
        else:
            # The probe was built with exactly the arguments the final dataset
            # needs, so reuse it instead of loading from disk a second time.
            dataset = probe_dataset

        # Compute metadata
        season_length = get_seasonality(dataset.freq)
        actual_freq = ds_freq if ds_freq else dataset.freq
        metadata = DatasetMetadata(
            full_name=f"{ds_key}/{actual_freq}/{term}",
            key=ds_key,
            freq=actual_freq,
            term=term,
            season_length=season_length,
            target_dim=probe_dataset.target_dim,
            to_univariate=to_univariate,
            prediction_length=dataset.prediction_length,
            windows=dataset.windows,
        )
        sub_datasets.append((dataset, metadata))
    return sub_datasets
def _ensure_results_csv(csv_file_path: Path) -> None:
    """Create ``csv_file_path`` with the results header unless it already exists."""
    if csv_file_path.exists():
        return

    csv_file_path.parent.mkdir(parents=True, exist_ok=True)
    with open(csv_file_path, "w", newline="") as csvfile:
        metric_columns = [f"eval_metrics/{name}" for name in STANDARD_METRIC_NAMES]
        header_row = ["dataset", "model", *metric_columns, "domain", "num_variates"]
        csv.writer(csvfile).writerow(header_row)
def _metric_to_scalar(value):
    """Collapse a metric container to a single CSV-writable value."""
    if value is None:
        return None
    is_single_element = (
        hasattr(value, "__len__")
        and not isinstance(value, (str, bytes))
        and len(value) == 1
    )
    if is_single_element:
        # Positional access: ``[]`` on pandas objects is a label lookup, which
        # only falls back to positions through deprecated behaviour and fails
        # outright when the index does not contain the label 0.
        return value.iloc[0] if hasattr(value, "iloc") else value[0]
    if hasattr(value, "item"):
        return value.item()
    return value


def write_results_to_disk(
    items: List[EvaluationItem],
    dataset_name: str,
    output_dir: Path,
    model_name: str,
    create_plots: bool,
) -> None:
    """Append evaluation rows (and optionally plots) for one dataset.

    Rows are appended to ``<output_dir>/<dataset_name>/results.csv``; the file
    and its header are created on first use. Figures are saved under
    ``<output_dir>/<dataset_name>/plots/<key>/<term>/`` and closed afterwards.

    Args:
        items: Evaluation results, one per term.
        dataset_name: Dataset name used for the output sub-directory.
        output_dir: Root directory for all results.
        model_name: Value written to the ``model`` column.
        create_plots: Whether the figures attached to ``items`` are saved.
    """
    output_dir = output_dir / dataset_name
    output_dir.mkdir(parents=True, exist_ok=True)
    output_csv_path = output_dir / "results.csv"
    _ensure_results_csv(output_csv_path)

    with open(output_csv_path, "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for item in items:
            md = item.dataset_metadata

            # Extract metric values in the standard order
            metric_values: List[Optional[float]] = [
                _metric_to_scalar(item.metrics.get(metric_name, None))
                for metric_name in STANDARD_METRIC_NAMES
            ]

            # Lookup domain and num_variates from dataset properties
            ds_key = md.key.lower()
            props = DATASET_PROPERTIES.get(ds_key, {})
            domain = props.get("domain", "unknown")
            num_variates = props.get(
                "num_variates", 1 if md.to_univariate else md.target_dim
            )

            row = [md.full_name, model_name] + metric_values + [domain, num_variates]
            writer.writerow(row)

            if create_plots and item.figures:
                plots_dir = output_dir / "plots" / md.key / md.term
                plots_dir.mkdir(parents=True, exist_ok=True)
                for fig, filename in item.figures:
                    filepath = plots_dir / filename
                    fig.savefig(filepath, dpi=300, bbox_inches="tight")
                    # Release figure memory once it has been written.
                    plt.close(fig)

    logger.info(
        f"Evaluation complete for dataset '{dataset_name}'. Results saved to {output_csv_path}"
    )
    if create_plots:
        logger.info(f"Plots saved under {output_dir / 'plots'}")
def evaluate_datasets(
    predictor: TimeSeriesPredictor,
    dataset: str,
    dataset_storage_path: str,
    terms: Optional[List[str]] = None,
    max_windows: Optional[int] = None,
    batch_size: int = 48,
    max_context_length: Optional[int] = 1024,
    create_plots: bool = False,
    max_plots_per_dataset: int = 10,
) -> List[EvaluationItem]:
    """Evaluate predictor on one dataset across the requested terms.

    Args:
        predictor: Reusable predictor; its dataset context is updated per term.
        dataset: Dataset name, optionally of the form ``"<name>/<freq>"``.
        dataset_storage_path: Directory that contains the saved datasets.
        terms: Terms to evaluate; defaults to ``["short", "medium", "long"]``.
        max_windows: Optional cap on the number of evaluation windows.
        batch_size: Batch size forwarded to the predictor.
        max_context_length: Context cap forwarded to the predictor and plots.
        create_plots: When true, forecasts are generated again for plotting.
        max_plots_per_dataset: Maximum number of figures per term.

    Returns:
        One ``EvaluationItem`` per evaluated term.
    """
    # Resolve the default here (no shared mutable default argument).
    if terms is None:
        terms = ["short", "medium", "long"]

    sub_datasets = construct_evaluation_data(
        dataset_name=dataset,
        dataset_storage_path=dataset_storage_path,
        terms=terms,
        max_windows=max_windows,
    )
    results: List[EvaluationItem] = []
    for i, (sub_dataset, metadata) in enumerate(sub_datasets):
        # ``test_data`` is rebuilt on every property access; build it once
        # per term and reuse it for logging, evaluation and plotting.
        test_data = sub_dataset.test_data

        logger.info(f"Evaluating {i + 1}/{len(sub_datasets)}: {metadata.full_name}")
        logger.info(f"  Dataset size: {len(test_data)}")
        logger.info(f"  Frequency: {sub_dataset.freq}")
        logger.info(f"  Term: {metadata.term}")
        logger.info(f"  Prediction length: {sub_dataset.prediction_length}")
        logger.info(f"  Target dimensions: {sub_dataset.target_dim}")
        logger.info(f"  Windows: {sub_dataset.windows}")

        # Update context on the reusable predictor
        predictor.set_dataset_context(
            prediction_length=sub_dataset.prediction_length,
            freq=sub_dataset.freq,
            batch_size=batch_size,
            max_context_length=max_context_length,
        )
        res = evaluate_model(
            model=predictor,
            test_data=test_data,
            metrics=METRICS,
            axis=None,
            mask_invalid_label=True,
            allow_nan_forecast=False,
            seasonality=metadata.season_length,
        )

        figs: List[Tuple[object, str]] = []
        if create_plots:
            forecasts = predictor.predict(test_data.input)
            figs = create_plots_for_dataset(
                forecasts=forecasts,
                test_data=test_data,
                dataset_metadata=metadata,
                max_plots=max_plots_per_dataset,
                max_context_length=max_context_length,
            )
        results.append(
            EvaluationItem(dataset_metadata=metadata, metrics=res, figures=figs)
        )
    return results
+def _expand_datasets_arg(datasets: List[str] | str) -> List[str]:
+    if datasets[0] == "all":
+        return list(ALL_DATASETS)
+    if isinstance(datasets, str):
+        datasets = [datasets]
+    for dataset in datasets:
+        if dataset not in ALL_DATASETS:
+            raise ValueError(f"Invalid dataset: {dataset}. Use one of {ALL_DATASETS}")
+    return datasets
def _run_evaluation(
    predictor: TimeSeriesPredictor,
    datasets: List[str] | str,
    terms: List[str],
    dataset_storage_path: str,
    max_windows: Optional[int] = None,
    batch_size: int = 48,
    max_context_length: Optional[int] = 1024,
    output_dir: str = "gift_eval_results",
    model_name: str = "TimeSeriesModel",
    create_plots: bool = False,
    max_plots: int = 10,
) -> None:
    """Evaluate each requested dataset and persist its results.

    Common workflow behind both public entry points: the dataset selection is
    expanded and validated, then every dataset is evaluated and written to
    ``output_dir`` before the next one starts.
    """
    results_root = Path(output_dir)
    for current_name in _expand_datasets_arg(datasets):
        evaluation_items = evaluate_datasets(
            predictor=predictor,
            dataset=current_name,
            dataset_storage_path=dataset_storage_path,
            terms=terms,
            max_windows=max_windows,
            batch_size=batch_size,
            max_context_length=max_context_length,
            create_plots=create_plots,
            max_plots_per_dataset=max_plots,
        )
        write_results_to_disk(
            items=evaluation_items,
            dataset_name=current_name,
            output_dir=results_root,
            model_name=model_name,
            create_plots=create_plots,
        )
+def evaluate_from_paths(
+    model_path: str,
+    config_path: str,
+    datasets: List[str] | str,
+    terms: List[str],
+    dataset_storage_path: str,
+    max_windows: Optional[int] = None,
+    batch_size: int = 48,
+    max_context_length: Optional[int] = 1024,
+    output_dir: str = "gift_eval_results",
+    model_name: str = "TimeSeriesModel",
+    create_plots: bool = False,
+    max_plots: int = 10,
+) -> None:
+    """Entry point: load model from disk and save metrics/plots to disk."""
+    # Validate inputs early
+    if not Path(model_path).exists():
+        raise FileNotFoundError(f"Model path does not exist: {model_path}")
+    if not Path(config_path).exists():
+        raise FileNotFoundError(f"Config path does not exist: {config_path}")
+    predictor = TimeSeriesPredictor.from_paths(
+        model_path=model_path,
+        config_path=config_path,
+        ds_prediction_length=1,  # placeholder; set per dataset below
+        ds_freq="D",  # placeholder; set per dataset below
+        batch_size=batch_size,
+        max_context_length=max_context_length,
+    )
+    _run_evaluation(
+        predictor=predictor,
+        datasets=datasets,
+        terms=terms,
+        dataset_storage_path=dataset_storage_path,
+        max_windows=max_windows,
+        batch_size=batch_size,
+        max_context_length=max_context_length,
+        output_dir=output_dir,
+        model_name=model_name,
+        create_plots=create_plots,
+        max_plots=max_plots,
+    )
def evaluate_in_memory(
    model,
    config: dict,
    datasets: List[str] | str,
    terms: List[str],
    dataset_storage_path: str,
    max_windows: Optional[int] = None,
    batch_size: int = 48,
    max_context_length: Optional[int] = 1024,
    output_dir: str = "gift_eval_results",
    model_name: str = "TimeSeriesModel",
    create_plots: bool = False,
    max_plots: int = 10,
) -> None:
    """Evaluate an already-loaded model and write metrics/plots to disk.

    Nothing is returned; the per-dataset results are written under
    ``output_dir``.
    """
    # Prediction length and frequency are placeholders here; the real values
    # are set per dataset during evaluation.
    wrapped_predictor = TimeSeriesPredictor.from_model(
        model=model,
        config=config,
        ds_prediction_length=1,
        ds_freq="D",
        batch_size=batch_size,
        max_context_length=max_context_length,
    )
    _run_evaluation(
        predictor=wrapped_predictor,
        datasets=datasets,
        terms=terms,
        dataset_storage_path=dataset_storage_path,
        max_windows=max_windows,
        batch_size=batch_size,
        max_context_length=max_context_length,
        output_dir=output_dir,
        model_name=model_name,
        create_plots=create_plots,
        max_plots=max_plots,
    )
def _optional_int(text: str) -> Optional[int]:
    """Parse an integer CLI value; the literal "none" (any case) means ``None``."""
    if text.strip().lower() == "none":
        return None
    return int(text)


def _split_comma_list(text: str) -> list:
    """Split a comma-separated CLI value, dropping blanks and surrounding spaces."""
    return [part.strip() for part in text.split(",") if part.strip()]


def _parse_args() -> argparse.Namespace:
    """Parse the command line of the evaluation script.

    Returns:
        The parsed namespace, with ``terms`` and ``datasets`` converted from
        comma-separated strings into lists of names.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate TimeSeriesModel on GIFT-Eval datasets"
    )

    # Model configuration
    parser.add_argument(
        "--model_path",
        type=str,
        required=True,
        help="Path to the trained model checkpoint",
    )
    parser.add_argument(
        "--config_path",
        type=str,
        required=True,
        help="Path to the model configuration YAML file",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        default="TimeSeriesModel",
        help="Name identifier for the model",
    )

    # Dataset configuration
    parser.add_argument(
        "--datasets",
        type=str,
        default="all",
        help="Comma-separated list of dataset names to evaluate (or 'all')",
    )
    parser.add_argument(
        "--dataset_storage_path",
        type=str,
        required=True,
        help="Path to the dataset storage directory",
    )
    parser.add_argument(
        "--terms",
        type=str,
        default="short,medium,long",
        help="Comma-separated list of prediction terms to evaluate",
    )
    parser.add_argument(
        "--max_windows",
        type=int,
        default=None,
        help="Maximum number of windows to use for evaluation",
    )

    # Inference configuration
    parser.add_argument(
        "--batch_size", type=int, default=48, help="Batch size for model inference"
    )
    # The help text promises a "no limit" setting, which plain ``type=int``
    # could never express on the command line.
    parser.add_argument(
        "--max_context_length",
        type=_optional_int,
        default=1024,
        help="Maximum context length to use ('none' for no limit)",
    )

    # Output configuration
    parser.add_argument(
        "--output_dir",
        type=str,
        default="gift_eval_results",
        help="Directory to save evaluation results",
    )

    # Plotting configuration
    parser.add_argument(
        "--create_plots",
        action="store_true",
        help="Create and save plots for each evaluation window",
    )
    parser.add_argument(
        "--max_plots_per_dataset",
        type=int,
        default=10,
        help="Maximum number of plots to create per dataset term",
    )

    args = parser.parse_args()
    # Tolerate spaces after commas ("short, long") and stray trailing commas.
    args.terms = _split_comma_list(args.terms)
    args.datasets = _split_comma_list(args.datasets)
    return args
+def _configure_logging() -> None:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+if __name__ == "__main__":
+    _configure_logging()
+    args = _parse_args()
+    logger.info(f"Command Line Arguments: {vars(args)}")
+    try:
+        evaluate_from_paths(
+            model_path=args.model_path,
+            config_path=args.config_path,
+            datasets=args.datasets,
+            terms=args.terms,
+            dataset_storage_path=args.dataset_storage_path,
+            max_windows=args.max_windows,
+            batch_size=args.batch_size,
+            max_context_length=args.max_context_length,
+            output_dir=args.output_dir,
+            model_name=args.model_name,
+            create_plots=args.create_plots,
+            max_plots=args.max_plots_per_dataset,
+        )
+    except Exception as e:
+        logger.error(f"Evaluation failed: {str(e)}")
+        raise

src/gift_eval/model_wrapper.py ADDED Viewed

	@@ -0,0 +1,349 @@

+import logging
+from typing import Iterator, List, Optional
+import numpy as np
+import torch
+import yaml
+from gluonts.itertools import batcher
+from gluonts.model.forecast import QuantileForecast
+from gluonts.model.predictor import Predictor
+from torch.nn.parallel import DistributedDataParallel as DDP
+from src.data.containers import BatchTimeSeriesContainer
+from src.data.frequency import parse_frequency
+from src.data.scalers import RobustScaler
+from src.models.model import TimeSeriesModel
+from src.utils.utils import device
+logger = logging.getLogger(__name__)
+class TimeSeriesPredictor(Predictor):
+    """
+    Unified predictor for TimeSeriesModel supporting both in-memory and file-based construction.
+    Use classmethods `from_model` and `from_paths` to construct instances.
+    Provides `set_dataset_context` to adjust dataset-specific parameters without reloading the model.
+    """
+    def __init__(
+        self,
+        model: TimeSeriesModel,
+        config: dict,
+        ds_prediction_length: int,
+        ds_freq: str,
+        batch_size: int = 32,
+        max_context_length: Optional[int] = None,
+        debug: bool = False,
+    ) -> None:
+        # Dataset-specific context (can be updated per dataset/term)
+        self.ds_prediction_length = ds_prediction_length
+        self.ds_freq = ds_freq
+        self.batch_size = batch_size
+        self.max_context_length = max_context_length
+        self.debug = debug
+        # Persistent model/config (unwrap DDP if needed)
+        self.model = model.module if isinstance(model, DDP) else model
+        self.model.eval()
+        self.config = config
+        # Initialize scaler (using same type as model)
+        scaler_type = self.config.get("TimeSeriesModel", {}).get(
+            "scaler", "custom_robust"
+        )
+        epsilon = self.config.get("TimeSeriesModel", {}).get("epsilon", 1e-3)
+        if scaler_type == "custom_robust":
+            self.scaler = RobustScaler(epsilon=epsilon)
+        else:
+            raise ValueError(f"Unsupported scaler type: {scaler_type}")
+    def set_dataset_context(
+        self,
+        prediction_length: Optional[int] = None,
+        freq: Optional[str] = None,
+        batch_size: Optional[int] = None,
+        max_context_length: Optional[int] = None,
+    ) -> None:
+        """Update lightweight dataset-specific attributes without reloading the model."""
+        if prediction_length is not None:
+            self.ds_prediction_length = prediction_length
+        if freq is not None:
+            self.ds_freq = freq
+        if batch_size is not None:
+            self.batch_size = batch_size
+        if max_context_length is not None:
+            self.max_context_length = max_context_length
+    @classmethod
+    def from_model(
+        cls,
+        model: TimeSeriesModel,
+        config: dict,
+        ds_prediction_length: int,
+        ds_freq: str,
+        batch_size: int = 32,
+        max_context_length: Optional[int] = None,
+        debug: bool = False,
+    ) -> "TimeSeriesPredictor":
+        """Build a predictor around an already-instantiated model.
+        Every argument is forwarded unchanged to the constructor.
+        """
+        return cls(
+            model=model,
+            config=config,
+            ds_prediction_length=ds_prediction_length,
+            ds_freq=ds_freq,
+            batch_size=batch_size,
+            max_context_length=max_context_length,
+            debug=debug,
+        )
+    @classmethod
+    def from_paths(
+        cls,
+        model_path: str,
+        config_path: str,
+        ds_prediction_length: int,
+        ds_freq: str,
+        batch_size: int = 32,
+        max_context_length: Optional[int] = None,
+        debug: bool = False,
+    ) -> "TimeSeriesPredictor":
+        with open(config_path, "r") as f:
+            config = yaml.safe_load(f)
+        model = cls._load_model_from_path(config=config, model_path=model_path)
+        return cls(
+            model=model,
+            config=config,
+            ds_prediction_length=ds_prediction_length,
+            ds_freq=ds_freq,
+            batch_size=batch_size,
+            max_context_length=max_context_length,
+            debug=debug,
+        )
+    @staticmethod
+    def _load_model_from_path(config: dict, model_path: str) -> TimeSeriesModel:
+        """Instantiate the model from `config` and load its weights from a checkpoint.
+        The model is built from `config["TimeSeriesModel"]`, moved to the module-level
+        `device`, populated from the checkpoint's "model_state_dict" entry and switched
+        to eval mode. Any failure is logged and re-raised.
+        """
+        try:
+            model = TimeSeriesModel(**config["TimeSeriesModel"]).to(device)
+            # NOTE(review): torch.load can unpickle arbitrary objects unless
+            # weights_only is in effect (its default depends on the torch version);
+            # only load checkpoints from trusted sources.
+            checkpoint = torch.load(model_path, map_location=device)
+            model.load_state_dict(checkpoint["model_state_dict"])
+            model.eval()
+            logger.info(f"Successfully loaded model from {model_path}")
+            return model
+        except Exception as e:
+            logger.error(f"Failed to load model from {model_path}: {str(e)}")
+            raise
+    def predict(self, test_data_input) -> Iterator[QuantileForecast]:
+        """
+        Generate forecasts for the test data.
+        Args:
+            test_data_input: Iterator of gluonts DataEntry objects
+        Returns:
+            Iterable of QuantileForecast objects
+        """
+        if hasattr(test_data_input, "__iter__") and not isinstance(
+            test_data_input, list
+        ):
+            test_data_input = list(test_data_input)
+        logger.debug(f"Processing {len(test_data_input)} time series")
+        # Group series by their effective length (after optional truncation),
+        # then process each uniform-length group in sub-batches up to batch_size.
+        def _effective_length(entry) -> int:
+            target = entry["target"]
+            if target.ndim == 1:
+                seq_len = len(target)
+            else:
+                # target shape is [num_channels, seq_len]
+                seq_len = target.shape[1]
+            if self.max_context_length is not None:
+                seq_len = min(seq_len, self.max_context_length)
+            return seq_len
+        length_to_items: dict[int, List[tuple[int, object]]] = {}
+        for idx, entry in enumerate(test_data_input):
+            L = _effective_length(entry)
+            length_to_items.setdefault(L, []).append((idx, entry))
+        total = len(test_data_input)
+        ordered_results: List[Optional[QuantileForecast]] = [None] * total
+        for _, items in length_to_items.items():
+            for i in range(0, len(items), self.batch_size):
+                chunk = items[i : i + self.batch_size]
+                entries = [e for (_orig_idx, e) in chunk]
+                batch_forecasts = self._predict_batch(entries)
+                for f_idx, (orig_idx, _e) in enumerate(chunk):
+                    ordered_results[orig_idx] = batch_forecasts[f_idx]
+        # All results should be populated
+        return ordered_results  # type: ignore[return-value]
+    def _predict_batch(self, test_data_batch: List) -> List[QuantileForecast]:
+        """Generate predictions for a batch of time series."""
+        logger.debug(f"Processing batch of size: {len(test_data_batch)}")
+        try:
+            # Convert gluonts data to BatchTimeSeriesContainer
+            batch_container = self._convert_to_batch_container(test_data_batch)
+            # Autocast only when running on CUDA
+            if isinstance(device, torch.device):
+                device_type = device.type
+            else:
+                device_type = "cuda" if "cuda" in str(device).lower() else "cpu"
+            enable_autocast = device_type == "cuda"
+            with torch.autocast(
+                device_type=device_type, dtype=torch.bfloat16, enabled=enable_autocast
+            ):
+                with torch.no_grad():
+                    model_output = self.model(batch_container, drop_enc_allow=False)
+            # Convert predictions to QuantileForecast objects
+            forecasts = self._convert_to_forecasts(
+                model_output, test_data_batch, batch_container
+            )
+            logger.debug(f"Generated {len(forecasts)} forecasts")
+            return forecasts
+        except Exception as e:
+            logger.error(f"Error in batch prediction: {str(e)}")
+            raise
+    def _convert_to_batch_container(
+        self, test_data_batch: List
+    ) -> BatchTimeSeriesContainer:
+        """Convert gluonts test data to BatchTimeSeriesContainer."""
+        batch_size = len(test_data_batch)
+        # Extract data from test entries (all series in this batch must have equal length)
+        history_values_list = []
+        start_dates = []
+        frequencies = []
+        for entry in test_data_batch:
+            target = entry["target"]
+            # Handle both univariate and multivariate cases
+            if target.ndim == 1:
+                # Univariate: reshape to [seq_len, 1]
+                target = target.reshape(-1, 1)
+            else:
+                # Multivariate: assume shape is [num_channels, seq_len] -> transpose to [seq_len, num_channels]
+                target = target.T
+            # Apply context length limit if specified
+            if (
+                self.max_context_length is not None
+                and len(target) > self.max_context_length
+            ):
+                target = target[-self.max_context_length :]
+            history_values_list.append(target)
+            start_dates.append(entry["start"].to_timestamp().to_datetime64())
+            frequencies.append(parse_frequency(entry["freq"]))
+        # Stack sequences directly (no padding) -> shapes are uniform by construction
+        history_values_np = np.stack(history_values_list, axis=0)
+        num_channels = history_values_np.shape[2]
+        # Convert to tensors
+        history_values = torch.tensor(
+            history_values_np, dtype=torch.float32, device=device
+        )
+        # Create future values tensor (empty for prediction)
+        future_values = torch.zeros(
+            (batch_size, self.ds_prediction_length, num_channels),
+            dtype=torch.float32,
+            device=device,
+        )
+        return BatchTimeSeriesContainer(
+            history_values=history_values,
+            future_values=future_values,
+            start=start_dates,
+            frequency=frequencies,
+        )
+    def _convert_to_forecasts(
+        self,
+        model_output: dict,
+        test_data_batch: List,
+        batch_container: BatchTimeSeriesContainer,
+    ) -> List[QuantileForecast]:
+        """Convert model predictions to QuantileForecast objects."""
+        predictions = model_output[
+            "result"
+        ]  # Shape: [batch_size, pred_len, num_channels] or [batch_size, pred_len, num_channels, num_quantiles]
+        scale_statistics = model_output["scale_statistics"]
+        # Apply inverse scaling to get predictions in original scale
+        if predictions.ndim == 4:
+            # Quantile predictions: [batch_size, pred_len, num_channels, num_quantiles]
+            predictions_unscaled = self.scaler.inverse_scale(
+                predictions, scale_statistics
+            )
+            is_quantile = True
+            quantile_levels = self.model.quantiles
+        else:
+            # Point predictions: [batch_size, pred_len, num_channels]
+            predictions_unscaled = self.scaler.inverse_scale(
+                predictions, scale_statistics
+            )
+            is_quantile = False
+            quantile_levels = [0.5]  # Treat as median forecast
+        forecasts: List[QuantileForecast] = []
+        for i, entry in enumerate(test_data_batch):
+            # Get prediction start date
+            history_length = int(batch_container.history_values.shape[1])
+            start_date = entry["start"]
+            forecast_start = start_date + history_length
+            if is_quantile:
+                # Handle quantile forecasts
+                pred_array = (
+                    predictions_unscaled[i].cpu().numpy()
+                )  # [pred_len, num_channels, num_quantiles]
+                if pred_array.shape[1] == 1:
+                    # Univariate case: [pred_len, 1, num_quantiles] -> [pred_len, num_quantiles]
+                    pred_array = pred_array.squeeze(1)
+                    forecast_arrays = pred_array.T  # [num_quantiles, pred_len]
+                else:
+                    # Multivariate case: [pred_len, num_channels, num_quantiles] -> [num_quantiles, pred_len, num_channels]
+                    forecast_arrays = pred_array.transpose(2, 0, 1)
+                forecast = QuantileForecast(
+                    forecast_arrays=forecast_arrays,
+                    forecast_keys=[str(q) for q in quantile_levels],
+                    start_date=forecast_start,
+                )
+            else:
+                # Handle point forecasts
+                pred_array = (
+                    predictions_unscaled[i].cpu().numpy()
+                )  # [pred_len, num_channels]
+                if pred_array.shape[1] == 1:
+                    # Univariate case: [pred_len, 1] -> [pred_len]
+                    pred_array = pred_array.squeeze(1)
+                    forecast_arrays = pred_array.reshape(1, -1)  # [1, pred_len]
+                else:
+                    # Multivariate case: [pred_len, num_channels] -> [1, pred_len, num_channels]
+                    forecast_arrays = pred_array.reshape(1, *pred_array.shape)
+                forecast = QuantileForecast(
+                    forecast_arrays=forecast_arrays,
+                    forecast_keys=["0.5"],
+                    start_date=forecast_start,
+                )
+            forecasts.append(forecast)
+        return forecasts

src/models/__init__.py ADDED Viewed

File without changes

src/models/blocks.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import torch
+import torch.nn as nn
+from fla.models.gated_deltaproduct import GatedDeltaProductConfig
+from fla.models.gated_deltaproduct.modeling_gated_deltaproduct import GatedDeltaProductBlock
+class GatedDeltaProductEncoder(nn.Module):
+    """
+    GatedDeltaNet encoder using GatedDeltaProductBlock for sequence modeling.
+    """
+    def __init__(
+        self,
+        layer_idx: int,
+        token_embed_dim: int,
+        num_heads: int = 4,
+        attn_mode: str = "chunk",
+        expand_v: float = 1.0,
+        use_gate: bool = False,
+        use_short_conv: bool = True,
+        conv_size: int = 4,
+        allow_neg_eigval: bool = True,
+        use_forget_gate: bool = True,
+        num_householder: int = 1,
+        **kwargs,
+    ):
+        super().__init__()
+        config = GatedDeltaProductConfig(
+            attn_mode=attn_mode,
+            hidden_size=token_embed_dim,
+            expand_v=expand_v,
+            use_gate=use_gate,
+            use_short_conv=use_short_conv,
+            conv_size=conv_size,
+            head_dim=token_embed_dim // num_heads,
+            hidden_ratio=0.5,
+            num_heads=num_heads,
+            allow_neg_eigval=allow_neg_eigval,
+            use_forget_gate=use_forget_gate,
+            num_householder=num_householder,
+        )
+        self.encoder_layer = GatedDeltaProductBlock(layer_idx=layer_idx, config=config)
+    def forward(self, x, initial_state=None):
+        """
+        Forward pass through the GatedDeltaProductBlock.
+        Args:
+            x: Input tensor of shape [batch_size, seq_len, hidden_size]
+        Returns:
+            Output tensor of same shape as input
+        """
+        x, last_hidden_state, _ = self.encoder_layer(
+            x, output_attentions=True, initial_state=initial_state
+        )
+        return x, last_hidden_state

src/models/model.py ADDED Viewed

	@@ -0,0 +1,427 @@

+import torch
+import torch.nn as nn
+from fla.modules import GatedMLP
+from src.data.containers import BatchTimeSeriesContainer
+from src.data.scalers import MinMaxScaler, RobustScaler
+from src.data.time_features import compute_batch_time_features
+from src.models.blocks import GatedDeltaProductEncoder
+from src.utils.utils import device
+def create_scaler(scaler_type: str, epsilon: float = 1e-3):
+    """Create scaler instance based on type."""
+    if scaler_type == "custom_robust":
+        return RobustScaler(epsilon=epsilon)
+    elif scaler_type == "min_max":
+        return MinMaxScaler(epsilon=epsilon)
+    else:
+        raise ValueError(f"Unknown scaler: {scaler_type}")
+def apply_channel_noise(values: torch.Tensor, noise_scale: float = 0.1):
+    """Add noise to constant channels to prevent model instability."""
+    is_constant = torch.all(values == values[:, 0:1, :], dim=1)
+    noise = torch.randn_like(values) * noise_scale * is_constant.unsqueeze(1)
+    return values + noise
+class TimeSeriesModel(nn.Module):
+    """Time series forecasting model combining embedding, encoding, and prediction."""
+    def __init__(
+        self,
+        # Core architecture
+        embed_size: int = 128,
+        num_encoder_layers: int = 2,
+        # Scaling and preprocessing
+        scaler: str = "custom_robust",
+        epsilon: float = 1e-3,
+        scaler_clamp_value: float = None,
+        handle_constants: bool = False,
+        # Time features
+        K_max: int = 6,
+        time_feature_config: dict = None,
+        encoding_dropout: float = 0.0,
+        # Encoder configuration
+        encoder_config: dict = None,
+        # Loss configuration
+        loss_type: str = "huber",  # "huber", "quantile"
+        quantiles: list[float] = None,
+        **kwargs,
+    ):
+        super().__init__()
+        # Core parameters
+        self.embed_size = embed_size
+        self.num_encoder_layers = num_encoder_layers
+        self.epsilon = epsilon
+        self.scaler_clamp_value = scaler_clamp_value
+        self.handle_constants = handle_constants
+        self.encoding_dropout = encoding_dropout
+        self.K_max = K_max
+        self.time_feature_config = time_feature_config or {}
+        self.encoder_config = encoder_config or {}
+        # Store loss parameters
+        self.loss_type = loss_type
+        self.quantiles = quantiles
+        if self.loss_type == "quantile" and self.quantiles is None:
+            raise ValueError("Quantiles must be provided for quantile loss.")
+        if self.quantiles:
+            self.register_buffer(
+                "qt", torch.tensor(self.quantiles, device=device).view(1, 1, 1, -1)
+            )
+        # Validate configuration before initialization
+        self._validate_configuration()
+        # Initialize components
+        self.scaler = create_scaler(scaler, epsilon)
+        self._init_embedding_layers()
+        self._init_encoder_layers(self.encoder_config, num_encoder_layers)
+        self._init_projection_layers()
+    def _validate_configuration(self):
+        """Validate essential model configuration parameters."""
+        if "num_heads" not in self.encoder_config:
+            raise ValueError("encoder_config must contain 'num_heads' parameter")
+        if self.embed_size % self.encoder_config["num_heads"] != 0:
+            raise ValueError(
+                f"embed_size ({self.embed_size}) must be divisible by "
+                f"num_heads ({self.encoder_config['num_heads']})"
+            )
+    def _init_embedding_layers(self):
+        """Initialize value and time feature embedding layers."""
+        # Lifts each scalar value to an embed_size-dimensional vector
+        self.expand_values = nn.Linear(1, self.embed_size, bias=True)
+        # Learnable vector that replaces the embedding of NaN inputs
+        # (see _compute_embeddings); initialised as N(0, 1) / embed_size
+        self.nan_embedding = nn.Parameter(
+            torch.randn(1, 1, 1, self.embed_size) / self.embed_size,
+            requires_grad=True,
+        )
+        # Projects the K_max time features of a step to the embedding width
+        self.time_feature_projection = nn.Linear(self.K_max, self.embed_size)
+    def _init_encoder_layers(self, encoder_config: dict, num_encoder_layers: int):
+        """Initialize encoder layers."""
+        self.num_encoder_layers = num_encoder_layers
+        # Ensure encoder_config has token_embed_dim
+        encoder_config = encoder_config.copy()
+        encoder_config["token_embed_dim"] = self.embed_size
+        self.encoder_layers = nn.ModuleList(
+            [
+                GatedDeltaProductEncoder(layer_idx=layer_idx, **encoder_config)
+                for layer_idx in range(self.num_encoder_layers)
+            ]
+        )
+    def _init_projection_layers(self):
+        """Create the output head, the pre-head MLP and the learnable initial states."""
+        # One output per quantile level in quantile mode, a single value otherwise
+        if self.loss_type == "quantile":
+            output_dim = len(self.quantiles)
+        else:
+            output_dim = 1
+        self.final_output_layer = nn.Linear(self.embed_size, output_dim)
+        self.mlp = GatedMLP(
+            hidden_size=self.embed_size,
+            hidden_ratio=4,
+            hidden_act="swish",
+            fuse_swiglu=True,
+        )
+        # Initialize one learnable initial hidden state per encoder layer
+        # Each has batch dimension 1 and is repeated to the batch size during the forward pass
+        head_k_dim = self.embed_size // self.encoder_config["num_heads"]
+        # Get expand_v from encoder_config, default to 1.0 if not present
+        expand_v = self.encoder_config.get("expand_v", 1.0)
+        head_v_dim = int(head_k_dim * expand_v)
+        num_initial_hidden_states = self.num_encoder_layers
+        # Each state is [1, num_heads, head_k_dim, head_v_dim], drawn as N(0, 1) / head_k_dim
+        self.initial_hidden_state = nn.ParameterList(
+            [
+                nn.Parameter(
+                    torch.randn(
+                        1, self.encoder_config["num_heads"], head_k_dim, head_v_dim
+                    )
+                    / head_k_dim,
+                    requires_grad=True,
+                )
+                for _ in range(num_initial_hidden_states)
+            ]
+        )
+    def _preprocess_data(self, data_container: BatchTimeSeriesContainer):
+        """Extract data shapes and handle constants without padding."""
+        history_values = data_container.history_values
+        future_values = data_container.future_values
+        history_mask = data_container.history_mask
+        batch_size, history_length, num_channels = history_values.shape
+        future_length = future_values.shape[1] if future_values is not None else 0
+        # Handle constants
+        if self.handle_constants:
+            history_values = apply_channel_noise(history_values)
+        return {
+            "history_values": history_values,
+            "future_values": future_values,
+            "history_mask": history_mask,
+            "num_channels": num_channels,
+            "history_length": history_length,
+            "future_length": future_length,
+            "batch_size": batch_size,
+        }
+    def _compute_scaling(
+        self, history_values: torch.Tensor, history_mask: torch.Tensor = None
+    ):
+        """Compute scaling statistics and apply scaling."""
+        scale_statistics = self.scaler.compute_statistics(history_values, history_mask)
+        return scale_statistics
+    def _apply_scaling_and_masking(
+        self, values: torch.Tensor, scale_statistics: dict, mask: torch.Tensor = None
+    ):
+        """Apply scaling and optional masking to values."""
+        scaled_values = self.scaler.scale(values, scale_statistics)
+        if mask is not None:
+            scaled_values = scaled_values * mask.unsqueeze(-1).float()
+        if self.scaler_clamp_value is not None:
+            scaled_values = torch.clamp(
+                scaled_values, -self.scaler_clamp_value, self.scaler_clamp_value
+            )
+        return scaled_values
+    def _get_positional_embeddings(
+        self,
+        time_features: torch.Tensor,
+        num_channels: int,
+        batch_size: int,
+        drop_enc_allow: bool = False,
+    ):
+        """Generate positional embeddings from time features."""
+        seq_len = time_features.shape[1]
+        if (torch.rand(1).item() < self.encoding_dropout) and drop_enc_allow:
+            return torch.zeros(
+                batch_size, seq_len, num_channels, self.embed_size, device=device
+            ).to(torch.float32)
+        pos_embed = self.time_feature_projection(time_features)
+        return pos_embed.unsqueeze(2).expand(-1, -1, num_channels, -1)
+    def _compute_embeddings(
+        self,
+        scaled_history: torch.Tensor,
+        history_pos_embed: torch.Tensor,
+        history_mask: torch.Tensor | None = None,
+    ):
+        """Compute value embeddings and combine with positional embeddings.
+        Returns the embeddings with all trailing dimensions flattened, i.e. shaped
+        [batch_size, seq_len, -1].
+        """
+        # Remember NaN positions, then embed with NaNs replaced by 0 so the
+        # linear layer never sees a NaN
+        nan_mask = torch.isnan(scaled_history)
+        history_for_embedding = torch.nan_to_num(scaled_history, nan=0.0)
+        channel_embeddings = self.expand_values(history_for_embedding.unsqueeze(-1))
+        # Overwrite the embeddings at NaN positions with the learnable NaN vector
+        channel_embeddings[nan_mask] = self.nan_embedding.to(channel_embeddings.dtype)
+        channel_embeddings = channel_embeddings + history_pos_embed
+        # Suppress padded time steps completely so padding is a pure batching artifact
+        # history_mask: [B, S] -> broadcast to [B, S, 1, 1]
+        if history_mask is not None:
+            mask_broadcast = (
+                history_mask.unsqueeze(-1).unsqueeze(-1).to(channel_embeddings.dtype)
+            )
+            channel_embeddings = channel_embeddings * mask_broadcast
+        batch_size, seq_len = scaled_history.shape[:2]
+        all_channels_embedded = channel_embeddings.view(batch_size, seq_len, -1)
+        return all_channels_embedded
+    def _generate_predictions(
+        self,
+        embedded: torch.Tensor,
+        target_pos_embed: torch.Tensor,
+        prediction_length: int,
+        num_channels: int,
+        history_mask: torch.Tensor = None,
+    ):
+        """
+        Generate predictions for all channels using vectorized operations.
+        The history embeddings and the target positional embeddings are
+        concatenated along the sequence axis, passed through the encoder stack,
+        and the last `prediction_length` positions are projected to the outputs.
+        Returns [B, P, N, Q] in quantile mode and [B, P, N] otherwise.
+        `history_mask` is accepted but not used in this method.
+        """
+        batch_size, seq_len, _ = embedded.shape
+        # embedded shape: [B, S, N*E] -> Reshape to [B, S, N, E]
+        embedded = embedded.view(batch_size, seq_len, num_channels, self.embed_size)
+        # Vectorize across channels by merging the batch and channel dimensions.
+        # [B, S, N, E] -> [B*N, S, E]
+        channel_embedded = (
+            embedded.permute(0, 2, 1, 3)
+            .contiguous()
+            .view(batch_size * num_channels, seq_len, self.embed_size)
+        )
+        # Reshape target positional embeddings similarly: [B, P, N, E] -> [B*N, P, E]
+        target_pos_embed = (
+            target_pos_embed.permute(0, 2, 1, 3)
+            .contiguous()
+            .view(batch_size * num_channels, prediction_length, self.embed_size)
+        )
+        x = channel_embedded
+        target_repr = target_pos_embed
+        # History first, then the target positions: [B*N, S + P, E]
+        x = torch.concatenate([x, target_repr], dim=1)
+        if self.encoder_config.get("weaving", True):
+            # Weaving: the state starts at zero and each layer receives the state
+            # returned by the previous layer plus its own learnable initial state
+            hidden_state = torch.zeros_like(
+                self.initial_hidden_state[0].repeat(batch_size * num_channels, 1, 1, 1)
+            )
+            for layer_idx, encoder_layer in enumerate(self.encoder_layers):
+                x, hidden_state = encoder_layer(
+                    x,
+                    hidden_state
+                    + self.initial_hidden_state[layer_idx].repeat(
+                        batch_size * num_channels, 1, 1, 1
+                    ),
+                )
+        else:
+            # No weaving: each layer starts from its own learnable initial state
+            # only; the state it returns is discarded
+            for layer_idx, encoder_layer in enumerate(self.encoder_layers):
+                initial_hidden_state = self.initial_hidden_state[layer_idx].repeat(
+                    batch_size * num_channels, 1, 1, 1
+                )
+                x, _ = encoder_layer(x, initial_hidden_state)
+        # Use the last prediction_length positions
+        prediction_embeddings = x[:, -prediction_length:, :]
+        predictions = self.final_output_layer(self.mlp(prediction_embeddings))
+        # Reshape output to handle quantiles
+        # Original shape: [B*N, P, Q] where Q is num_quantiles or 1
+        # Split the merged batch/channel axis back out: [B, N, P, Q]
+        output_dim = len(self.quantiles) if self.loss_type == "quantile" else 1
+        predictions = predictions.view(
+            batch_size, num_channels, prediction_length, output_dim
+        )
+        predictions = predictions.permute(0, 2, 1, 3)  # [B, P, N, Q]
+        # Squeeze the last dimension if not in quantile mode for backward compatibility
+        if self.loss_type != "quantile":
+            predictions = predictions.squeeze(-1)  # [B, P, N]
+        return predictions
+    def forward(
+        self, data_container: BatchTimeSeriesContainer, drop_enc_allow: bool = False
+    ):
+        """Main forward pass.
+        Args:
+            data_container: Batch holding the history, optional future values,
+                history mask, start timestamps and frequencies.
+            drop_enc_allow: Passed to the positional-embedding step; when True the
+                positional embeddings may be replaced by zeros according to
+                `encoding_dropout`.
+        Returns:
+            Dict with "result" (predictions in scaled space), "scale_statistics",
+            "future_scaled" (scaled future values, or None when no future values
+            are given), "history_length" and "future_length".
+        """
+        # Preprocess data
+        preprocessed = self._preprocess_data(data_container)
+        # Compute time features dynamically based on actual lengths
+        history_time_features, target_time_features = compute_batch_time_features(
+            start=data_container.start,
+            history_length=preprocessed["history_length"],
+            future_length=preprocessed["future_length"],
+            batch_size=preprocessed["batch_size"],
+            frequency=data_container.frequency,
+            K_max=self.K_max,
+            time_feature_config=self.time_feature_config,
+        )
+        # Compute scaling statistics from the history only
+        scale_statistics = self._compute_scaling(
+            preprocessed["history_values"], preprocessed["history_mask"]
+        )
+        # Apply scaling
+        history_scaled = self._apply_scaling_and_masking(
+            preprocessed["history_values"],
+            scale_statistics,
+            preprocessed["history_mask"],
+        )
+        # Scale future values if present (with the history's statistics)
+        future_scaled = None
+        if preprocessed["future_values"] is not None:
+            future_scaled = self.scaler.scale(
+                preprocessed["future_values"], scale_statistics
+            )
+        # Get positional embeddings; history and target are computed in separate calls
+        history_pos_embed = self._get_positional_embeddings(
+            history_time_features,
+            preprocessed["num_channels"],
+            preprocessed["batch_size"],
+            drop_enc_allow,
+        )
+        target_pos_embed = self._get_positional_embeddings(
+            target_time_features,
+            preprocessed["num_channels"],
+            preprocessed["batch_size"],
+            drop_enc_allow,
+        )
+        # Compute embeddings
+        history_embed = self._compute_embeddings(
+            history_scaled, history_pos_embed, preprocessed["history_mask"]
+        )
+        # Generate predictions
+        predictions = self._generate_predictions(
+            history_embed,
+            target_pos_embed,
+            preprocessed["future_length"],
+            preprocessed["num_channels"],
+            preprocessed["history_mask"],
+        )
+        return {
+            "result": predictions,
+            "scale_statistics": scale_statistics,
+            "future_scaled": future_scaled,
+            "history_length": preprocessed["history_length"],
+            "future_length": preprocessed["future_length"],
+        }
+    def _quantile_loss(self, y_true: torch.Tensor, y_pred: torch.Tensor):
+        """
+        Compute the quantile loss.
+        y_true: [B, P, N]
+        y_pred: [B, P, N, Q]
+        """
+        # Add a dimension to y_true to match y_pred: [B, P, N] -> [B, P, N, 1]
+        y_true = y_true.unsqueeze(-1)
+        # Calculate errors
+        errors = y_true - y_pred
+        # Calculate quantile loss
+        # The max operator implements the two cases of the quantile loss formula
+        loss = torch.max((self.qt - 1) * errors, self.qt * errors)
+        # Average the loss across all dimensions
+        return loss.mean()
+    def compute_loss(self, y_true: torch.Tensor, y_pred: dict):
+        """Compute loss between predictions and scaled ground truth."""
+        predictions = y_pred["result"]
+        scale_statistics = y_pred["scale_statistics"]
+        if y_true is None:
+            return torch.tensor(0.0, device=predictions.device)
+        future_scaled = self.scaler.scale(y_true, scale_statistics)
+        if self.loss_type == "huber":
+            if predictions.shape != future_scaled.shape:
+                raise ValueError(
+                    f"Shape mismatch for Huber loss: predictions {predictions.shape} vs future_scaled {future_scaled.shape}"
+                )
+            return nn.functional.huber_loss(predictions, future_scaled)
+        elif self.loss_type == "quantile":
+            return self._quantile_loss(future_scaled, predictions)
+        else:
+            raise ValueError(f"Unknown loss type: {self.loss_type}")

src/optim/lr_scheduler.py ADDED Viewed

	@@ -0,0 +1,360 @@

+# src/optim/lr_scheduler.py
+import math
+from enum import Enum
+from functools import partial
+from typing import Optional
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import LambdaLR
+class SchedulerType(Enum):
+    """Enumeration of available learning rate schedulers.
+    The member values are the string identifiers accepted by ``get_scheduler``.
+    NOTE: only WARMUP_STABLE_DECAY, COSINE_WITH_WARMUP and COSINE_WITH_RESTARTS
+    have a factory in ``SCHEDULER_REGISTRY`` in this module; requesting any
+    other member through ``get_scheduler`` raises ``ValueError``.
+    """
+    COSINE = "cosine"
+    COSINE_WITH_WARMUP = "cosine_with_warmup"
+    COSINE_WITH_RESTARTS = "cosine_with_restarts"
+    WARMUP_STABLE_DECAY = "warmup_stable_decay"
+    POLYNOMIAL_WITH_WARMUP = "polynomial_with_warmup"
+    LINEAR_WITH_WARMUP = "linear_with_warmup"
+    CONSTANT_WITH_WARMUP = "constant_with_warmup"
+    INVERSE_SQRT = "inverse_sqrt"
+def _get_warmup_stable_decay_lr_lambda(
+    current_step: int,
+    *,
+    num_warmup_steps: int,
+    num_stable_steps: int,
+    num_training_steps: int,
+    min_lr_ratio: float = 0.001,
+    decay_type: str = "cosine",
+):
+    """
+    Learning rate lambda function for Warmup-Stable-Decay (WSD) schedule.
+    This scheduler implements three phases:
+    1. Warmup: Linear increase from 0 to peak learning rate
+    2. Stable: Constant learning rate for majority of training
+    3. Decay: Gradual decrease using cosine or linear decay
+    Args:
+        current_step: Current training step
+        num_warmup_steps: Number of warmup steps
+        num_stable_steps: Number of stable learning rate steps
+        num_training_steps: Total number of training steps
+        min_lr_ratio: Minimum learning rate as ratio of peak learning rate
+        decay_type: Type of decay schedule ("cosine" or "linear")
+    """
+    if current_step < num_warmup_steps:
+        # Warmup phase: linear increase
+        return float(current_step) / float(max(1, num_warmup_steps))
+    elif current_step < num_warmup_steps + num_stable_steps:
+        # Stable phase: constant learning rate
+        return 1.0
+    else:
+        # Decay phase
+        decay_steps = num_training_steps - num_warmup_steps - num_stable_steps
+        if decay_steps <= 0:
+            return max(min_lr_ratio, 1.0)
+        progress = (current_step - num_warmup_steps - num_stable_steps) / decay_steps
+        progress = min(progress, 1.0)  # Clamp to [0, 1]
+        if decay_type == "cosine":
+            # Cosine decay
+            decay_factor = 0.5 * (1.0 + math.cos(math.pi * progress))
+            return max(min_lr_ratio, decay_factor)
+        elif decay_type == "linear":
+            # Linear decay
+            decay_factor = 1.0 - progress
+            return max(min_lr_ratio, decay_factor)
+        else:
+            raise ValueError(f"Unknown decay_type: {decay_type}")
+def get_warmup_stable_decay_schedule(
+    optimizer: Optimizer,
+    num_warmup_steps: int,
+    num_stable_steps: int,
+    num_training_steps: int,
+    min_lr_ratio: float = 0.01,
+    decay_type: str = "cosine",
+    last_epoch: int = -1,
+):
+    """
+    Create a Warmup-Stable-Decay learning rate schedule.
+    This scheduler is particularly well-suited for foundation model training as it:
+    - Provides stable learning during the majority of training
+    - Doesn't require pre-committing to exact training duration
+    - Allows for extended training without aggressive decay
+    Args:
+        optimizer: The optimizer for which to schedule the learning rate
+        num_warmup_steps: Number of steps for warmup phase
+        num_stable_steps: Number of steps for stable learning rate phase
+        num_training_steps: Total number of training steps
+        min_lr_ratio: Minimum learning rate as fraction of peak learning rate
+        decay_type: Type of decay ("cosine" or "linear")
+        last_epoch: The index of the last epoch when resuming training
+    Returns:
+        torch.optim.lr_scheduler.LambdaLR with the WSD schedule
+    """
+    lr_lambda = partial(
+        _get_warmup_stable_decay_lr_lambda,
+        num_warmup_steps=num_warmup_steps,
+        num_stable_steps=num_stable_steps,
+        num_training_steps=num_training_steps,
+        min_lr_ratio=min_lr_ratio,
+        decay_type=decay_type,
+    )
+    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
+def _get_cosine_schedule_with_warmup_lr_lambda(
+    current_step: int,
+    *,
+    num_warmup_steps: int,
+    num_training_steps: int,
+    num_cycles: float = 0.5,
+    min_lr_ratio: float = 0.0,
+):
+    """Enhanced cosine schedule with configurable minimum learning rate."""
+    if current_step < num_warmup_steps:
+        return float(current_step) / float(max(1, num_warmup_steps))
+    progress = float(current_step - num_warmup_steps) / float(
+        max(1, num_training_steps - num_warmup_steps)
+    )
+    cosine_factor = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
+    return max(min_lr_ratio, cosine_factor)
+def get_enhanced_cosine_schedule_with_warmup(
+    optimizer: Optimizer,
+    num_warmup_steps: int,
+    num_training_steps: int,
+    num_cycles: float = 0.5,
+    min_lr_ratio: float = 0.01,
+    last_epoch: int = -1,
+):
+    """
+    Enhanced cosine schedule with warmup and configurable minimum learning rate.
+    Args:
+        optimizer: The optimizer for which to schedule the learning rate
+        num_warmup_steps: Number of steps for warmup phase
+        num_training_steps: Total number of training steps
+        num_cycles: Number of cosine cycles (0.5 = half cosine)
+        min_lr_ratio: Minimum learning rate as fraction of peak learning rate
+        last_epoch: The index of the last epoch when resuming training
+    """
+    lr_lambda = partial(
+        _get_cosine_schedule_with_warmup_lr_lambda,
+        num_warmup_steps=num_warmup_steps,
+        num_training_steps=num_training_steps,
+        num_cycles=num_cycles,
+        min_lr_ratio=min_lr_ratio,
+    )
+    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
+def _get_cosine_with_restarts_lr_lambda(
+    current_step: int,
+    *,
+    num_warmup_steps: int,
+    num_training_steps: int,
+    num_cycles: int = 1,
+    min_lr_ratio: float = 0.0,
+):
+    """Cosine schedule with hard restarts and configurable minimum learning rate."""
+    if current_step < num_warmup_steps:
+        return float(current_step) / float(max(1, num_warmup_steps))
+    progress = float(current_step - num_warmup_steps) / float(
+        max(1, num_training_steps - num_warmup_steps)
+    )
+    if progress >= 1.0:
+        return min_lr_ratio
+    cosine_factor = 0.5 * (
+        1.0 + math.cos(math.pi * ((float(num_cycles) * progress) % 1.0))
+    )
+    return max(min_lr_ratio, cosine_factor)
+def get_cosine_with_restarts_schedule(
+    optimizer: Optimizer,
+    num_warmup_steps: int,
+    num_training_steps: int,
+    num_cycles: int = 4,
+    min_lr_ratio: float = 0.01,
+    last_epoch: int = -1,
+):
+    """
+    Cosine schedule with hard restarts.
+    Args:
+        optimizer: The optimizer for which to schedule the learning rate
+        num_warmup_steps: Number of steps for warmup phase
+        num_training_steps: Total number of training steps
+        num_cycles: Number of restart cycles
+        min_lr_ratio: Minimum learning rate as fraction of peak learning rate
+        last_epoch: The index of the last epoch when resuming training
+    """
+    lr_lambda = partial(
+        _get_cosine_with_restarts_lr_lambda,
+        num_warmup_steps=num_warmup_steps,
+        num_training_steps=num_training_steps,
+        num_cycles=num_cycles,
+        min_lr_ratio=min_lr_ratio,
+    )
+    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
+# Scheduler registry for easy lookup.
+# Maps a SchedulerType member to the factory that builds the matching
+# scheduler; get_scheduler() rejects any member that has no entry here.
+SCHEDULER_REGISTRY = {
+    SchedulerType.WARMUP_STABLE_DECAY: get_warmup_stable_decay_schedule,
+    SchedulerType.COSINE_WITH_WARMUP: get_enhanced_cosine_schedule_with_warmup,
+    SchedulerType.COSINE_WITH_RESTARTS: get_cosine_with_restarts_schedule,
+}
+def get_scheduler(
+    scheduler_type: str | SchedulerType,
+    optimizer: Optimizer,
+    num_warmup_steps: int,
+    num_training_steps: int,
+    scheduler_kwargs: Optional[dict] = None,
+):
+    """
+    Unified interface to create learning rate schedulers.
+    Args:
+        scheduler_type: Type of scheduler to create
+        optimizer: The optimizer to schedule
+        num_warmup_steps: Number of warmup steps
+        num_training_steps: Total training steps
+        scheduler_kwargs: Additional scheduler-specific parameters
+    Returns:
+        Configured learning rate scheduler
+    """
+    if isinstance(scheduler_type, str):
+        scheduler_type = SchedulerType(scheduler_type)
+    if scheduler_kwargs is None:
+        scheduler_kwargs = {}
+    if scheduler_type not in SCHEDULER_REGISTRY:
+        raise ValueError(f"Unsupported scheduler type: {scheduler_type}")
+    scheduler_func = SCHEDULER_REGISTRY[scheduler_type]
+    return scheduler_func(
+        optimizer=optimizer,
+        num_warmup_steps=num_warmup_steps,
+        num_training_steps=num_training_steps,
+        **scheduler_kwargs,
+    )
+class WarmupStableDecayScheduler:
+    """
+    Alternative implementation as a standalone scheduler class.
+    This provides more flexibility and better state management for
+    complex training scenarios with checkpointing.
+    """
+    def __init__(
+        self,
+        optimizer: Optimizer,
+        num_warmup_steps: int,
+        num_stable_steps: int,
+        total_steps: int,
+        min_lr_ratio: float = 0.01,
+        decay_type: str = "cosine",
+        verbose: bool = False,
+    ):
+        self.optimizer = optimizer
+        self.num_warmup_steps = num_warmup_steps
+        self.num_stable_steps = num_stable_steps
+        self.total_steps = total_steps
+        self.min_lr_ratio = min_lr_ratio
+        self.decay_type = decay_type
+        self.verbose = verbose
+        # Store initial learning rates
+        self.base_lrs = [group["lr"] for group in optimizer.param_groups]
+        self.current_step = 0
+    def get_lr_factor(self, step: int) -> float:
+        """Calculate the learning rate multiplication factor for given step."""
+        if step < self.num_warmup_steps:
+            # Warmup phase
+            return step / max(1, self.num_warmup_steps)
+        elif step < self.num_warmup_steps + self.num_stable_steps:
+            # Stable phase
+            return 1.0
+        else:
+            # Decay phase
+            decay_steps = (
+                self.total_steps - self.num_warmup_steps - self.num_stable_steps
+            )
+            if decay_steps <= 0:
+                return max(self.min_lr_ratio, 1.0)
+            progress = (
+                step - self.num_warmup_steps - self.num_stable_steps
+            ) / decay_steps
+            progress = min(progress, 1.0)
+            if self.decay_type == "cosine":
+                decay_factor = 0.5 * (1.0 + math.cos(math.pi * progress))
+            elif self.decay_type == "linear":
+                decay_factor = 1.0 - progress
+            else:
+                raise ValueError(f"Unknown decay_type: {self.decay_type}")
+            return max(self.min_lr_ratio, decay_factor)
+    def step(self):
+        """Update learning rates for all parameter groups."""
+        lr_factor = self.get_lr_factor(self.current_step)
+        for param_group, base_lr in zip(self.optimizer.param_groups, self.base_lrs):
+            param_group["lr"] = base_lr * lr_factor
+        if self.verbose and self.current_step % 1000 == 0:
+            phase = self.get_phase()
+            print(
+                f"Step {self.current_step}: LR factor = {lr_factor:.6f}, Phase = {phase}"
+            )
+        self.current_step += 1
+    def get_phase(self) -> str:
+        """Get current training phase."""
+        if self.current_step < self.num_warmup_steps:
+            return "warmup"
+        elif self.current_step < self.num_warmup_steps + self.num_stable_steps:
+            return "stable"
+        else:
+            return "decay"
+    def state_dict(self) -> dict:
+        """Return scheduler state for checkpointing."""
+        return {
+            "current_step": self.current_step,
+            "base_lrs": self.base_lrs,
+        }
+    def load_state_dict(self, state_dict: dict):
+        """Load scheduler state from checkpoint."""
+        self.current_step = state_dict["current_step"]
+        self.base_lrs = state_dict["base_lrs"]

src/plotting/__init__.py ADDED Viewed

File without changes

src/plotting/gift_eval_utils.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import logging
+from typing import List, Optional, Tuple
+import numpy as np
+import pandas as pd
+from gluonts.model.forecast import QuantileForecast
+from src.data.frequency import parse_frequency
+from src.plotting.plot_multivariate_timeseries import (
+    plot_multivariate_timeseries,
+)
+logger = logging.getLogger(__name__)
+def _prepare_data_for_plotting(
+    input_data: dict, label_data: dict, max_context_length: int
+):
+    history_values = np.asarray(input_data["target"], dtype=np.float32)
+    future_values = np.asarray(label_data["target"], dtype=np.float32)
+    start_period = input_data["start"]
+    def ensure_time_first(arr: np.ndarray) -> np.ndarray:
+        if arr.ndim == 1:
+            return arr.reshape(-1, 1)
+        elif arr.ndim == 2:
+            if arr.shape[0] < arr.shape[1]:
+                return arr.T
+            return arr
+        else:
+            return arr.reshape(arr.shape[-1], -1).T
+    history_values = ensure_time_first(history_values)
+    future_values = ensure_time_first(future_values)
+    if max_context_length is not None and history_values.shape[0] > max_context_length:
+        history_values = history_values[-max_context_length:]
+    # Convert Period to Timestamp if needed
+    start_timestamp = (
+        start_period.to_timestamp()
+        if hasattr(start_period, "to_timestamp")
+        else pd.Timestamp(start_period)
+    )
+    return history_values, future_values, start_timestamp
+def _extract_quantile_predictions(
+    forecast,
+) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
+    """Extract (median, lower, upper) prediction arrays from a forecast object.
+    For a ``QuantileForecast`` the 0.5 quantile is the median and the 0.1/0.9
+    quantiles are the bounds. For any other forecast the median is derived
+    from ``forecast.samples`` and no bounds are returned. This is best-effort:
+    every failure path returns ``None`` entries instead of raising.
+    Returns:
+        Tuple ``(median, lower, upper)``; each entry is an array with at
+        least two dimensions, or ``None`` when it could not be extracted.
+    """
+    def ensure_2d_time_first(arr):
+        # Normalise to at least 2-D; None is passed through untouched.
+        if arr is None:
+            return None
+        arr = np.asarray(arr)
+        if arr.ndim == 1:
+            return arr.reshape(-1, 1)
+        elif arr.ndim == 2:
+            return arr
+        else:
+            # Keep the leading axis and flatten everything after it.
+            return arr.reshape(arr.shape[0], -1)
+    if isinstance(forecast, QuantileForecast):
+        try:
+            median_pred = forecast.quantile(0.5)
+            try:
+                lower_bound = forecast.quantile(0.1)
+                upper_bound = forecast.quantile(0.9)
+            except (KeyError, ValueError):
+                # Bounds are optional: if either lookup fails, drop both.
+                lower_bound = None
+                upper_bound = None
+            median_pred = ensure_2d_time_first(median_pred)
+            lower_bound = ensure_2d_time_first(lower_bound)
+            upper_bound = ensure_2d_time_first(upper_bound)
+            return median_pred, lower_bound, upper_bound
+        except Exception:
+            # Anything else went wrong above: retry with the median only.
+            try:
+                median_pred = forecast.quantile(0.5)
+                median_pred = ensure_2d_time_first(median_pred)
+                return median_pred, None, None
+            except Exception:
+                return None, None, None
+    else:
+        try:
+            samples = forecast.samples
+            if samples.ndim == 1:
+                median_pred = samples
+            elif samples.ndim == 2:
+                if samples.shape[0] == 1:
+                    # A single row is used as-is.
+                    median_pred = samples[0]
+                else:
+                    # presumably axis 0 indexes the samples; TODO confirm
+                    median_pred = np.median(samples, axis=0)
+            elif samples.ndim == 3:
+                median_pred = np.median(samples, axis=0)
+            else:
+                # Other ranks: fall back to the first entry along axis 0.
+                median_pred = samples[0] if len(samples) > 0 else samples
+            median_pred = ensure_2d_time_first(median_pred)
+            return median_pred, None, None
+        except Exception:
+            return None, None, None
+def _create_plot(
+    input_data: dict,
+    label_data: dict,
+    forecast,
+    dataset_full_name: str,
+    dataset_freq: str,
+    max_context_length: int,
+    title: Optional[str] = None,
+):
+    """Build one forecast figure for a single evaluation window.
+    Prepares history/future arrays, extracts the median and optional bounds
+    from ``forecast``, coerces them towards the shape of the ground-truth
+    future, and hands everything to ``plot_multivariate_timeseries`` with
+    ``show=False``.
+    Returns:
+        The figure, or ``None`` when predictions could not be extracted or
+        any step raised (the failure is logged as a warning, not re-raised).
+    """
+    try:
+        history_values, future_values, start_timestamp = _prepare_data_for_plotting(
+            input_data, label_data, max_context_length
+        )
+        median_pred, lower_bound, upper_bound = _extract_quantile_predictions(forecast)
+        if median_pred is None:
+            logger.warning(f"Could not extract predictions for {dataset_full_name}")
+            return None
+        def ensure_compatible_shape(pred_arr, target_arr):
+            # Best-effort reshaping of a prediction array towards target_arr's
+            # 2-D shape; None is passed through, and an array that matches no
+            # rule below is returned unchanged.
+            if pred_arr is None:
+                return None
+            pred_arr = np.asarray(pred_arr)
+            target_arr = np.asarray(target_arr)
+            if pred_arr.ndim == 1:
+                pred_arr = pred_arr.reshape(-1, 1)
+            if target_arr.ndim == 1:
+                target_arr = target_arr.reshape(-1, 1)
+            if pred_arr.shape != target_arr.shape:
+                if pred_arr.shape[0] == target_arr.shape[0]:
+                    # Same first axis, different second axis.
+                    if pred_arr.shape[1] == 1 and target_arr.shape[1] > 1:
+                        # Repeat the single prediction column for every target column.
+                        pred_arr = np.broadcast_to(pred_arr, target_arr.shape)
+                    elif pred_arr.shape[1] > 1 and target_arr.shape[1] == 1:
+                        # Keep only the first prediction column.
+                        pred_arr = pred_arr[:, :1]
+                elif pred_arr.shape[1] == target_arr.shape[1]:
+                    # Same second axis: cut the prediction to the shorter first axis.
+                    min_time = min(pred_arr.shape[0], target_arr.shape[0])
+                    pred_arr = pred_arr[:min_time]
+                else:
+                    if pred_arr.T.shape == target_arr.shape:
+                        # The prediction is simply transposed.
+                        pred_arr = pred_arr.T
+                    else:
+                        # Last resort: take the first target_arr.shape[0] values
+                        # in flattened order as one column, repeated across
+                        # every target column.
+                        if pred_arr.size >= target_arr.shape[0]:
+                            pred_arr = pred_arr.flatten()[
+                                : target_arr.shape[0]
+                            ].reshape(-1, 1)
+                            if target_arr.shape[1] > 1:
+                                pred_arr = np.broadcast_to(pred_arr, target_arr.shape)
+            return pred_arr
+        median_pred = ensure_compatible_shape(median_pred, future_values)
+        lower_bound = ensure_compatible_shape(lower_bound, future_values)
+        upper_bound = ensure_compatible_shape(upper_bound, future_values)
+        # Fall back to a default title when none was supplied.
+        title = title or f"GIFT-Eval: {dataset_full_name}"
+        frequency = parse_frequency(dataset_freq)
+        fig = plot_multivariate_timeseries(
+            history_values=history_values,
+            future_values=future_values,
+            predicted_values=median_pred,
+            lower_bound=lower_bound,
+            upper_bound=upper_bound,
+            start=start_timestamp,
+            frequency=frequency,
+            title=title,
+            show=False,
+        )
+        return fig
+    except Exception as e:
+        # Plotting is auxiliary: log and continue rather than abort the caller.
+        logger.warning(f"Failed to create plot for {dataset_full_name}: {e}")
+        return None
+def create_plots_for_dataset(
+    forecasts: List,
+    test_data,
+    dataset_metadata,
+    max_plots: int,
+    max_context_length: int,
+) -> List[Tuple[object, str]]:
+    input_data_list = list(test_data.input)
+    label_data_list = list(test_data.label)
+    num_plots = min(len(forecasts), max_plots)
+    logger.info(
+        f"Creating {num_plots} plots for {getattr(dataset_metadata, 'full_name', str(dataset_metadata))}"
+    )
+    figures_with_names: List[Tuple[object, str]] = []
+    for i in range(num_plots):
+        try:
+            forecast = forecasts[i]
+            input_data = input_data_list[i]
+            label_data = label_data_list[i]
+            title = (
+                f"GIFT-Eval: {dataset_metadata.full_name} - Window {i + 1}/{num_plots}"
+                if hasattr(dataset_metadata, "full_name")
+                else f"Window {i + 1}/{num_plots}"
+            )
+            fig = _create_plot(
+                input_data=input_data,
+                label_data=label_data,
+                forecast=forecast,
+                dataset_full_name=getattr(dataset_metadata, "full_name", "dataset"),
+                dataset_freq=getattr(dataset_metadata, "freq", "D"),
+                max_context_length=max_context_length,
+                title=title,
+            )
+            if fig is not None:
+                filename = (
+                    f"{getattr(dataset_metadata, 'freq', 'D')}_window_{i + 1:03d}.png"
+                )
+                figures_with_names.append((fig, filename))
+        except Exception as e:
+            logger.warning(f"Error creating plot for window {i + 1}: {e}")
+            continue
+    return figures_with_names

src/plotting/plot_timeseries.py ADDED Viewed

	@@ -0,0 +1,292 @@

+import logging
+from typing import List, Optional, Tuple, Union
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import torch
+import torchmetrics
+from matplotlib.figure import Figure
+from src.data.containers import BatchTimeSeriesContainer
+from src.data.frequency import Frequency
+logger = logging.getLogger(__name__)
+def calculate_smape(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """Calculate Symmetric Mean Absolute Percentage Error (SMAPE)."""
+    pred_tensor = torch.from_numpy(y_pred).float()
+    true_tensor = torch.from_numpy(y_true).float()
+    return torchmetrics.SymmetricMeanAbsolutePercentageError()(
+        pred_tensor, true_tensor
+    ).item()
+def _create_date_ranges(
+    start: Optional[Union[np.datetime64, pd.Timestamp]],
+    frequency: Optional[Union[Frequency, str]],
+    history_length: int,
+    prediction_length: int,
+) -> Tuple[pd.DatetimeIndex, pd.DatetimeIndex]:
+    """Create date ranges for history and future periods."""
+    if start is not None and frequency is not None:
+        start_timestamp = pd.Timestamp(start)
+        pandas_freq = frequency.to_pandas_freq(for_date_range=True)
+        history_dates = pd.date_range(
+            start=start_timestamp, periods=history_length, freq=pandas_freq
+        )
+        if prediction_length > 0:
+            next_timestamp = history_dates[-1] + pd.tseries.frequencies.to_offset(
+                pandas_freq
+            )
+            future_dates = pd.date_range(
+                start=next_timestamp, periods=prediction_length, freq=pandas_freq
+            )
+        else:
+            future_dates = pd.DatetimeIndex([])
+    else:
+        # Fallback to default daily frequency
+        history_dates = pd.date_range(
+            end=pd.Timestamp.now(), periods=history_length, freq="D"
+        )
+        if prediction_length > 0:
+            future_dates = pd.date_range(
+                start=history_dates[-1] + pd.Timedelta(days=1),
+                periods=prediction_length,
+                freq="D",
+            )
+        else:
+            future_dates = pd.DatetimeIndex([])
+    return history_dates, future_dates
+def _plot_single_channel(
+    ax: plt.Axes,
+    channel_idx: int,
+    history_dates: pd.DatetimeIndex,
+    future_dates: pd.DatetimeIndex,
+    history_values: np.ndarray,
+    future_values: Optional[np.ndarray] = None,
+    predicted_values: Optional[np.ndarray] = None,
+    lower_bound: Optional[np.ndarray] = None,
+    upper_bound: Optional[np.ndarray] = None,
+) -> None:
+    """Plot a single channel's time series data."""
+    # Plot history
+    ax.plot(
+        history_dates, history_values[:, channel_idx], color="black", label="History"
+    )
+    # Plot ground truth future
+    if future_values is not None:
+        ax.plot(
+            future_dates,
+            future_values[:, channel_idx],
+            color="blue",
+            label="Ground Truth",
+        )
+    # Plot predictions
+    if predicted_values is not None:
+        ax.plot(
+            future_dates,
+            predicted_values[:, channel_idx],
+            color="orange",
+            linestyle="--",
+            label="Prediction (Median)",
+        )
+    # Plot uncertainty band
+    if lower_bound is not None and upper_bound is not None:
+        ax.fill_between(
+            future_dates,
+            lower_bound[:, channel_idx],
+            upper_bound[:, channel_idx],
+            color="orange",
+            alpha=0.2,
+            label="Uncertainty Band",
+        )
+    ax.set_title(f"Channel {channel_idx + 1}")
+    ax.grid(True, which="both", linestyle="--", linewidth=0.5)
+def _setup_figure(num_channels: int) -> Tuple[Figure, List[plt.Axes]]:
+    """Create and configure the matplotlib figure and axes."""
+    fig, axes = plt.subplots(
+        num_channels, 1, figsize=(15, 3 * num_channels), sharex=True
+    )
+    if num_channels == 1:
+        axes = [axes]
+    return fig, axes
+def _finalize_plot(
+    fig: Figure,
+    axes: List[plt.Axes],
+    title: Optional[str] = None,
+    smape_value: Optional[float] = None,
+    output_file: Optional[str] = None,
+    show: bool = True,
+) -> None:
+    """Add legend, title, and save/show the plot."""
+    # Create legend from first axis
+    handles, labels = axes[0].get_legend_handles_labels()
+    fig.legend(handles, labels, loc="upper right")
+    # Set title with optional SMAPE
+    if title:
+        if smape_value is not None:
+            title = f"{title} | SMAPE: {smape_value:.4f}"
+        fig.suptitle(title, fontsize=16)
+    # Adjust layout
+    plt.tight_layout(rect=[0, 0.03, 1, 0.95] if title else None)
+    # Save and/or show
+    if output_file:
+        plt.savefig(output_file, dpi=300)
+    if show:
+        plt.show()
+    else:
+        plt.close(fig)
+def plot_multivariate_timeseries(
+    history_values: np.ndarray,
+    future_values: Optional[np.ndarray] = None,
+    predicted_values: Optional[np.ndarray] = None,
+    start: Optional[Union[np.datetime64, pd.Timestamp]] = None,
+    frequency: Optional[Union[Frequency, str]] = None,
+    title: Optional[str] = None,
+    output_file: Optional[str] = None,
+    show: bool = True,
+    lower_bound: Optional[np.ndarray] = None,
+    upper_bound: Optional[np.ndarray] = None,
+) -> Figure:
+    """Plot a multivariate time series with history, future, predictions, and uncertainty bands."""
+    # Calculate SMAPE if both predicted and true values are available
+    smape_value = None
+    if predicted_values is not None and future_values is not None:
+        try:
+            smape_value = calculate_smape(future_values, predicted_values)
+        except Exception as e:
+            logger.warning(f"Failed to calculate SMAPE: {str(e)}")
+    # Extract dimensions
+    num_channels = history_values.shape[1]
+    history_length = history_values.shape[0]
+    prediction_length = (
+        predicted_values.shape[0]
+        if predicted_values is not None
+        else (future_values.shape[0] if future_values is not None else 0)
+    )
+    # Create date ranges
+    history_dates, future_dates = _create_date_ranges(
+        start, frequency, history_length, prediction_length
+    )
+    # Setup figure
+    fig, axes = _setup_figure(num_channels)
+    # Plot each channel
+    for i in range(num_channels):
+        _plot_single_channel(
+            ax=axes[i],
+            channel_idx=i,
+            history_dates=history_dates,
+            future_dates=future_dates,
+            history_values=history_values,
+            future_values=future_values,
+            predicted_values=predicted_values,
+            lower_bound=lower_bound,
+            upper_bound=upper_bound,
+        )
+    # Finalize plot
+    _finalize_plot(fig, axes, title, smape_value, output_file, show)
+    return fig
+def _extract_quantile_predictions(
+    predicted_values: np.ndarray,
+    model_quantiles: List[float],
+) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
+    """Extract median, lower, and upper bound predictions from quantile output."""
+    try:
+        median_idx = model_quantiles.index(0.5)
+        lower_idx = model_quantiles.index(0.1)
+        upper_idx = model_quantiles.index(0.9)
+        median_preds = predicted_values[..., median_idx]
+        lower_bound = predicted_values[..., lower_idx]
+        upper_bound = predicted_values[..., upper_idx]
+        return median_preds, lower_bound, upper_bound
+    except (ValueError, IndexError):
+        logger.warning(
+            "Could not find 0.1, 0.5, 0.9 quantiles for plotting. Using median of available quantiles."
+        )
+        median_preds = predicted_values[..., predicted_values.shape[-1] // 2]
+        return median_preds, None, None
+def plot_from_container(
+    batch: BatchTimeSeriesContainer,
+    sample_idx: int,
+    predicted_values: Optional[np.ndarray] = None,
+    model_quantiles: Optional[List[float]] = None,
+    title: Optional[str] = None,
+    output_file: Optional[str] = None,
+    show: bool = True,
+) -> Figure:
+    """Plot a single sample from a BatchTimeSeriesContainer with proper quantile handling."""
+    # Extract data for the specific sample
+    history_values = batch.history_values[sample_idx].cpu().numpy()
+    future_values = batch.future_values[sample_idx].cpu().numpy()
+    # Process predictions
+    if predicted_values is not None:
+        # Handle batch vs single sample predictions
+        if predicted_values.ndim >= 3 or (
+            predicted_values.ndim == 2
+            and predicted_values.shape[0] > future_values.shape[0]
+        ):
+            sample_preds = predicted_values[sample_idx]
+        else:
+            sample_preds = predicted_values
+        # Extract quantile information if available
+        if model_quantiles:
+            median_preds, lower_bound, upper_bound = _extract_quantile_predictions(
+                sample_preds, model_quantiles
+            )
+        else:
+            median_preds = sample_preds
+            lower_bound = None
+            upper_bound = None
+    else:
+        median_preds = None
+        lower_bound = None
+        upper_bound = None
+    # Create the plot
+    return plot_multivariate_timeseries(
+        history_values=history_values,
+        future_values=future_values,
+        predicted_values=median_preds,
+        start=batch.start[sample_idx],
+        frequency=batch.frequency[sample_idx],
+        title=title,
+        output_file=output_file,
+        show=show,
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
+    )

src/synthetic_generation/__init__.py ADDED Viewed

File without changes

src/synthetic_generation/abstract_classes.py ADDED Viewed

	@@ -0,0 +1,97 @@

+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+import numpy as np
+import torch
+from src.data.containers import TimeSeriesContainer
+from src.data.frequency import (
+    select_safe_random_frequency,
+    select_safe_start_date,
+)
+from src.synthetic_generation.generator_params import GeneratorParams
+class AbstractTimeSeriesGenerator(ABC):
+    """
+    Abstract base class for synthetic time series generators.
+    """
+    @abstractmethod
+    def generate_time_series(self, random_seed: Optional[int] = None) -> np.ndarray:
+        """
+        Generate synthetic time series data.
+        Parameters
+        ----------
+        random_seed : int, optional
+            Random seed for reproducibility.
+        Returns
+        -------
+        np.ndarray
+            Time series values of shape (length,) for univariate or
+            (length, num_channels) for multivariate time series.
+        """
+        pass
+class GeneratorWrapper:
+    """
+    Unified base class for all generator wrappers, using a GeneratorParams dataclass
+    for configuration. Provides parameter sampling, validation, and batch formatting utilities.
+    """
+    def __init__(self, params: GeneratorParams):
+        """
+        Initialize the GeneratorWrapper with a GeneratorParams dataclass.
+        Parameters
+        ----------
+        params : GeneratorParams
+            Dataclass instance containing all generator configuration parameters.
+        """
+        self.params = params
+        self._set_random_seeds(self.params.global_seed)
+    def _set_random_seeds(self, seed: int) -> None:
+        # For parameter sampling, we want diversity across batches even with similar seeds
+        # Use a hash of the generator class name to ensure different generators get different parameter sequences
+        param_seed = seed + hash(self.__class__.__name__) % 2**31
+        self.rng = np.random.default_rng(param_seed)
+        # Set global numpy and torch seeds for deterministic behavior in underlying generators
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+    def _sample_parameters(self, batch_size: int) -> Dict[str, Any]:
+        """
+        Sample parameters with total_length fixed and history_length calculated.
+        Returns
+        -------
+        Dict[str, Any]
+            Dictionary containing sampled parameter values where
+            history_length = total_length - future_length.
+        """
+        # Select a suitable frequency based on the total length
+        frequency = [
+            select_safe_random_frequency(self.params.length, self.rng)
+            for _ in range(batch_size)
+        ]
+        start = [
+            select_safe_start_date(self.params.length, frequency[i], self.rng)
+            for i in range(batch_size)
+        ]
+        return {
+            "frequency": frequency,
+            "start": start,
+        }
+    @abstractmethod
+    def generate_batch(
+        self, batch_size: int, seed: Optional[int] = None, **kwargs
+    ) -> TimeSeriesContainer:
+        raise NotImplementedError("Subclasses must implement generate_batch()")

src/synthetic_generation/anomalies/anomaly_generator.py ADDED Viewed

	@@ -0,0 +1,293 @@

+from typing import List, Optional, Set
+import numpy as np
+from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator
+from src.synthetic_generation.generator_params import (
+    AnomalyGeneratorParams,
+    AnomalyType,
+    MagnitudePattern,
+)
+class AnomalyGenerator(AbstractTimeSeriesGenerator):
+    """
+    Generator for synthetic time series with realistic spike anomalies.
+    Creates clean constant baseline signals with periodic spike patterns that
+    resemble real-world time series behavior, including clustering and magnitude patterns.
+    """
+    def __init__(self, params: AnomalyGeneratorParams):
+        """
+        Initialize the AnomalyGenerator.
+        Parameters
+        ----------
+        params : AnomalyGeneratorParams
+            Configuration parameters for anomaly generation.
+        """
+        self.params = params
+    def _determine_spike_direction(self) -> AnomalyType:
+        """
+        Determine if this series will have only up or only down spikes.
+        Returns
+        -------
+        AnomalyType
+            Either SPIKE_UP or SPIKE_DOWN for the entire series.
+        """
+        if np.random.random() < self.params.spike_direction_probability:
+            return AnomalyType.SPIKE_UP
+        else:
+            return AnomalyType.SPIKE_DOWN
+    def _generate_spike_positions(self) -> List[List[int]]:
+        """
+        Generate spike positions:
+        - Always create uniformly spaced single spikes (base schedule)
+        - With 25% probability: add clusters (1-3 extra spikes) near a fraction of base spikes
+        - With 25% probability: add single random spikes across the series
+        Returns
+        -------
+        List[List[int]]
+            List of spike events, where each event is a list of positions
+            (single spike = [pos], cluster = [pos, pos+offset, ...]).
+        """
+        # Base uniform schedule (no jitter/variance)
+        base_period = np.random.randint(*self.params.base_period_range)
+        start_position = base_period // 2
+        base_positions = list(range(start_position, self.params.length, base_period))
+        # Start with single-spike events at base positions
+        spike_events: List[List[int]] = [[pos] for pos in base_positions]
+        if not base_positions:
+            return spike_events
+        # Decide series type
+        series_draw = np.random.random()
+        # 25%: augment with clusters near some base spikes
+        if series_draw < self.params.cluster_series_probability:
+            num_base_events = len(base_positions)
+            num_to_augment = max(
+                1, int(round(self.params.cluster_event_fraction * num_base_events))
+            )
+            num_to_augment = min(num_to_augment, num_base_events)
+            chosen_indices = (
+                np.random.choice(num_base_events, size=num_to_augment, replace=False)
+                if num_to_augment > 0
+                else np.array([], dtype=int)
+            )
+            for idx in chosen_indices:
+                base_pos = base_positions[int(idx)]
+                # Number of additional spikes (1..3) per selected event
+                num_additional = np.random.randint(
+                    *self.params.cluster_additional_spikes_range
+                )
+                if num_additional <= 0:
+                    continue
+                # Draw offsets around base spike and exclude zero to avoid duplicates
+                offsets = np.random.randint(
+                    self.params.cluster_offset_range[0],
+                    self.params.cluster_offset_range[1],
+                    size=num_additional,
+                )
+                offsets = [int(off) for off in offsets if off != 0]
+                cluster_positions: Set[int] = set([base_pos])
+                for off in offsets:
+                    pos = base_pos + off
+                    if 0 <= pos < self.params.length:
+                        cluster_positions.add(pos)
+                spike_events[int(idx)] = sorted(cluster_positions)
+        # Next 25%: add random single spikes across the series
+        elif series_draw < (
+            self.params.cluster_series_probability
+            + self.params.random_series_probability
+        ):
+            num_base_events = len(base_positions)
+            num_random = int(
+                round(self.params.random_spike_fraction_of_base * num_base_events)
+            )
+            if num_random > 0:
+                all_indices = np.arange(self.params.length)
+                base_array = np.array(base_positions, dtype=int)
+                candidates = np.setdiff1d(all_indices, base_array, assume_unique=False)
+                if candidates.size > 0:
+                    choose_n = min(num_random, candidates.size)
+                    rand_positions = np.random.choice(
+                        candidates, size=choose_n, replace=False
+                    )
+                    for pos in rand_positions:
+                        spike_events.append([int(pos)])
+        # Else: 50% clean series (uniform singles only)
+        return spike_events
+    def _generate_spike_magnitudes(self, total_spikes: int) -> np.ndarray:
+        """
+        Generate spike magnitudes based on the configured pattern.
+        Parameters
+        ----------
+        total_spikes : int
+            Total number of individual spikes to generate magnitudes for.
+        Returns
+        -------
+        np.ndarray
+            Array of spike magnitudes.
+        """
+        base_magnitude = np.random.uniform(*self.params.base_magnitude_range)
+        magnitudes = np.zeros(total_spikes)
+        if self.params.magnitude_pattern == MagnitudePattern.CONSTANT:
+            # All spikes have similar magnitude with small noise
+            magnitudes = np.full(total_spikes, base_magnitude)
+            noise = np.random.normal(
+                0, self.params.magnitude_noise * base_magnitude, total_spikes
+            )
+            magnitudes += noise
+        elif self.params.magnitude_pattern == MagnitudePattern.INCREASING:
+            # Magnitude increases over time
+            trend = np.linspace(
+                0,
+                self.params.magnitude_trend_strength * base_magnitude * total_spikes,
+                total_spikes,
+            )
+            magnitudes = base_magnitude + trend
+        elif self.params.magnitude_pattern == MagnitudePattern.DECREASING:
+            # Magnitude decreases over time
+            trend = np.linspace(
+                0,
+                -self.params.magnitude_trend_strength * base_magnitude * total_spikes,
+                total_spikes,
+            )
+            magnitudes = base_magnitude + trend
+        elif self.params.magnitude_pattern == MagnitudePattern.CYCLICAL:
+            # Cyclical magnitude pattern
+            cycle_length = int(total_spikes * self.params.cyclical_period_ratio)
+            if cycle_length == 0:
+                cycle_length = max(1, total_spikes // 4)
+            phase = np.linspace(
+                0, 2 * np.pi * total_spikes / cycle_length, total_spikes
+            )
+            cyclical_component = 0.3 * base_magnitude * np.sin(phase)
+            magnitudes = base_magnitude + cyclical_component
+        elif self.params.magnitude_pattern == MagnitudePattern.RANDOM_BOUNDED:
+            # Random with correlation between consecutive spikes
+            magnitudes[0] = base_magnitude
+            for i in range(1, total_spikes):
+                # Correlated random walk
+                prev_magnitude = magnitudes[i - 1]
+                random_component = np.random.normal(0, 0.2 * base_magnitude)
+                magnitudes[i] = (
+                    self.params.magnitude_correlation * prev_magnitude
+                    + (1 - self.params.magnitude_correlation) * base_magnitude
+                    + random_component
+                )
+        # Add noise to all patterns
+        noise = np.random.normal(
+            0, self.params.magnitude_noise * base_magnitude, total_spikes
+        )
+        magnitudes += noise
+        # Ensure magnitudes are positive and within reasonable bounds
+        min_magnitude = 0.1 * base_magnitude
+        max_magnitude = 3.0 * base_magnitude
+        magnitudes = np.clip(magnitudes, min_magnitude, max_magnitude)
+        return magnitudes
+    def _inject_spike_anomalies(
+        self, signal: np.ndarray, spike_direction: AnomalyType
+    ) -> np.ndarray:
+        """
+        Inject spike anomalies into the clean signal using realistic patterns.
+        Parameters
+        ----------
+        signal : np.ndarray
+            Clean baseline signal to inject spikes into.
+        spike_direction : AnomalyType
+            Direction of spikes for this series (all up or all down).
+        Returns
+        -------
+        np.ndarray
+            Signal with injected spike anomalies.
+        """
+        anomalous_signal = signal.copy()
+        # Generate spike positions based on pattern
+        spike_events = self._generate_spike_positions()
+        # Flatten spike events to get total number of individual spikes
+        all_positions = []
+        for event in spike_events:
+            all_positions.extend(event)
+        if not all_positions:
+            return anomalous_signal
+        # Generate magnitudes for all spikes
+        magnitudes = self._generate_spike_magnitudes(len(all_positions))
+        # Inject spikes
+        for i, position in enumerate(all_positions):
+            if position < len(anomalous_signal):
+                magnitude = magnitudes[i]
+                if spike_direction == AnomalyType.SPIKE_UP:
+                    anomalous_signal[position] += magnitude
+                else:  # SPIKE_DOWN
+                    anomalous_signal[position] -= magnitude
+        return anomalous_signal
+    def generate_time_series(self, random_seed: Optional[int] = None) -> np.ndarray:
+        """
+        Generate a synthetic time series with realistic spike anomalies.
+        Parameters
+        ----------
+        random_seed : int, optional
+            Random seed for reproducibility.
+        Returns
+        -------
+        np.ndarray
+            Generated time series of shape (length,) - clean baseline with periodic spikes.
+        """
+        if random_seed is not None:
+            np.random.seed(random_seed)
+        # Generate clean baseline signal (constant level)
+        baseline_level = np.random.uniform(*self.params.base_level_range)
+        signal = np.full(self.params.length, baseline_level)
+        # Determine spike direction for this series (all up or all down)
+        spike_direction = self._determine_spike_direction()
+        # Inject spike anomalies with realistic patterns
+        anomalous_signal = self._inject_spike_anomalies(signal, spike_direction)
+        return anomalous_signal

src/synthetic_generation/anomalies/anomaly_generator_wrapper.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from typing import Optional
+import numpy as np
+from src.data.containers import TimeSeriesContainer
+from src.synthetic_generation.abstract_classes import GeneratorWrapper
+from src.synthetic_generation.anomalies.anomaly_generator import AnomalyGenerator
+from src.synthetic_generation.generator_params import AnomalyGeneratorParams
+class AnomalyGeneratorWrapper(GeneratorWrapper):
+    """
+    Wrapper for AnomalyGenerator that handles batch generation and formatting.
+    """
+    def __init__(self, params: AnomalyGeneratorParams):
+        """
+        Initialize the AnomalyGeneratorWrapper.
+        Parameters
+        ----------
+        params : AnomalyGeneratorParams
+            Parameters for the anomaly generator.
+        """
+        super().__init__(params)
+        self.generator = AnomalyGenerator(params)
+    def generate_batch(
+        self, batch_size: int, seed: Optional[int] = None
+    ) -> TimeSeriesContainer:
+        """
+        Generate a batch of anomaly time series.
+        Parameters
+        ----------
+        batch_size : int
+            Number of time series to generate.
+        seed : int, optional
+            Random seed for reproducibility.
+        Returns
+        -------
+        TimeSeriesContainer
+            TimeSeriesContainer containing the generated time series.
+        """
+        if seed is not None:
+            self._set_random_seeds(seed)
+        # Sample parameters for the batch
+        sampled_params = self._sample_parameters(batch_size)
+        # Generate time series
+        values = []
+        for i in range(batch_size):
+            # Use a different seed for each series in the batch
+            series_seed = (seed + i) if seed is not None else None
+            series = self.generator.generate_time_series(series_seed)
+            values.append(series)
+        return TimeSeriesContainer(
+            values=np.array(values),
+            start=sampled_params["start"],
+            frequency=sampled_params["frequency"],
+        )

src/synthetic_generation/audio_generators/financial_volatility_generator.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from typing import Optional
+import numpy as np
+from pyo import LFO, BrownNoise, Follower, Metro, Mix, Sine, TrigExpseg
+from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator
+from src.synthetic_generation.audio_generators.utils import (
+    normalize_waveform,
+    run_offline_pyo,
+)
+class FinancialVolatilityAudioGenerator(AbstractTimeSeriesGenerator):
+    """
+    Generate synthetic univariate time series that mimics financial market
+    behavior with volatility clustering and occasional jumps.
+    """
+    def __init__(
+        self,
+        length: int,
+        server_duration: float,
+        sample_rate: int,
+        normalize_output: bool,
+        # Trend LFO
+        trend_lfo_freq_range: tuple[float, float],
+        trend_lfo_mul_range: tuple[float, float],
+        # Volatility clustering
+        volatility_carrier_freq_range: tuple[float, float],
+        follower_freq_range: tuple[float, float],
+        volatility_range: tuple[float, float],
+        # Jumps
+        jump_metro_time_range: tuple[float, float],
+        jump_env_start_range: tuple[float, float],
+        jump_env_decay_time_range: tuple[float, float],
+        jump_freq_range: tuple[float, float],
+        jump_direction_up_probability: float,
+        random_seed: Optional[int] = None,
+    ):
+        self.length = length
+        self.server_duration = server_duration
+        self.sample_rate = sample_rate
+        self.normalize_output = normalize_output
+        self.trend_lfo_freq_range = trend_lfo_freq_range
+        self.trend_lfo_mul_range = trend_lfo_mul_range
+        self.volatility_carrier_freq_range = volatility_carrier_freq_range
+        self.follower_freq_range = follower_freq_range
+        self.volatility_range = volatility_range
+        self.jump_metro_time_range = jump_metro_time_range
+        self.jump_env_start_range = jump_env_start_range
+        self.jump_env_decay_time_range = jump_env_decay_time_range
+        self.jump_freq_range = jump_freq_range
+        self.jump_direction_up_probability = jump_direction_up_probability
+        self.rng = np.random.default_rng(random_seed)
+    def _build_synth(self):
+        # Trend
+        trend_freq = self.rng.uniform(*self.trend_lfo_freq_range)
+        trend_mul = self.rng.uniform(*self.trend_lfo_mul_range)
+        trend = LFO(freq=trend_freq, type=0, mul=trend_mul)
+        # Volatility clustering
+        carrier_freq = self.rng.uniform(*self.volatility_carrier_freq_range)
+        follower_freq = self.rng.uniform(*self.follower_freq_range)
+        volatility_min, volatility_max = self.volatility_range
+        volatility_osc = Sine(freq=carrier_freq)
+        volatility = Follower(volatility_osc, freq=follower_freq).range(
+            volatility_min, volatility_max
+        )
+        market_noise = BrownNoise(mul=volatility)
+        # Jumps
+        jump_time = self.rng.uniform(*self.jump_metro_time_range)
+        jump_env_start = self.rng.uniform(*self.jump_env_start_range)
+        jump_env_decay = self.rng.uniform(*self.jump_env_decay_time_range)
+        jump_freq = self.rng.uniform(*self.jump_freq_range)
+        direction = (
+            1.0 if self.rng.random() < self.jump_direction_up_probability else -1.0
+        )
+        jump_trigger = Metro(time=jump_time).play()
+        jump_env = TrigExpseg(
+            jump_trigger, list=[(0.0, jump_env_start), (jump_env_decay, 0.0)]
+        )
+        jumps = Sine(freq=jump_freq, mul=jump_env * direction)
+        return Mix([trend, market_noise, jumps], voices=1)
+    def generate_time_series(self, random_seed: Optional[int] = None) -> np.ndarray:
+        if random_seed is not None:
+            self.rng = np.random.default_rng(random_seed)
+        waveform = run_offline_pyo(
+            synth_builder=self._build_synth,
+            server_duration=self.server_duration,
+            sample_rate=self.sample_rate,
+            length=self.length,
+        )
+        if self.normalize_output:
+            waveform = normalize_waveform(waveform)
+        return waveform

src/synthetic_generation/audio_generators/financial_volatility_wrapper.py ADDED Viewed

	@@ -0,0 +1,91 @@

+from typing import Any, Dict, Optional
+import numpy as np
+from src.data.containers import TimeSeriesContainer
+from src.synthetic_generation.abstract_classes import GeneratorWrapper
+from src.synthetic_generation.audio_generators.financial_volatility_generator import (
+    FinancialVolatilityAudioGenerator,
+)
+from src.synthetic_generation.generator_params import FinancialVolatilityAudioParams
+class FinancialVolatilityAudioWrapper(GeneratorWrapper):
+    def __init__(self, params: FinancialVolatilityAudioParams):
+        super().__init__(params)
+        self.params: FinancialVolatilityAudioParams = params
+    def _sample_parameters(self, batch_size: int) -> Dict[str, Any]:
+        params = super()._sample_parameters(batch_size)
+        params.update(
+            {
+                "length": self.params.length,
+                "server_duration": self.params.server_duration,
+                "sample_rate": self.params.sample_rate,
+                "normalize_output": self.params.normalize_output,
+                # Trend LFO
+                "trend_lfo_freq_range": self.params.trend_lfo_freq_range,
+                "trend_lfo_mul_range": self.params.trend_lfo_mul_range,
+                # Volatility clustering
+                "volatility_carrier_freq_range": self.params.volatility_carrier_freq_range,
+                "follower_freq_range": self.params.follower_freq_range,
+                "volatility_range": self.params.volatility_range,
+                # Jumps
+                "jump_metro_time_range": self.params.jump_metro_time_range,
+                "jump_env_start_range": self.params.jump_env_start_range,
+                "jump_env_decay_time_range": self.params.jump_env_decay_time_range,
+                "jump_freq_range": self.params.jump_freq_range,
+                "jump_direction_up_probability": self.params.jump_direction_up_probability,
+            }
+        )
+        return params
+    def generate_batch(
+        self,
+        batch_size: int,
+        seed: Optional[int] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> TimeSeriesContainer:
+        if seed is not None:
+            self._set_random_seeds(seed)
+        if params is None:
+            params = self._sample_parameters(batch_size)
+        generator = FinancialVolatilityAudioGenerator(
+            length=params["length"],
+            server_duration=params["server_duration"],
+            sample_rate=params["sample_rate"],
+            normalize_output=params["normalize_output"],
+            trend_lfo_freq_range=params["trend_lfo_freq_range"],
+            trend_lfo_mul_range=params["trend_lfo_mul_range"],
+            volatility_carrier_freq_range=params["volatility_carrier_freq_range"],
+            follower_freq_range=params["follower_freq_range"],
+            volatility_range=params["volatility_range"],
+            jump_metro_time_range=params["jump_metro_time_range"],
+            jump_env_start_range=params["jump_env_start_range"],
+            jump_env_decay_time_range=params["jump_env_decay_time_range"],
+            jump_freq_range=params["jump_freq_range"],
+            jump_direction_up_probability=params["jump_direction_up_probability"],
+            random_seed=seed,
+        )
+        def _derive_series_seed(base_seed: int, index: int) -> int:
+            # Mix base seed with index and class hash to decorrelate adjacent seeds
+            mixed = (
+                (base_seed & 0x7FFFFFFF)
+                ^ ((index * 0x9E3779B1) & 0x7FFFFFFF)
+                ^ (hash(self.__class__.__name__) & 0x7FFFFFFF)
+            )
+            return int(mixed)
+        batch_values = []
+        for i in range(batch_size):
+            series_seed = None if seed is None else _derive_series_seed(seed, i)
+            values = generator.generate_time_series(random_seed=series_seed)
+            batch_values.append(values)
+        return TimeSeriesContainer(
+            values=np.array(batch_values),
+            start=params["start"],
+            frequency=params["frequency"],
+        )

src/synthetic_generation/audio_generators/multi_scale_fractal_generator.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from typing import Optional
+import numpy as np
+from pyo import Biquad, BrownNoise, Mix
+from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator
+from src.synthetic_generation.audio_generators.utils import (
+    normalize_waveform,
+    run_offline_pyo,
+)
+class MultiScaleFractalAudioGenerator(AbstractTimeSeriesGenerator):
+    """
+    Generate multi-scale fractal-like patterns by filtering noise at
+    multiple frequency bands with varying Q and attenuation per scale.
+    """
+    def __init__(
+        self,
+        length: int,
+        server_duration: float,
+        sample_rate: int,
+        normalize_output: bool,
+        base_noise_mul_range: tuple[float, float],
+        num_scales_range: tuple[int, int],
+        scale_freq_base_range: tuple[float, float],
+        q_factor_range: tuple[float, float],
+        per_scale_attenuation_range: tuple[float, float],
+        random_seed: Optional[int] = None,
+    ):
+        self.length = length
+        self.server_duration = server_duration
+        self.sample_rate = sample_rate
+        self.normalize_output = normalize_output
+        self.base_noise_mul_range = base_noise_mul_range
+        self.num_scales_range = num_scales_range
+        self.scale_freq_base_range = scale_freq_base_range
+        self.q_factor_range = q_factor_range
+        self.per_scale_attenuation_range = per_scale_attenuation_range
+        self.rng = np.random.default_rng(random_seed)
+    def _build_synth(self):
+        base_mul = self.rng.uniform(*self.base_noise_mul_range)
+        base = BrownNoise(mul=base_mul)
+        num_scales = int(
+            self.rng.integers(self.num_scales_range[0], self.num_scales_range[1] + 1)
+        )
+        scales = []
+        for i in range(num_scales):
+            scale_freq = self.rng.uniform(*self.scale_freq_base_range) * (0.5**i)
+            q_factor = self.rng.uniform(*self.q_factor_range)
+            per_scale_att = self.rng.uniform(*self.per_scale_attenuation_range)
+            filtered = Biquad(base, freq=scale_freq, q=q_factor, type=0)
+            scales.append(filtered * (per_scale_att**i))
+        return Mix(scales, voices=1)
+    def generate_time_series(self, random_seed: Optional[int] = None) -> np.ndarray:
+        if random_seed is not None:
+            self.rng = np.random.default_rng(random_seed)
+        waveform = run_offline_pyo(
+            synth_builder=self._build_synth,
+            server_duration=self.server_duration,
+            sample_rate=self.sample_rate,
+            length=self.length,
+        )
+        if self.normalize_output:
+            waveform = normalize_waveform(waveform)
+        return waveform

src/synthetic_generation/audio_generators/multi_scale_fractal_wrapper.py ADDED Viewed

	@@ -0,0 +1,77 @@

+from typing import Any, Dict, Optional
+import numpy as np
+from src.data.containers import TimeSeriesContainer
+from src.synthetic_generation.abstract_classes import GeneratorWrapper
+from src.synthetic_generation.audio_generators.multi_scale_fractal_generator import (
+    MultiScaleFractalAudioGenerator,
+)
+from src.synthetic_generation.generator_params import MultiScaleFractalAudioParams
+class MultiScaleFractalAudioWrapper(GeneratorWrapper):
+    def __init__(self, params: MultiScaleFractalAudioParams):
+        super().__init__(params)
+        self.params: MultiScaleFractalAudioParams = params
+    def _sample_parameters(self, batch_size: int) -> Dict[str, Any]:
+        params = super()._sample_parameters(batch_size)
+        params.update(
+            {
+                "length": self.params.length,
+                "server_duration": self.params.server_duration,
+                "sample_rate": self.params.sample_rate,
+                "normalize_output": self.params.normalize_output,
+                "base_noise_mul_range": self.params.base_noise_mul_range,
+                "num_scales_range": self.params.num_scales_range,
+                "scale_freq_base_range": self.params.scale_freq_base_range,
+                "q_factor_range": self.params.q_factor_range,
+                "per_scale_attenuation_range": self.params.per_scale_attenuation_range,
+            }
+        )
+        return params
+    def generate_batch(
+        self,
+        batch_size: int,
+        seed: Optional[int] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> TimeSeriesContainer:
+        if seed is not None:
+            self._set_random_seeds(seed)
+        if params is None:
+            params = self._sample_parameters(batch_size)
+        generator = MultiScaleFractalAudioGenerator(
+            length=params["length"],
+            server_duration=params["server_duration"],
+            sample_rate=params["sample_rate"],
+            normalize_output=params["normalize_output"],
+            base_noise_mul_range=params["base_noise_mul_range"],
+            num_scales_range=params["num_scales_range"],
+            scale_freq_base_range=params["scale_freq_base_range"],
+            q_factor_range=params["q_factor_range"],
+            per_scale_attenuation_range=params["per_scale_attenuation_range"],
+            random_seed=seed,
+        )
+        def _derive_series_seed(base_seed: int, index: int) -> int:
+            mixed = (
+                (base_seed & 0x7FFFFFFF)
+                ^ ((index * 0x9E3779B1) & 0x7FFFFFFF)
+                ^ (hash(self.__class__.__name__) & 0x7FFFFFFF)
+            )
+            return int(mixed)
+        batch_values = []
+        for i in range(batch_size):
+            series_seed = None if seed is None else _derive_series_seed(seed, i)
+            values = generator.generate_time_series(random_seed=series_seed)
+            batch_values.append(values)
+        return TimeSeriesContainer(
+            values=np.array(batch_values),
+            start=params["start"],
+            frequency=params["frequency"],
+        )

src/synthetic_generation/audio_generators/network_topology_generator.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from typing import Optional, Tuple
+import numpy as np
+from pyo import LFO, BrownNoise, Metro, Mix, Noise, TrigExpseg
+from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator
+from src.synthetic_generation.audio_generators.utils import (
+    normalize_waveform,
+    run_offline_pyo,
+)
+class NetworkTopologyAudioGenerator(AbstractTimeSeriesGenerator):
+    """
+    Simulate network traffic with base flow, packet bursts, periodic congestion,
+    protocol overhead, and DDoS-like attacks. Parameters are sampled per series.
+    """
+    def __init__(
+        self,
+        length: int,
+        server_duration: float,
+        sample_rate: int,
+        normalize_output: bool,
+        traffic_lfo_freq_range: tuple[float, float],
+        traffic_lfo_mul_range: tuple[float, float],
+        burst_rate_hz_range: tuple[float, float],
+        burst_duration_range: tuple[float, float],
+        burst_mul_range: tuple[float, float],
+        congestion_period_range: tuple[float, float],
+        congestion_depth_range: tuple[float, float],
+        congestion_release_time_range: tuple[float, float],
+        overhead_lfo_freq_range: tuple[float, float],
+        overhead_mul_range: tuple[float, float],
+        attack_period_range: tuple[float, float],
+        attack_env_points: Tuple[
+            Tuple[float, float], Tuple[float, float], Tuple[float, float]
+        ],
+        attack_mul_range: tuple[float, float],
+        random_seed: Optional[int] = None,
+    ):
+        self.length = length
+        self.server_duration = server_duration
+        self.sample_rate = sample_rate
+        self.normalize_output = normalize_output
+        self.traffic_lfo_freq_range = traffic_lfo_freq_range
+        self.traffic_lfo_mul_range = traffic_lfo_mul_range
+        self.burst_rate_hz_range = burst_rate_hz_range
+        self.burst_duration_range = burst_duration_range
+        self.burst_mul_range = burst_mul_range
+        self.congestion_period_range = congestion_period_range
+        self.congestion_depth_range = congestion_depth_range
+        self.congestion_release_time_range = congestion_release_time_range
+        self.overhead_lfo_freq_range = overhead_lfo_freq_range
+        self.overhead_mul_range = overhead_mul_range
+        self.attack_period_range = attack_period_range
+        self.attack_env_points = attack_env_points
+        self.attack_mul_range = attack_mul_range
+        self.rng = np.random.default_rng(random_seed)
+    def _build_synth(self):
+        # Base traffic flow
+        traffic_freq = self.rng.uniform(*self.traffic_lfo_freq_range)
+        traffic_mul = self.rng.uniform(*self.traffic_lfo_mul_range)
+        traffic_base = LFO(freq=traffic_freq, type=0, mul=traffic_mul)
+        # Packet bursts
+        burst_rate = self.rng.uniform(*self.burst_rate_hz_range)
+        burst_trigger = Metro(time=1.0 / burst_rate).play()
+        burst_duration = self.rng.uniform(*self.burst_duration_range)
+        burst_env = TrigExpseg(burst_trigger, list=[(0.0, 0.8), (burst_duration, 0.0)])
+        burst_mul = self.rng.uniform(*self.burst_mul_range)
+        bursts = Noise(mul=burst_env * burst_mul)
+        # Periodic congestion (negative amplitude dip)
+        congestion_period = self.rng.uniform(*self.congestion_period_range)
+        congestion_trigger = Metro(time=congestion_period).play()
+        congestion_depth = self.rng.uniform(*self.congestion_depth_range)  # negative
+        congestion_release = self.rng.uniform(*self.congestion_release_time_range)
+        congestion_env = TrigExpseg(
+            congestion_trigger,
+            list=[(0.0, congestion_depth), (congestion_release, 0.0)],
+        )
+        # Protocol overhead
+        overhead_freq = self.rng.uniform(*self.overhead_lfo_freq_range)
+        overhead_mul = self.rng.uniform(*self.overhead_mul_range)
+        overhead = LFO(freq=overhead_freq, type=1, mul=overhead_mul)
+        # DDoS-like attacks
+        attack_period = self.rng.uniform(*self.attack_period_range)
+        attack_trigger = Metro(time=attack_period).play()
+        attack_env = TrigExpseg(attack_trigger, list=list(self.attack_env_points))
+        attack_mul = self.rng.uniform(*self.attack_mul_range)
+        attacks = BrownNoise(mul=attack_env * attack_mul)
+        return Mix([traffic_base, bursts, congestion_env, overhead, attacks], voices=1)
+    def generate_time_series(self, random_seed: Optional[int] = None) -> np.ndarray:
+        if random_seed is not None:
+            self.rng = np.random.default_rng(random_seed)
+        waveform = run_offline_pyo(
+            synth_builder=self._build_synth,
+            server_duration=self.server_duration,
+            sample_rate=self.sample_rate,
+            length=self.length,
+        )
+        if self.normalize_output:
+            waveform = normalize_waveform(waveform)
+        return waveform

src/synthetic_generation/audio_generators/network_topology_wrapper.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from typing import Any, Dict, Optional
+import numpy as np
+from src.data.containers import TimeSeriesContainer
+from src.synthetic_generation.abstract_classes import GeneratorWrapper
+from src.synthetic_generation.audio_generators.network_topology_generator import (
+    NetworkTopologyAudioGenerator,
+)
+from src.synthetic_generation.generator_params import NetworkTopologyAudioParams
+class NetworkTopologyAudioWrapper(GeneratorWrapper):
+    def __init__(self, params: NetworkTopologyAudioParams):
+        super().__init__(params)
+        self.params: NetworkTopologyAudioParams = params
+    def _sample_parameters(self, batch_size: int) -> Dict[str, Any]:
+        params = super()._sample_parameters(batch_size)
+        params.update(
+            {
+                "length": self.params.length,
+                "server_duration": self.params.server_duration,
+                "sample_rate": self.params.sample_rate,
+                "normalize_output": self.params.normalize_output,
+                "traffic_lfo_freq_range": self.params.traffic_lfo_freq_range,
+                "traffic_lfo_mul_range": self.params.traffic_lfo_mul_range,
+                "burst_rate_hz_range": self.params.burst_rate_hz_range,
+                "burst_duration_range": self.params.burst_duration_range,
+                "burst_mul_range": self.params.burst_mul_range,
+                "congestion_period_range": self.params.congestion_period_range,
+                "congestion_depth_range": self.params.congestion_depth_range,
+                "congestion_release_time_range": self.params.congestion_release_time_range,
+                "overhead_lfo_freq_range": self.params.overhead_lfo_freq_range,
+                "overhead_mul_range": self.params.overhead_mul_range,
+                "attack_period_range": self.params.attack_period_range,
+                "attack_env_points": self.params.attack_env_points,
+                "attack_mul_range": self.params.attack_mul_range,
+            }
+        )
+        return params
+    def generate_batch(
+        self,
+        batch_size: int,
+        seed: Optional[int] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> TimeSeriesContainer:
+        if seed is not None:
+            self._set_random_seeds(seed)
+        if params is None:
+            params = self._sample_parameters(batch_size)
+        generator = NetworkTopologyAudioGenerator(
+            length=params["length"],
+            server_duration=params["server_duration"],
+            sample_rate=params["sample_rate"],
+            normalize_output=params["normalize_output"],
+            traffic_lfo_freq_range=params["traffic_lfo_freq_range"],
+            traffic_lfo_mul_range=params["traffic_lfo_mul_range"],
+            burst_rate_hz_range=params["burst_rate_hz_range"],
+            burst_duration_range=params["burst_duration_range"],
+            burst_mul_range=params["burst_mul_range"],
+            congestion_period_range=params["congestion_period_range"],
+            congestion_depth_range=params["congestion_depth_range"],
+            congestion_release_time_range=params["congestion_release_time_range"],
+            overhead_lfo_freq_range=params["overhead_lfo_freq_range"],
+            overhead_mul_range=params["overhead_mul_range"],
+            attack_period_range=params["attack_period_range"],
+            attack_env_points=params["attack_env_points"],
+            attack_mul_range=params["attack_mul_range"],
+            random_seed=seed,
+        )
+        def _derive_series_seed(base_seed: int, index: int) -> int:
+            mixed = (
+                (base_seed & 0x7FFFFFFF)
+                ^ ((index * 0x9E3779B1) & 0x7FFFFFFF)
+                ^ (hash(self.__class__.__name__) & 0x7FFFFFFF)
+            )
+            return int(mixed)
+        batch_values = []
+        for i in range(batch_size):
+            series_seed = None if seed is None else _derive_series_seed(seed, i)
+            values = generator.generate_time_series(random_seed=series_seed)
+            batch_values.append(values)
+        return TimeSeriesContainer(
+            values=np.array(batch_values),
+            start=params["start"],
+            frequency=params["frequency"],
+        )

src/synthetic_generation/audio_generators/stochastic_rhythm_generator.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from typing import Optional
+import numpy as np
+from pyo import Metro, Mix, Sine, TrigExpseg
+from src.synthetic_generation.abstract_classes import AbstractTimeSeriesGenerator
+from src.synthetic_generation.audio_generators.utils import (
+    normalize_waveform,
+    run_offline_pyo,
+)
+class StochasticRhythmAudioGenerator(AbstractTimeSeriesGenerator):
+    """
+    Generate rhythmic patterns with layered triggers, per-layer envelopes
+    and tones. Parameters are sampled per series for diversity.
+    """
+    def __init__(
+        self,
+        length: int,
+        server_duration: float,
+        sample_rate: int,
+        normalize_output: bool,
+        base_tempo_hz_range: tuple[float, float],
+        num_layers_range: tuple[int, int],
+        subdivisions: tuple[int, ...],
+        attack_range: tuple[float, float],
+        decay_range: tuple[float, float],
+        tone_freq_range: tuple[float, float],
+        tone_mul_range: tuple[float, float],
+        random_seed: Optional[int] = None,
+    ):
+        self.length = length
+        self.server_duration = server_duration
+        self.sample_rate = sample_rate
+        self.normalize_output = normalize_output
+        self.base_tempo_hz_range = base_tempo_hz_range
+        self.num_layers_range = num_layers_range
+        self.subdivisions = subdivisions
+        self.attack_range = attack_range
+        self.decay_range = decay_range
+        self.tone_freq_range = tone_freq_range
+        self.tone_mul_range = tone_mul_range
+        self.rng = np.random.default_rng(random_seed)
+    def _build_synth(self):
+        base_tempo = self.rng.uniform(*self.base_tempo_hz_range)
+        num_layers = int(
+            self.rng.integers(self.num_layers_range[0], self.num_layers_range[1] + 1)
+        )
+        layers = []
+        for _ in range(num_layers):
+            subdivision = self.subdivisions[
+                int(self.rng.integers(0, len(self.subdivisions)))
+            ]
+            rhythm_freq = base_tempo * subdivision
+            trigger = Metro(time=1.0 / rhythm_freq).play()
+            attack = self.rng.uniform(*self.attack_range)
+            decay = self.rng.uniform(*self.decay_range)
+            env = TrigExpseg(trigger, list=[(0.0, 1.0), (attack, 0.8), (decay, 0.0)])
+            tone_freq = self.rng.uniform(*self.tone_freq_range)
+            tone_mul = self.rng.uniform(*self.tone_mul_range)
+            tone = Sine(freq=tone_freq, mul=env * tone_mul)
+            layers.append(tone)
+        return Mix(layers, voices=1)
+    def generate_time_series(self, random_seed: Optional[int] = None) -> np.ndarray:
+        if random_seed is not None:
+            self.rng = np.random.default_rng(random_seed)
+        waveform = run_offline_pyo(
+            synth_builder=self._build_synth,
+            server_duration=self.server_duration,
+            sample_rate=self.sample_rate,
+            length=self.length,
+        )
+        if self.normalize_output:
+            waveform = normalize_waveform(waveform)
+        return waveform

src/synthetic_generation/audio_generators/stochastic_rhythm_wrapper.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import Any, Dict, Optional
+import numpy as np
+from src.data.containers import TimeSeriesContainer
+from src.synthetic_generation.abstract_classes import GeneratorWrapper
+from src.synthetic_generation.audio_generators.stochastic_rhythm_generator import (
+    StochasticRhythmAudioGenerator,
+)
+from src.synthetic_generation.generator_params import StochasticRhythmAudioParams
+class StochasticRhythmAudioWrapper(GeneratorWrapper):
+    def __init__(self, params: StochasticRhythmAudioParams):
+        super().__init__(params)
+        self.params: StochasticRhythmAudioParams = params
+    def _sample_parameters(self, batch_size: int) -> Dict[str, Any]:
+        params = super()._sample_parameters(batch_size)
+        params.update(
+            {
+                "length": self.params.length,
+                "server_duration": self.params.server_duration,
+                "sample_rate": self.params.sample_rate,
+                "normalize_output": self.params.normalize_output,
+                "base_tempo_hz_range": self.params.base_tempo_hz_range,
+                "num_layers_range": self.params.num_layers_range,
+                "subdivisions": self.params.subdivisions,
+                "attack_range": self.params.attack_range,
+                "decay_range": self.params.decay_range,
+                "tone_freq_range": self.params.tone_freq_range,
+                "tone_mul_range": self.params.tone_mul_range,
+            }
+        )
+        return params
+    def generate_batch(
+        self,
+        batch_size: int,
+        seed: Optional[int] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> TimeSeriesContainer:
+        if seed is not None:
+            self._set_random_seeds(seed)
+        if params is None:
+            params = self._sample_parameters(batch_size)
+        generator = StochasticRhythmAudioGenerator(
+            length=params["length"],
+            server_duration=params["server_duration"],
+            sample_rate=params["sample_rate"],
+            normalize_output=params["normalize_output"],
+            base_tempo_hz_range=params["base_tempo_hz_range"],
+            num_layers_range=params["num_layers_range"],
+            subdivisions=params["subdivisions"],
+            attack_range=params["attack_range"],
+            decay_range=params["decay_range"],
+            tone_freq_range=params["tone_freq_range"],
+            tone_mul_range=params["tone_mul_range"],
+            random_seed=seed,
+        )
+        def _derive_series_seed(base_seed: int, index: int) -> int:
+            mixed = (
+                (base_seed & 0x7FFFFFFF)
+                ^ ((index * 0x9E3779B1) & 0x7FFFFFFF)
+                ^ (hash(self.__class__.__name__) & 0x7FFFFFFF)
+            )
+            return int(mixed)
+        batch_values = []
+        for i in range(batch_size):
+            series_seed = None if seed is None else _derive_series_seed(seed, i)
+            values = generator.generate_time_series(random_seed=series_seed)
+            batch_values.append(values)
+        return TimeSeriesContainer(
+            values=np.array(batch_values),
+            start=params["start"],
+            frequency=params["frequency"],
+        )