Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ __pycache__/
*.py[codz]
*$py.class
data/*
# Optional third-party example assets
examples/matey/MATEY/
examples/matey/data/
# C extensions
*.so

Expand Down
75 changes: 75 additions & 0 deletions examples/matey/Demo_SOLPS_vit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
basic_config: &basic_config
# Run settings
log_to_wandb: !!bool False #True # Use wandb integration
log_to_screen: !!bool True # Log progress to screen.
save_checkpoint: !!bool True # Save checkpoints
checkpoint_save_interval: 10 # Save every # epochs - also saves "best" according to val loss
debug_grad: !!bool True # Compute gradient/step_sizes/etc. for debugging
true_time: !!bool False # Debugging setting - sets num workers to zero and activates syncs
num_data_workers: 2 #6 # Generally pulling 8 cpu per process, so using 6 for DL - not sure if best ratio
enable_amp: !!bool False # Use automatic mixed precision - blows up with low variance fields right now
compile: !!bool False # Compile model - Does not currently work
gradient_checkpointing: !!bool False # Whether to use gradient checkpointing - Slow, but lower memory
exp_dir: './Dev_SOLPS' # Output path
log_interval: 1 # How often to log - Don't think this is actually implemented
pretrained: !!bool False # Whether to load a pretrained model
# Training settings
drop_path: 0.1
batch_size: 64
max_epochs: 10
scheduler_epochs: -1
epoch_size: 20
rescale_gradients: !!bool False # Activate hook that scales block gradients to norm 1
optimizer: 'AdamW' # DAdaptAdam 'AdamW' 'SGD'
scheduler: 'none' # Only cosine implemented
warmup_steps: 0 # Warmup when not using DAdapt
learning_rate: 1e-3 #
weight_decay: 1e-3
n_states: 29 # Must be >= max field label + 1 in the dataset
state_names: ['Pressure', 'Vx', 'Vy', 'Density', 'Vx', 'Vy', 'Density', 'Pressure'] # These are not used now!
dt: 1 # Striding of data - Not currently implemented > 1
leadtime_max: 10 #prediction lead time range [1, leadtime_max]
autoregressive: !!bool True # autoregressive training or one-step prediction
supportdata: # Whether to use support data (e.g. input control actuator) as input
- input_control_act: !!bool True
n_steps: 3 #16 # Length of history to include in input
enforce_max_steps: !!bool False # If false and n_steps > dataset steps, use dataset steps. Otherwise, raise Exception.
accum_grad: 1
# Model settings
model_type: 'vit_all2all' # no need for time_type and space_type inputs
#model_type: 'svit' #currently only support time_type=="all2all_time" and space_type=="all2all"
#time_type: 'all2all_time' #
#space_type: 'all2all' #
#model_type: 'avit' #currently only support space_type=="axial_attention" and time_type=="attention"
#time_type: 'attention' #
#space_type: 'axial_attention' #
tie_fields: !!bool False # Whether to use 1 embedding per field per data
embed_dim: 192 # Dimension of internal representation - 192/384/768/1024 for Ti/S/B/L
num_heads: 3 # Number of heads for attention - 3/6/12/16 for Ti/S/B/L
processor_blocks: 12 # Number of transformer blocks in the backbone - 12/12/12/24 for Ti/S/B/L
##patch_size: [[1, 2, 2]] #[[1, 40, 40]] #, [32, 32], [64, 64]] #
tokenizer_heads:
- head_name: "tk-2D"
patch_size: [[1, 2, 2]]
sts_model: !!bool False
sts_train: !!bool False #when True, we use loss function with two parts: l_coarse/base + l_total, so that the coarse ViT approximates true solutions directly
#gammaref: 0.2 #pick all tokens that with variances larger than gammaref*max_variance to refine
#refine_ratio: 0.2 #ratio of coarse tokens picked to be refined
bias_type: 'PositionAreaBias' # Options rel, continuous, none, PositionAreaBias
bias_MLP: !!bool True
# Data settings
#train_val_test: [.6, .2, .2]
augmentation: !!bool False # Augmentation not implemented
use_all_fields: !!bool True # Prepopulate the field metadata dictionary from dictionary in datasets
tie_batches: !!bool False # Force everything in batch to come from one dset
extended_names: !!bool False # Whether to use extended names - not currently implemented
embedding_offset: 0 # Use when adding extra finetuning fields
train_data_paths: [
['examples/matey/data/fusionMT-data/solps/train', 'SOLPS2D', '','tk-2D'],
]
valid_data_paths: [
['examples/matey/data/fusionMT-data/solps/valid', 'SOLPS2D', '','tk-2D'],
]
append_datasets: [] # List of datasets to append to the input/output projections for finetuning


91 changes: 91 additions & 0 deletions examples/matey/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# MATEY Example Harness

BaseSim harness for the [MATEY](https://github.com/FusionFM/MATEY) multiscale transformer codebase.

## Setup

1. Install BaseSim (from repo root):
```bash
poetry install
```

2. Install the optional MATEY example dependency (pinned commit):
```bash
poetry install --extras matey
```
This uses GitHub SSH auth for the private MATEY repo, so your SSH key must
have access to `FusionFM/MATEY`.

This extra is pinned to:
`4e615bb5c86024632e386153bfbed028b38a8262`

Equivalent pip command:
```bash
pip install "matey @ git+ssh://git@github.com/FusionFM/MATEY.git@4e615bb5c86024632e386153bfbed028b38a8262"
```

3. (Optional) Install heavy/system-dependent packages as needed for your environment:
```bash
MAX_JOBS=4 NINJA_STATUS="[%f/%t] " pip install -vv --progress-bar on --no-build-isolation flash-attn
pip install dadaptation==3.1 # for DAdaptAdam optimizer
pip install mpi4py # requires MPI C library
pip install netCDF4 # requires HDF5/netCDF C libs
pip install git+https://github.com/sandialabs/exodusii.git # not on PyPI
```

Alternative: install flash attention without screen output

```bash
MAX_JOBS=4 pip install flash-attn --no-build-isolation # requires CUDA toolkit + nvcc
```
## Running

```bash
poetry run python -m src.main --config examples/matey/matey.toml
```

Outer-loop drift demo (L2 placeholder model + input-noise stream updates):

```bash
poetry run python -m src.main --config examples/matey/matey_outer_loop.toml
# or
./examples/matey/run_outer_loop.sh
```

## Configuration

Edit [matey.toml](matey.toml) to adjust training parameters, drift detection, and data paths.

The `[data].path` should point to your local SOLPS dataset root
(must contain `train/` and `valid/` directories).
This data is expected to be user-provided and is not tracked in git.

For the SOLPS example, the harness builds a deterministic file-level split of
`[0.7, 0.15, 0.15]` and materializes staged views under:

```text
output/matey_split_cache/<fingerprint>/{train,val,test}
```

The cache is reused when source files, split ratios, and seed are unchanged.

The example TOML is tuned for short smoke runs to make drift-triggered continual
learning dispatch easier to observe (`detection_interval=5`, `aggregation="last"`,
`adwin_delta=0.05`, `max_stream_updates=10`).

For the outer-loop harness, use [matey_outer_loop.toml](matey_outer_loop.toml):
- `data.name = "matey_outer_loop"` selects `model_outer_loop.py`.
- `data.path` points at `examples/matey/dump/SOLPS2DwION`.
- `continual_learning.update_mode = "none"` disables parameter updates.
- `drift_detection.metric_index = 0` monitors the `input_l2` metric.

## Files

| File | Description |
|---|---|
| `model.py` | `MATEYHarness` -- adapts MATEY models/data to BaseSim's `BaseModelHarness` interface |
| `matey.toml` | Experiment config |
| `model_outer_loop.py` | Outer-loop drift harness with L2 placeholder model and noisy input stream |
| `matey_outer_loop.toml` | Outer-loop experiment config |
| `run_outer_loop.sh` | Convenience run script for `matey_outer_loop.toml` |
| `pyproject.toml` | Optional example dependency manifest |
1 change: 1 addition & 0 deletions examples/matey/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

46 changes: 46 additions & 0 deletions examples/matey/matey.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
seed = 1337
device = "auto"
multi_gpu = false
verbosity = "INFO"

[model]
name = "matey_vit"
pretrained_path = ""

[data]
name = "matey"
# User-provided SOLPS dataset root containing train/ and valid/ folders.
# This path is local-only and should not be tracked in git.
path = "/path/to/fusionMT-data/solps"

[train]
batch_size = 16
num_workers = 2
init_lr = 0.001
max_iter = 200
grad_accumulation_steps = 1

[continual_learning]
update_mode = "base"

[drift_detection]
detector_name = "ADWINDetector"
detection_interval = 5
aggregation = "last"
metric_index = 0 # MATEY metrics: 0=nrmse, 1=rmse, 2=loss
reset_after_learning = false
max_stream_updates = 10

# ADWIN hyperparameters
adwin_delta = 0.05
adwin_minor_threshold = 0.3
adwin_moderate_threshold = 0.6

[logging]
backend = "wandb"
experiment_name = "matey-continual-learning" # Optional: project/experiment name

[visualization]
baseline = 0.0
input = "output/matey.csv"
output = "output/matey_dashboard.png"
13 changes: 13 additions & 0 deletions examples/matey/matey_batches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Compatibility shim re-exporting the MATEY batch adapters.

Keeps ``examples.matey.matey_batches`` importable while the actual
implementations live in ``examples.matey.src.matey_batches``.
"""

from examples.matey.src.matey_batches import (
    MateyInputBatch,
    MateyLoaderAdapter,
    MateyModelAdapter,
    MateyTargetBatch,
)

# Public API of this shim module; mirrors the names imported above.
__all__ = [
    "MateyInputBatch",
    "MateyTargetBatch",
    "MateyLoaderAdapter",
    "MateyModelAdapter",
]
44 changes: 44 additions & 0 deletions examples/matey/matey_outer_loop.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
seed = 1337
device = "auto"
multi_gpu = false
verbosity = "INFO"

[model]
name = "matey_outer_loop_l2"
pretrained_path = ""

[data]
name = "matey_outer_loop"
path = "examples/matey/dump/SOLPS2DwION"

[train]
batch_size = 1
num_workers = 0
init_lr = 0.0
max_iter = 50
grad_accumulation_steps = 1

[continual_learning]
update_mode = "none"

[drift_detection]
detector_name = "ADWINDetector"
detection_interval = 5
aggregation = "last"
metric_index = 0 # outer-loop metrics: 0=input_l2, 1=loss
reset_after_learning = false
max_stream_updates = 20

# ADWIN hyperparameters
adwin_delta = 0.05
adwin_minor_threshold = 0.3
adwin_moderate_threshold = 0.6

[logging]
backend = "wandb"
experiment_name = "matey-outer-loop-drift"

[visualization]
baseline = 0.0
input = "output/matey_outer_loop.csv"
output = "output/matey_outer_loop_dashboard.png"
Loading
Loading