# Source code for qeg_nmr_qua.analysis.data_saver

"""
Data Saving Module.

This module provides utilities for saving experiment data from NMR experiments
using the OPX-1000, including automated handling of NumPy arrays, matplotlib
figures, and other scientific computing types via :class:`QuantumEncoder`.
"""

import json
import re
import warnings
from pathlib import Path
from typing import Any

from qeg_nmr_qua.analysis.encoder import QuantumEncoder
from qeg_nmr_qua.config.settings import ExperimentSettings
from qeg_nmr_qua.config.config import OPXConfig


class DataSaver:
    """Manage saving and loading of NMR experiment data with metadata.

    Provides a structured approach to persisting experiment configurations,
    settings, commands, and results to disk. Each experiment is saved to a
    uniquely named folder containing JSON files and optionally PNG figures.

    **Folder Structure:**

    Each saved experiment creates a folder with::

        experiment_0001/
        ├── config.json       # OPX configuration
        ├── settings.json     # Experiment settings
        ├── commands.json     # Command sequence executed
        ├── data.json         # Experimental results and metadata
        ├── figures.json      # (Optional) Mapping of figure keys to filenames
        └── figure_*.png      # (Optional) Saved matplotlib figures

    **Data Handling:**

    - JSON-serializable data (dicts, lists, numbers, strings) are saved directly
    - NumPy arrays/scalars and Path objects are serialized through
      :class:`QuantumEncoder`
    - Matplotlib figures are saved as PNG files (300 dpi)
    - Non-serializable objects are replaced by descriptive strings, with a
      warning

    **Error Recovery:**

    If saving fails after the experiment folder has been created, the folder
    is removed again so no half-written experiment is left behind.

    Attributes:
        root_data_folder (Path): The root directory where experiment data
            will be saved.

    Example:
        >>> saver = DataSaver("./experiment_data")
        >>> config = {"qop_ip": "192.168.1.100", ...}
        >>> settings = {"n_avg": 8, ...}
        >>> commands = [{"type": "pulse", ...}]
        >>> data = {"I_data": np.array([...]), "Q_data": np.array([...])}
        >>> folder = saver.save_experiment("exp", config, settings, commands, data)
        >>> loaded = saver.load_experiment(folder.name)
    """

    # Characters that are unsafe in a file name on at least one major
    # platform: path separators, Windows-reserved punctuation, control chars.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

    def __init__(self, root_data_folder: str | Path):
        """Initialize the DataSaver with a root data folder.

        Creates the root data folder (including missing parents) if it does
        not exist. All experiments are saved as subfolders of this directory.

        Args:
            root_data_folder (str | Path): The root directory for saving
                experiment data.

        Example:
            >>> saver = DataSaver("./data")
            >>> saver.root_data_folder
            PosixPath('data')
        """
        self.root_data_folder = Path(root_data_folder)
        self.root_data_folder.mkdir(parents=True, exist_ok=True)

    def save_experiment(
        self,
        experiment_prefix: str,
        config: OPXConfig,
        settings: ExperimentSettings,
        commands: list[dict[str, Any]],
        data: dict[str, Any],
    ) -> Path:
        """Save experiment metadata and data to a structured directory.

        Creates a folder with an auto-incremented name based on
        ``experiment_prefix`` (e.g. ``prefix_0001``, ``prefix_0002``) and
        writes the OPX configuration, experiment settings, command sequence
        and experimental results into it. If any step fails, the folder is
        removed again.

        **Saved Files:**

        - ``config.json``: the ``config`` argument
        - ``settings.json``: the ``settings`` argument
        - ``commands.json``: the ``commands`` argument
        - ``data.json``: the processed ``data`` payload
        - ``figures.json``: (Optional) mapping of data keys to figure filenames
        - ``figure_*.png``: (Optional) matplotlib figures extracted from data

        **Data Processing:**

        - Matplotlib figures are saved as PNG files (300 dpi) and replaced by
          a reference string in ``data.json``
        - Values that cannot be serialized are replaced by a descriptive
          string and a warning is issued
        - Keys that could not be stored are listed under ``_failed_keys``

        Args:
            experiment_prefix (str): Prefix for the experiment folder name.
                The folder created is ``<experiment_prefix>_NNNN`` with a
                zero-padded counter starting at 0001. Must not contain path
                separators or be ".".
            config (OPXConfig): OPX configuration object.
            settings (ExperimentSettings): Experiment settings object.
            commands (list[dict[str, Any]]): Command dictionaries defining
                the pulse sequence.
            data (dict[str, Any]): Experimental data. May contain numpy
                arrays, matplotlib figures and other Python objects.

        Returns:
            Path: The path to the created experiment folder.

        Raises:
            ValueError: If ``experiment_prefix`` is empty, not a string, or
                contains path separators.
            RuntimeError: If saving fails (the folder is cleaned up first).

        Example:
            >>> saver = DataSaver("./data")
            >>> folder = saver.save_experiment(
            ...     "exp",
            ...     config={"qop_ip": "192.168.1.100"},
            ...     settings={"n_avg": 8},
            ...     commands=[{"type": "pulse", "name": "pi_half"}],
            ...     data={"I": np.array([1, 2, 3]), "Q": np.array([4, 5, 6])}
            ... )
            >>> folder.name  # first call
            'exp_0001'
        """
        self._require_simple_name(
            experiment_prefix,
            empty_message="experiment_prefix must be a non-empty string.",
            invalid_message=(
                f"Invalid experiment prefix '{experiment_prefix}'. "
                "Must be a simple name without path separators."
            ),
        )

        # Determine next available experiment folder name for this prefix.
        experiment_name = self._next_experiment_name(experiment_prefix)
        experiment_folder = self.root_data_folder / experiment_name
        # exist_ok=False: never write into a folder somebody else created.
        experiment_folder.mkdir(parents=True, exist_ok=False)

        try:
            # NOTE(review): config and settings are handed to the encoder
            # as-is; this assumes QuantumEncoder can serialize them - confirm.
            self._save_json(experiment_folder / "config.json", config)
            self._save_json(experiment_folder / "settings.json", settings)
            self._save_json(experiment_folder / "commands.json", commands)

            # Extract figures and neutralize non-serializable values first,
            # so data.json itself can always be written.
            cleaned_data, figure_map = self._process_data_payload(
                data, experiment_folder
            )
            self._save_json(experiment_folder / "data.json", cleaned_data)

            # figures.json is only written when at least one figure was saved.
            if figure_map:
                self._save_json(experiment_folder / "figures.json", figure_map)

            return experiment_folder

        except Exception as e:
            # Clean up on failure so no partial experiment folder remains.
            import shutil

            shutil.rmtree(experiment_folder, ignore_errors=True)
            raise RuntimeError(
                f"Failed to save experiment '{experiment_name}': {e}"
            ) from e

    @staticmethod
    def _require_simple_name(
        value: Any, empty_message: str, invalid_message: str
    ) -> None:
        """Validate that ``value`` is a plain, separator-free name.

        Args:
            value: The candidate name.
            empty_message: Error text used when ``value`` is not a string or
                is empty.
            invalid_message: Error text used when ``value`` contains ``/`` or
                ``\\`` or equals ``"."``.

        Raises:
            ValueError: If either check fails.
        """
        if not isinstance(value, str) or value == "":
            raise ValueError(empty_message)
        if "/" in value or "\\" in value or value == ".":
            raise ValueError(invalid_message)

    def _next_experiment_name(self, prefix: str, width: int = 4) -> str:
        """Compute the next available experiment folder name for a prefix.

        Scans ``root_data_folder`` for entries named ``{prefix}_N...`` whose
        counter has at least ``width`` digits and returns the name with the
        next counter value, starting at ``{prefix}_0001`` if none exist.

        Counters longer than ``width`` digits are honoured so numbering keeps
        advancing past ``10**width - 1``. Plain files with a matching name
        also reserve their index, because creating a folder of the same name
        would fail.

        Args:
            prefix: The experiment prefix.
            width: Zero-padding width for the counter (default: 4).

        Returns:
            str: Next experiment folder name like ``prefix_0001``.
        """
        pattern = re.compile(rf"{re.escape(prefix)}_([0-9]{{{width},}})")

        max_idx = 0
        if self.root_data_folder.exists():
            for entry in self.root_data_folder.iterdir():
                found = pattern.fullmatch(entry.name)
                if found:
                    max_idx = max(max_idx, int(found.group(1)))

        return f"{prefix}_{max_idx + 1:0{width}d}"

    def load_experiment(self, experiment_name: str) -> dict[str, Any]:
        """Load experiment metadata and data from a saved folder.

        Reads the JSON files of one experiment folder and returns their
        contents in a single dictionary.

        Args:
            experiment_name (str): Name of the experiment folder to load,
                relative to ``root_data_folder``.

        Returns:
            dict[str, Any]: Dictionary with keys:

            - ``config``: contents of ``config.json``
            - ``settings``: contents of ``settings.json``
            - ``commands``: contents of ``commands.json``
            - ``data``: contents of ``data.json``
            - ``figures``: (Optional) contents of ``figures.json``

        Raises:
            FileNotFoundError: If the experiment folder or one of the four
                required files does not exist.

        Example:
            >>> saver = DataSaver("./data")
            >>> loaded = saver.load_experiment("exp_0001")
            >>> config = loaded["config"]
            >>> data = loaded["data"]
        """
        experiment_folder = self.root_data_folder / experiment_name

        if not experiment_folder.is_dir():
            raise FileNotFoundError(
                f"Experiment folder not found at {experiment_folder}"
            )

        result: dict[str, Any] = {}

        # Each required file is stored under the key matching its stem.
        for key in ("config", "settings", "commands", "data"):
            filename = f"{key}.json"
            filepath = experiment_folder / filename
            if not filepath.exists():
                raise FileNotFoundError(
                    f"Required file '{filename}' not found in {experiment_folder}"
                )
            result[key] = self._load_json(filepath)

        # The figure mapping is optional.
        figures_file = experiment_folder / "figures.json"
        if figures_file.exists():
            result["figures"] = self._load_json(figures_file)

        return result

    @staticmethod
    def _save_json(filepath: Path, data: Any, indent: int = 2) -> None:
        """Save data to a JSON file using :class:`QuantumEncoder`.

        Args:
            filepath (Path): Path where the JSON file will be written
                (UTF-8, overwriting any existing file).
            data (Any): Data to serialize.
            indent (int): JSON indentation level (default: 2).

        Raises:
            TypeError: If data contains types the encoder cannot serialize.
            OSError: If the file cannot be written.
        """
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=indent, cls=QuantumEncoder)

    @staticmethod
    def _load_json(filepath: Path) -> Any:
        """Load data from a JSON file.

        Returns plain Python types as produced by :func:`json.load`; nothing
        is converted back into NumPy arrays.

        Args:
            filepath (Path): Path to the JSON file to load.

        Returns:
            Any: Deserialized JSON data.

        Raises:
            FileNotFoundError: If the file doesn't exist.
            json.JSONDecodeError: If the file contains invalid JSON.
        """
        with open(filepath, "r", encoding="utf-8") as f:
            return json.load(f)

    def _prepare_settings_for_save(self, settings: Any) -> Any:
        """Convert a settings object to a JSON-serializable dict.

        Accepts a dict, an object with a callable ``to_dict()``, or any
        object with a ``__dict__``. Top-level :class:`~pathlib.Path` values
        are converted to strings. The input (and whatever ``to_dict()``
        returns) is never modified; a new dict is built instead.

        On any failure a warning is issued and a fallback dict containing
        ``_repr`` (and ``_error`` when an exception was involved) is returned,
        so this method does not raise.

        Args:
            settings: The settings object or dict to prepare.

        Returns:
            Any: A dict that serializes with :class:`QuantumEncoder`, or the
            fallback dict described above.
        """
        try:
            if isinstance(settings, dict):
                raw = settings
            elif callable(getattr(settings, "to_dict", None)):
                try:
                    raw = settings.to_dict()
                except Exception as e:
                    warnings.warn(
                        f"settings.to_dict() raised an exception: {e}. Using repr fallback.",
                        UserWarning,
                    )
                    return {"_repr": repr(settings), "_error": str(e)}
            elif hasattr(settings, "__dict__"):
                raw = vars(settings)
            else:
                return {"_repr": repr(settings)}

            # Build a fresh dict so the caller's object is left untouched,
            # converting Path objects to strings on the way.
            sdict = {
                k: str(v) if isinstance(v, Path) else v for k, v in raw.items()
            }

            # Dry-run the serialization so the caller never hits a late error.
            try:
                json.dumps(sdict, cls=QuantumEncoder)
            except Exception as e:
                warnings.warn(
                    f"Settings not JSON-serializable: {e}. Saving repr instead.",
                    UserWarning,
                )
                return {"_repr": repr(settings), "_error": str(e)}

            return sdict
        except Exception as e:
            warnings.warn(
                f"Unexpected error preparing settings for save: {e}. Using repr fallback.",
                UserWarning,
            )
            return {"_repr": repr(settings), "_error": str(e)}

    def save_settings(
        self, settings: ExperimentSettings, name: str, overwrite: bool = False
    ) -> Path:
        """Save settings independently for later reuse.

        The settings are stored under the ``settings/`` subfolder of the root
        data folder as ``<name>.json``. Accepts either a dict or an object
        that implements ``to_dict()``. An existing file is only replaced when
        ``overwrite=True``.

        Args:
            settings: Settings object or dict.
            name: Simple name for the settings (no path separators).
            overwrite: Whether to overwrite an existing settings file.

        Returns:
            Path: Path to the saved settings JSON file.

        Raises:
            ValueError: If ``name`` is invalid, or the file exists and
                ``overwrite`` is False.
            RuntimeError: On I/O or serialization failures while writing.
        """
        self._require_simple_name(
            name,
            empty_message="name must be a non-empty string",
            invalid_message=(
                "The parameter 'name' must be a simple filename without path separators"
            ),
        )

        settings_folder = self.root_data_folder / "settings"
        settings_folder.mkdir(parents=True, exist_ok=True)

        filename = f"{name}.json"
        filepath = settings_folder / filename

        if filepath.exists() and not overwrite:
            raise ValueError(
                f"Settings file '{filename}' already exists. Pass overwrite=True to replace it."
            )

        prepared = self._prepare_settings_for_save(settings)

        try:
            self._save_json(filepath, prepared)
        except Exception as e:
            raise RuntimeError(f"Failed to save settings '{name}': {e}") from e

        return filepath

    def load_settings(self, name: str) -> dict[str, Any]:
        """Load a previously saved settings JSON by name.

        Args:
            name: Simple name of the settings (no path separators), without
                the ``.json`` extension.

        Returns:
            dict[str, Any]: The deserialized content of the settings file.

        Raises:
            ValueError: If ``name`` is empty, not a string, or contains path
                separators.
            FileNotFoundError: If no settings file with that name exists.
        """
        self._require_simple_name(
            name,
            empty_message="name must be a non-empty string",
            invalid_message=(
                "The parameter 'name' must be a simple filename without path separators"
            ),
        )

        filepath = self.root_data_folder / "settings" / f"{name}.json"
        if not filepath.exists():
            raise FileNotFoundError(f"Settings file not found: {filepath}")

        return self._load_json(filepath)

    def list_saved_settings(self) -> list[str]:
        """Return the sorted names of saved settings (without ``.json``)."""
        folder = self.root_data_folder / "settings"
        if not folder.exists():
            return []
        return sorted(
            p.stem for p in folder.iterdir() if p.is_file() and p.suffix == ".json"
        )

    @classmethod
    def _figure_filename(cls, key: Any, taken: set[str]) -> str:
        """Build a safe, unused PNG file name for the figure stored at ``key``.

        Path separators and other characters that are unsafe in file names
        are replaced by ``_`` so the file always lands inside the experiment
        folder. If the resulting name is already in ``taken``, a numeric
        suffix (``_2``, ``_3``, ...) is appended.

        Args:
            key: The data key the figure was stored under.
            taken: File names already assigned within this experiment.

        Returns:
            str: A file name of the form ``figure_<key>.png``.
        """
        stem = cls._UNSAFE_FILENAME_CHARS.sub("_", str(key))
        candidate = f"figure_{stem}.png"
        suffix = 1
        while candidate in taken:
            suffix += 1
            candidate = f"figure_{stem}_{suffix}.png"
        return candidate

    def _process_data_payload(
        self, data: dict[str, Any], experiment_folder: Path
    ) -> tuple[dict[str, Any], dict[str, str]]:
        """Extract matplotlib figures from the payload and make it serializable.

        Each value in ``data`` is handled as follows:

        - Matplotlib figures are written as PNG into ``experiment_folder``
          and replaced by a reference string. A figure that could not be
          written is reported under ``_failed_keys`` and is not listed in
          the figure map.
        - Values that serialize with :class:`QuantumEncoder` are kept as-is.
        - Values that fail with ``TypeError``/``ValueError`` are replaced by
          ``"<non-serializable: TypeName>"`` and reported under
          ``_failed_keys``.
        - Values that fail in any other way are dropped and reported under
          ``_failed_keys``.

        Args:
            data (dict[str, Any]): The data payload.
            experiment_folder (Path): Folder where figures will be saved.

        Returns:
            tuple[dict[str, Any], dict[str, str]]: ``(cleaned_data,
            figure_map)`` where ``figure_map`` maps original data keys to the
            file names of successfully saved figures.
        """
        cleaned_data: dict[str, Any] = {}
        figure_map: dict[str, str] = {}
        failed_keys: list[Any] = []
        used_filenames: set[str] = set()

        for key, value in data.items():
            try:
                if self._is_matplotlib_figure(value):
                    figure_filename = self._figure_filename(key, used_filenames)
                    used_filenames.add(figure_filename)
                    if self._save_figure(value, experiment_folder / figure_filename):
                        figure_map[key] = figure_filename
                        # Replace with a reference string in the data.
                        cleaned_data[key] = f"<figure saved as {figure_filename}>"
                    else:
                        # Do not claim a file exists when writing it failed.
                        cleaned_data[key] = f"<figure not saved: {figure_filename}>"
                        failed_keys.append(key)
                    continue

                # Dry-run the serialization; the value itself is stored so
                # the encoder converts it when data.json is written.
                json.dumps(value, cls=QuantumEncoder)
                cleaned_data[key] = value
            except (TypeError, ValueError) as e:
                warnings.warn(
                    f"Could not serialize data['{key}'] as JSON: {e}. "
                    f"Saving as string representation instead.",
                    UserWarning,
                )
                cleaned_data[key] = f"<non-serializable: {type(value).__name__}>"
                failed_keys.append(key)
            except Exception as e:
                # Best effort: one bad field must not abort the whole save.
                warnings.warn(
                    f"Failed to process data['{key}']: {e}. Skipping this field.",
                    UserWarning,
                )
                failed_keys.append(key)

        if failed_keys:
            cleaned_data["_failed_keys"] = failed_keys

        return cleaned_data, figure_map

    @staticmethod
    def _is_matplotlib_figure(obj: Any) -> bool:
        """Check if an object is a matplotlib Figure instance.

        Args:
            obj: Object to check.

        Returns:
            bool: True if ``obj`` is a ``matplotlib.figure.Figure``; False
            otherwise, including when matplotlib cannot be imported.
        """
        try:
            import matplotlib.figure
        except ImportError:
            return False
        return isinstance(obj, matplotlib.figure.Figure)

    @staticmethod
    def _save_figure(fig: Any, filepath: Path) -> bool:
        """Save a matplotlib figure to a PNG file.

        Calls ``fig.savefig`` with 300 dpi and a tight bounding box. A
        failure does not raise: a ``UserWarning`` is issued instead.

        Args:
            fig: Object providing ``savefig`` (a matplotlib Figure).
            filepath (Path): Path where the PNG file will be saved.

        Returns:
            bool: True if the figure was written, False if saving failed.
        """
        try:
            fig.savefig(filepath, dpi=300, bbox_inches="tight")
        except Exception as e:
            warnings.warn(f"Failed to save figure to {filepath}: {e}", UserWarning)
            return False
        return True

    def list_experiments(self) -> list[str]:
        """List all saved experiments in the root data folder.

        An experiment is any direct subfolder of the root folder that
        contains a ``data.json`` file (the file content is not inspected).

        Returns:
            list[str]: Experiment folder names sorted alphabetically; empty
            if the root folder is missing or holds no experiments.

        Example:
            >>> saver = DataSaver("./data")
            >>> saver.list_experiments()
            ['exp_0001', 'exp_0002', 'exp_0003']
        """
        if not self.root_data_folder.exists():
            return []

        return sorted(
            d.name
            for d in self.root_data_folder.iterdir()
            if d.is_dir() and (d / "data.json").exists()
        )