Source code for quantem.data.schema

"""Metadata schema definition and validation for quantem.data datasets."""

# Schema version written into the "schema_version" field of new templates.
SCHEMA_VERSION = "1.0"

# Accepted values for the "technique" field. This stays a list: its repr is
# interpolated verbatim into the "Invalid technique" validation message.
VALID_TECHNIQUES = [
    "4dstem", "hrtem", "eels", "tomo",
    "diffraction", "complex", "image",
]

# Top-level keys that every metadata JSON must contain.
REQUIRED_FIELDS = {
    "schema_version", "name", "technique",
    "description", "data", "attribution",
}

# Keys that must be present inside the nested "data" mapping.
REQUIRED_DATA_FIELDS = {
    "shape",
    "dtype",
}

# Keys that must be present inside the nested "attribution" mapping.
REQUIRED_ATTRIBUTION_FIELDS = {
    "contributor",
    "license",
}


def validate(meta: dict) -> list[str]:
    """Validate a metadata dict against the schema.

    Checks performed:

    * ``meta`` itself is a dict,
    * every name in ``REQUIRED_FIELDS`` is present,
    * ``technique`` (when present) is one of ``VALID_TECHNIQUES``,
    * ``data`` and ``attribution`` (when present) are dicts containing
      ``REQUIRED_DATA_FIELDS`` / ``REQUIRED_ATTRIBUTION_FIELDS``.

    Only the presence of fields is checked; field values other than
    ``technique`` are not inspected.

    Returns a list of error strings (empty if valid). Missing-field errors
    are reported in sorted field order so the result is reproducible.
    """
    # Without this guard a str/list would be silently membership-tested
    # (substring / element checks) and None would raise TypeError.
    if not isinstance(meta, dict):
        return [f"Metadata must be a dict, got {type(meta).__name__}"]

    # Sets have no stable iteration order for strings across interpreter
    # runs, so sort to keep the error list deterministic.
    errors: list[str] = [
        f"Missing required field: {field!r}"
        for field in sorted(REQUIRED_FIELDS)
        if field not in meta
    ]

    if "technique" in meta and meta["technique"] not in VALID_TECHNIQUES:
        errors.append(
            f"Invalid technique {meta['technique']!r}. "
            f"Must be one of: {VALID_TECHNIQUES}"
        )

    # The nested sections share one rule: must be a dict holding its own
    # required keys. An absent section was already reported above.
    nested_sections = (
        ("data", REQUIRED_DATA_FIELDS),
        ("attribution", REQUIRED_ATTRIBUTION_FIELDS),
    )
    for section, required in nested_sections:
        if section not in meta:
            continue
        contents = meta[section]
        if not isinstance(contents, dict):
            errors.append(f"'{section}' must be a dict")
            continue
        errors.extend(
            f"Missing required field: {section}.{field!r}"
            for field in sorted(required)
            if field not in contents
        )

    return errors
def make_template(
    name: str,
    technique: str,
    shape: list[int] | tuple[int, ...],
    dtype: str = "float32",
    description: str = "",
    contributor: str = "",
    license: str = "CC-BY-4.0",
) -> dict:
    """Build a metadata dict whose required fields are already populated.

    The arguments are copied into the result as given; no validation is
    done here (``technique`` is not checked against ``VALID_TECHNIQUES``).
    ``shape`` is stored as a new list, ``schema_version`` is taken from
    ``SCHEMA_VERSION``, and the ``instrument``, ``calibration`` and
    ``processing`` sections are included as empty dicts.
    """
    data_section = {"shape": [*shape], "dtype": dtype}
    attribution_section = {"contributor": contributor, "license": license}

    template = {
        "schema_version": SCHEMA_VERSION,
        "name": name,
        "technique": technique,
        "description": description,
        "data": data_section,
    }
    # These sections are not in REQUIRED_FIELDS; they start out empty, each
    # with its own fresh dict, and sit between "data" and "attribution".
    for section in ("instrument", "calibration", "processing"):
        template[section] = {}
    template["attribution"] = attribution_section
    return template