Source code for quantem.data.schema
"""Metadata schema definition and validation for quantem.data datasets."""
# Version string that make_template() stores under "schema_version".
SCHEMA_VERSION = "1.0"

# Values accepted for the "technique" field by validate().
VALID_TECHNIQUES = [
    "4dstem", "hrtem", "eels", "tomo",
    "diffraction", "complex", "image",
]

# Top-level keys every metadata JSON has to carry.
REQUIRED_FIELDS = {
    "schema_version", "name", "technique",
    "description", "data", "attribution",
}

# Keys required inside the nested "data" mapping.
REQUIRED_DATA_FIELDS = {
    "shape",
    "dtype",
}

# Keys required inside the nested "attribution" mapping.
REQUIRED_ATTRIBUTION_FIELDS = {
    "contributor",
    "license",
}
[docs]
def validate(meta: dict) -> list[str]:
    """Validate a metadata dict against the schema.

    Checks performed, in order:

    1. ``meta`` itself is a dict.
    2. Every name in ``REQUIRED_FIELDS`` is a key of ``meta``.
    3. ``meta["technique"]``, when present, is one of ``VALID_TECHNIQUES``.
    4. ``meta["data"]``, when present, is a dict holding every name in
       ``REQUIRED_DATA_FIELDS``.
    5. ``meta["attribution"]``, when present, is a dict holding every name
       in ``REQUIRED_ATTRIBUTION_FIELDS``.

    Apart from the technique value and the dict-ness of the two nested
    sections, only the *presence* of keys is checked; values are not
    inspected.

    Args:
        meta: Parsed metadata, normally the result of loading a metadata
            JSON file.

    Returns:
        A list of human-readable error strings (empty if valid). Missing
        field errors within each group are reported in alphabetical order
        so the output is identical from run to run.
    """
    # A JSON document may decode to a list, string, number or None; report
    # that as a validation error instead of failing on the lookups below.
    if not isinstance(meta, dict):
        return [f"Metadata must be a dict, got {type(meta).__name__}"]

    errors: list[str] = []

    # sorted(): the required-field collections are sets, whose iteration
    # order for strings can differ between interpreter runs.
    errors.extend(
        f"Missing required field: {field!r}"
        for field in sorted(REQUIRED_FIELDS)
        if field not in meta
    )

    if "technique" in meta and meta["technique"] not in VALID_TECHNIQUES:
        errors.append(
            f"Invalid technique {meta['technique']!r}. "
            f"Must be one of: {VALID_TECHNIQUES}"
        )

    # The two nested sections follow the same rule: must be a dict and must
    # contain its own set of required keys.
    for section, required in (
        ("data", REQUIRED_DATA_FIELDS),
        ("attribution", REQUIRED_ATTRIBUTION_FIELDS),
    ):
        if section not in meta:
            # Nothing to inspect; absence of the section is the business
            # of the top-level required-field check above.
            continue
        content = meta[section]
        if not isinstance(content, dict):
            errors.append(f"{section!r} must be a dict")
            continue
        errors.extend(
            f"Missing required field: {section}.{field!r}"
            for field in sorted(required)
            if field not in content
        )

    return errors
[docs]
def make_template(
    name: str,
    technique: str,
    shape: list[int] | tuple[int, ...],
    dtype: str = "float32",
    description: str = "",
    contributor: str = "",
    license: str = "CC-BY-4.0",
) -> dict:
    """Build a fresh metadata dict with the required fields filled in.

    The arguments are stored as given; ``technique`` is not checked
    against ``VALID_TECHNIQUES`` here. The optional ``instrument``,
    ``calibration`` and ``processing`` sections are included as empty
    dicts.

    Args:
        name: Stored under ``"name"``.
        technique: Stored under ``"technique"``.
        shape: Copied into a new list and stored under ``data.shape``.
        dtype: Stored under ``data.dtype``.
        description: Stored under ``"description"``.
        contributor: Stored under ``attribution.contributor``.
        license: Stored under ``attribution.license``.

    Returns:
        A new dict; nothing is shared between successive calls.
    """
    data_section = {"shape": list(shape), "dtype": dtype}
    attribution_section = {"contributor": contributor, "license": license}

    # Key order below is the order the keys appear in when serialised.
    template = {
        "schema_version": SCHEMA_VERSION,
        "name": name,
        "technique": technique,
        "description": description,
        "data": data_section,
        "instrument": {},
        "calibration": {},
        "processing": {},
        "attribution": attribution_section,
    }
    return template