# Source code for libertem.io.dataset.base.meta
from typing import Any, Optional, TYPE_CHECKING
from collections.abc import Sequence
import jsonschema
import numpy as np
from sparseconverter import CUDA, NUMPY, ArrayBackend
from libertem.common import Shape
if TYPE_CHECKING:
from numpy import typing as nt
[docs]
class DataSetMeta:
    """
    Container for the attributes that describe a data set: its shape,
    dtypes, supported array backends, frame count, sync offset and an
    arbitrary metadata object.

    Parameters
    ----------

    shape
        "native" dataset shape, can have any dimensionality

    array_backends : Optional[Sequence[ArrayBackend]]
        Array backends recorded for this data set. If ``None`` is passed,
        ``(NUMPY, CUDA)`` is stored instead.

    image_count
        Total number of frames in the dataset

    raw_dtype : np.dtype
        dtype used internally in the data set for reading

    dtype : np.dtype
        Best-fitting output dtype. This can be different from raw_dtype, for example
        if there are post-processing steps done as part of reading, which need a different
        dtype. Assumed equal to raw_dtype if not given

    metadata
        Any metadata offered by the DataSet, not specified yet. Item access
        on the instance (``meta[key]``) is forwarded to this object.

    sync_offset: int, optional
        If positive, number of frames to skip from start
        If negative, number of blank frames to insert at start
    """

    def __init__(
        self,
        shape: Shape,
        array_backends: Optional[Sequence[ArrayBackend]] = None,
        image_count: int = 0,
        raw_dtype: "Optional[nt.DTypeLike]" = None,
        dtype: "Optional[nt.DTypeLike]" = None,
        metadata: Optional[Any] = None,
        sync_offset: int = 0
    ):
        self.shape = shape
        # Fall back to the default backend pair when the caller gives none.
        self.array_backends = (
            (NUMPY, CUDA) if array_backends is None else array_backends
        )
        # The output dtype mirrors raw_dtype unless it is given explicitly.
        # Both values go through np.dtype(), so they are always stored as
        # np.dtype instances.
        self.dtype: np.dtype = np.dtype(raw_dtype if dtype is None else dtype)
        self.raw_dtype: np.dtype = np.dtype(raw_dtype)
        self.metadata = metadata
        self.sync_offset = sync_offset
        self.image_count = image_count

    def __getitem__(self, key):
        """Return ``self.metadata[key]``."""
        stored = self.metadata
        return stored[key]
[docs]
class PartitionStructure:
    """
    Structure of the dataset.

    Assumed to be contiguous on the flattened navigation axis.

    Parameters
    ----------

    slices : List[Tuple[Int, ...]]
        List of tuples [start_idx, end_idx) that partition the data set by the flattened
        navigation axis

    shape : Shape
        shape of the whole dataset

    dtype : numpy dtype
        The dtype of the data as it is on disk. Can contain endian indicator, for
        example >u2 for big-endian 16bit data.
    """

    # JSON schema (draft-07) for the serialized form produced by
    # :meth:`serialize` and consumed by :meth:`from_json`.
    SCHEMA = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "$id": "http://libertem.org/PartitionStructure.schema.json",
        "title": "PartitionStructure",
        "type": "object",
        "properties": {
            "version": {"const": 1},
            "slices": {
                "type": "array",
                "items": {
                    # Each slice is a [start, stop) pair. The length
                    # constraints must live on this array schema: minItems /
                    # maxItems are ignored when attached to a non-array
                    # (number) schema.
                    "type": "array",
                    "items": {"type": "number"},
                    "minItems": 2,
                    "maxItems": 2,
                },
                "minItems": 1,
            },
            "shape": {
                "type": "array",
                "items": {"type": "number", "minimum": 1},
                "minItems": 2,
            },
            "sig_dims": {"type": "number"},
            "dtype": {"type": "string"},
        },
        "required": ["version", "slices", "shape", "sig_dims", "dtype"]
    }

    def __init__(self, shape, slices, dtype):
        self.slices = slices
        self.shape = shape
        # Normalize so that self.dtype is always an np.dtype instance.
        self.dtype = np.dtype(dtype)

    def serialize(self):
        """
        Convert this structure into a plain dict matching :attr:`SCHEMA`.

        Only the first two entries of each slice are written. ``self.shape``
        must provide ``sig.dims``.

        Returns
        -------
        dict
            Dictionary with the keys ``version``, ``slices``, ``shape``,
            ``sig_dims`` and ``dtype``.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            If the generated data does not conform to :attr:`SCHEMA`.
        """
        data = {
            "version": 1,
            "slices": [[s[0], s[1]] for s in self.slices],
            "shape": list(self.shape),
            "sig_dims": self.shape.sig.dims,
            "dtype": str(self.dtype),
        }
        jsonschema.validate(schema=self.SCHEMA, instance=data)
        return data

    @classmethod
    def from_json(cls, data):
        """
        Build an instance from data in the format produced by :meth:`serialize`.

        Parameters
        ----------
        data : dict
            Already-decoded JSON data; validated against :attr:`SCHEMA`
            before use.

        Returns
        -------
        PartitionStructure
            A new instance of ``cls``.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            If ``data`` does not conform to :attr:`SCHEMA`.
        """
        jsonschema.validate(schema=cls.SCHEMA, instance=data)
        shape = Shape(tuple(data["shape"]), sig_dims=data["sig_dims"])
        # Use cls rather than the hard-coded class name so that subclasses
        # get instances of their own type from this alternate constructor.
        return cls(
            slices=[tuple(item) for item in data["slices"]],
            shape=shape,
            dtype=np.dtype(data["dtype"]),
        )

    @classmethod
    def from_ds(cls, ds):
        """
        Build an instance describing the partitioning of the data set ``ds``.

        ``ds`` must provide ``get_partitions()``, ``shape`` (with
        ``sig.dims``) and ``dtype``. For every partition ``p``, the slice
        ``[p.slice.origin[0], p.slice.origin[0] + p.slice.shape[0])`` is
        recorded. The collected data is passed through :meth:`from_json`
        and is therefore validated against :attr:`SCHEMA`.
        """
        data = {
            "version": 1,
            "slices": [
                [p.slice.origin[0], p.slice.origin[0] + p.slice.shape[0]]
                for p in ds.get_partitions()
            ],
            "shape": list(ds.shape),
            "sig_dims": ds.shape.sig.dims,
            "dtype": str(ds.dtype),
        }
        return cls.from_json(data)