import os
from typing import Optional, TYPE_CHECKING
import libertem.api as lt
from libertem.io.dataset.base import DataSetException
from libertem.io.dataset.dm_single import SingleDMDataSet
from libertem.common import Shape
from libertem.udf.record import RecordUDF
if TYPE_CHECKING:
from libertem.api import Context, DataSet
class ConvertTransposedDatasetUDF(RecordUDF):
def get_method(self):
return self.UDF_METHOD.PARTITION
@property
def _ds_shape(self) -> Shape:
nav_shape = self.meta.dataset_shape.sig.to_tuple()
sig_shape = self.meta.dataset_shape.nav.to_tuple()
return Shape(nav_shape + sig_shape, sig_dims=len(sig_shape))
@property
def _memmap_flat_shape(self) -> tuple[int, ...]:
return (self._ds_shape.nav.size, self._ds_shape.sig.size)
def process_partition(self, partition):
# partition will be of shape (n_sig_pix, *ds.shape.nav)
n_sig_px = partition.shape[0]
# flatten the nav dimensions
partition = partition.reshape((n_sig_px, -1))
# Do the transpose, this is fast but becomes costly
# the moment we assign into the memmap
partition = partition.T
# the LT flat nav origin is actually the sig origin in the memmap
flat_sig_origin = self.meta.slice.origin[0]
self.task_data.memmap[
:, flat_sig_origin:flat_sig_origin + n_sig_px
] = partition
def _convert_transposed_ds(
ctx: 'Context',
ds: 'DataSet',
out_path: os.PathLike,
**run_kwargs,
):
ctx.run_udf(
ds,
ConvertTransposedDatasetUDF(
out_path,
),
**run_kwargs,
)
[docs]
def convert_dm4_transposed(
dm4_path: os.PathLike,
out_path: os.PathLike,
ctx: Optional['Context'] = None,
num_cpus: Optional[int] = None,
dataset_index: Optional[int] = None,
progress: bool = False,
):
"""
Convenience function to convert a transposed Gatan Digital Micrograph
(.dm4) STEM dataset into a numpy (.npy) file with standard ordering for
processing with LiberTEM.
Transposed .dm4 files are stored in :code:`(sig, nav)` order, i.e.
all frame values for a given signal pixel are stored as blocks,
which means that extracting a single frame requires traversal of the
whole file. LiberTEM requires :code:`(nav, sig)` order for processing
using the UDF interface, i.e. each frame is stored sequentially.
.. versionadded:: 0.13.0
Parameters
----------
dm4_path : PathLike
The path to the .dm4 file
out_path : PathLike
The path to the output .npy file
ctx : libertem.api.Context, optional
The Context to use to perform the conversion, by default None
in which case a Dask-based context will be created (optionally)
following the :code:`num_cpus` argument.
num_cpus : int, optional
When :code:`ctx` is not supplied, this argument limits
the number of CPUs to perform the conversion. This can be
important as conversion is a RAM-intensive operation and limiting
the number of CPUs can help reduce bottlenecking.
dataset_index : int, optional
If the .dm4 file contains multiple datasets, this can be used
to select the dataset to convert
(see :class:`~libertem.io.dataset.dm_single.SingleDMDataSet`)
for more information.
progress : bool, optional
Whether to display a progress bar during conversion, by default False
Raises
------
DataSetException
If the DM4 dataset is not stored as transposed
ValueError
If both :code:`ctx` and :code:`num_cpus` are supplied
"""
if ctx is not None and num_cpus is not None:
raise ValueError('Either supply a Context or number of cpus to use in conversion')
elif ctx is None:
ctx = lt.Context.make_with('dask', cpus=num_cpus)
ds_meta = SingleDMDataSet._read_metadata(dm4_path, use_ds=dataset_index)
if ds_meta['c_order']:
raise DataSetException('The DM4 data is not transposed')
ds = ctx.load('dm', dm4_path, force_c_order=True, dataset_index=dataset_index)
return _convert_transposed_ds(ctx, ds, out_path, progress=progress)