Binning

This example shows how LiberTEM can be used for fast binning of a large dataset.

[1]:
%matplotlib nbagg
[2]:
import numpy as np
import sparse
import matplotlib.pyplot as plt

from libertem.api import Context
from libertem.udf.masks import ApplyMasksUDF
[3]:
ctx = Context()

Generate test data

Just a folded linear gradient for illustration

[4]:
shape = (17, 19, 23, 29)
data = np.linspace(0, 1, np.prod(shape, dtype=np.int64)).reshape(shape)
[5]:
ds = ctx.load('memory', data=data)

Define the binning factor and determine target shape

[6]:
bin_factor = 4
[7]:
# ceil() to include output bins that only fit partially into the
# source frame
size_y = int(np.ceil(ds.shape.sig[0] / bin_factor))
size_x = int(np.ceil(ds.shape.sig[1] / bin_factor))

Create a sparse matrix for binning

Each layer corresponds to an output bin. We calculate which pixels are in each output bin.

[8]:
bin_matrix_layers = []
for y in range(size_y):
    for x in range(size_x):
        layer = np.zeros(ds.shape.sig)
        start_y = y * bin_factor
        stop_y = (y + 1) * bin_factor
        start_x = x * bin_factor
        stop_x = (x + 1) * bin_factor
        layer[start_y:stop_y, start_x:stop_x] = 1
        layer /= np.sum(layer)
        bin_matrix_layers.append(sparse.COO(layer))
bin_matrix = sparse.stack(bin_matrix_layers)
[9]:
def mask_factory():
    return bin_matrix

Apply the matrix

[10]:
udf = ApplyMasksUDF(mask_factories=mask_factory)
result = ctx.run_udf(dataset=ds, udf=udf, progress=True)
  0%|                                                                                            | 0/5 [00:00<?, ?it/s]C:\Users\weber\miniconda3\envs\jupytertest\lib\site-packages\distributed\worker.py:4606: UserWarning: Large object of size 1.65 MiB detected in task graph:
  (<UDFTask [<class 'libertem.udf.masks.ApplyMasksUD ... 960', 0, False)
Consider scattering large objects ahead of time
with client.scatter to reduce scheduler burden and
keep data on workers

    future = client.submit(func, big_data)    # bad

    big_future = client.scatter(big_data)     # good
    future = client.submit(func, big_future)  # good
  warnings.warn(
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  4.48it/s]

Reshape and show result

[11]:
reshaped_result = result['intensity'].raw_data.reshape(*ds.shape.nav, size_y, size_x)
[12]:
fig, axes = plt.subplots(1, 2)
axes[0].imshow(data[0, 0])
axes[1].imshow(reshaped_result[0, 0])
[12]:
<matplotlib.image.AxesImage at 0x191684e4ee0>
[ ]: