Source code for sap.utils

#!/usr/bin/env python
# file utils.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 18 mars 2020
"""
Utils
=====

Various utilities unrelated to trees or profiles.

"""

import numpy as np

def ndarray_hash(x, l=8, c=1000):
    """
    Compute a hash from a numpy array.

    Parameters
    ----------
    x : ndarray
        The array to hash.
    l : int, optional
        The length of the hash. Must be an even number.
    c : int, optional
        A variable to affect the sampling of the hash. It has to be the
        same along the matching process. Refer to notes.

    Returns
    -------
    hash : str
        The hash of array x.

    Notes
    -----
    Python hash is slow and will offset the random generator in each
    kernel. The hash of the same data will not match in different
    kernels.

    The idea is to sparsely sample the data to speed up the hash
    computation. By fixing the number of samples the hash computation
    will take a fixed amount of time, no matter the size of the data.

    This hash function outputs a hash of :math:`x` in hexadecimal. The
    length of the hash is :math:`l`. The hashes are consistent when
    tuning the length :math:`l`: shorter hashes are contained in the
    longer ones for the same data :math:`x`. The samples count taken in
    :math:`x` is :math:`\\frac{l \\times c}{2}`.

    """
    # A freshly seeded generator draws the same sample sequence on each call.
    sampler = np.random.RandomState(42)

    # View the C-ordered data as a flat sequence of raw bytes.
    contiguous = np.require(x, requirements='C')
    raw_bytes = np.frombuffer(contiguous, np.uint8)

    # Each output byte (two hex digits) folds ``c`` sampled bytes together.
    n_bytes = int(l / 2)
    samples = sampler.choice(raw_bytes, n_bytes * c)

    # Summing into uint8 wraps around, so each group stays a single byte.
    digest = samples.reshape(-1, c).sum(1, np.uint8)
    return digest.tobytes().hex()
def local_patch(arr, patch_size=7):
    """
    Create local patches around each value of the array.

    Parameters
    ----------
    arr : ndarray
        The input data.
    patch_size : int
        The size :math:`w` of the patches. For a 2D ndarray the returned
        patch size will be :math:`w \\times w`. Must be at least 1.

    Returns
    -------
    patches : ndarray
        The local patches. The shape of the returned array is
        ``arr.shape + (patch_size,) * arr.ndim``.

    Raises
    ------
    ValueError
        If ``patch_size`` is lower than 1.

    Notes
    -----
    This implementation is memory efficient. The borders are handled by
    reflect-padding ``arr`` once; the returned patches are a strided
    view of that padded array and are not writeable.

    This function works regardless of the dimension of ``arr`` with
    hypercubes shaped patches, according to the dimension of ``arr``.

    Along each axis a patch holds ``patch_size // 2`` values before the
    current value and ``patch_size - 1 - patch_size // 2`` values after
    it. Odd sizes are therefore centered; even sizes hold one more value
    before than after.

    See Also
    --------
    local_patch_f : use a function over the local patches.

    """
    if patch_size < 1:
        raise ValueError(
            'patch_size must be at least 1, got {}'.format(patch_size))

    # Pad exactly ``patch_size - 1`` values per axis so that one patch
    # exists for each input value. Symmetric padding of patch_size // 2
    # would add one extra value (and one extra patch) per axis whenever
    # patch_size is even.
    before = patch_size // 2
    after = patch_size - 1 - before
    a = np.pad(arr, (before, after), 'reflect')

    # Leading axes index the patch position, trailing axes index inside
    # the patch; both walk the padded array with the same strides.
    positions = tuple(n - patch_size + 1 for n in a.shape)
    shape = positions + (patch_size,) * a.ndim
    strides = a.strides * 2

    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides,
                                           writeable=False)
def local_patch_f(arr, patch_size=7, f=np.mean):
    """local_patch_f(arr, patch_size=7, f=np.mean)
    Describe local patches around each value of the array.

    Parameters
    ----------
    arr : ndarray
        The input data. Array-likes such as nested lists are accepted
        as well.
    patch_size : int
        The size :math:`w` of the patches.
    f : function
        The function to run over the local patches. For now it is
        necessary to use a function with ``axis`` parameter such as
        ``np.mean``, ``np.std``, etc... See more functions on `Numpy
        documentation <https://docs.scipy.org/doc/numpy/reference/routines.statistics.html>`_.

    Returns
    -------
    patches : ndarray
        The description of the local patches: ``f`` reduced over the
        patch axes, leaving one value per patch returned by
        :func:`local_patch`.

    Notes
    -----
    Refer to :func:`local_patch` for full documentation.

    See Also
    --------
    local_patch : create the local patches.

    """
    patches = local_patch(arr, patch_size)

    # The patch contents live on the trailing ``ndim`` axes. np.ndim is
    # used instead of ``arr.ndim`` so that array-likes work here as well.
    patch_axes = tuple(range(-1, -np.ndim(arr) - 1, -1))

    return f(patches, axis=patch_axes)