Source code for pwspy.dataTypes._other

# Copyright 2018-2020 Nick Anthony, Backman Biophotonics Lab, Northwestern University
#
# This file is part of PWSpy.
#
# PWSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PWSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PWSpy.  If not, see <https://www.gnu.org/licenses/>.

# -*- coding: utf-8 -*-
"""
Created on Sat Feb  9 15:57:52 2019

@author: Nick Anthony
"""
from __future__ import annotations
import json
import logging
import os
import dataclasses
from enum import Enum, auto
from glob import glob
import h5py
import numpy as np
from scipy import io as spio
from shapely import geometry, wkb
import cv2
from rasterio import features
import shapely
import typing as t_
import copy
import pwspy.dataTypes._metadata as metadata


@dataclasses.dataclass(frozen=True)
class CameraCorrection:
    """This class represents all the information needed to correct camera-related hardware defects in our data. This
    includes a dark count value (the counts registered when no light is incident on the camera). It also includes a
    polynomial that is used to linearize the counts. E.g. if you image something over a range of exposure times you
    would expect the measured counts to be proportional to the exposure time. However, on some cameras this is not
    the case.

    Attributes:
        darkCounts: Dark count for a single pixel of the camera. This will be subtracted from the data in
            pre-processing. When binning is used the dark counts are summed together, so if you measure a dark count
            of 400 with 2x2 binning then the single pixel dark count is 100.
        linearityPolynomial: Sequence of polynomial coefficients [a, b, c, etc...] in the order
            a*x + b*x^2 + c*x^3 + etc... Used to linearize the counts from the camera so that they are linearly
            proportional to the image brightness. This can generally be left as `None` for sCMOS cameras but it is
            often required for CCD type cameras.
    """
    darkCounts: float
    linearityPolynomial: t_.Optional[t_.Sequence[float]] = None

    def __post_init__(self):
        # This code runs right after the built-in dataclass initializer runs.
        # Force the linearity polynomial to be a tuple.
        if self.linearityPolynomial is not None:
            object.__setattr__(self, 'linearityPolynomial', tuple(self.linearityPolynomial))
            assert isinstance(self.linearityPolynomial, tuple)

    def toJsonFile(self, filePath: str):
        """
        Save the camera correction to a JSON formatted text file.

        Args:
            filePath: The file path for the new JSON file.
        """
        if os.path.splitext(filePath)[-1] != '.json':
            filePath = filePath + '.json'
        with open(filePath, 'w') as f:
            json.dump(dataclasses.asdict(self), f)

    @classmethod
    def fromJsonFile(cls, filePath: str) -> CameraCorrection:
        """
        Load the camera correction from a JSON text file.

        Args:
            filePath: The file path of the JSON file to load from.

        Returns:
            A new instance of `CameraCorrection`.

        Examples:
            corr = CameraCorrection.fromJsonFile('~/Desktop/camera.json')
        """
        with open(filePath, 'r') as f:
            return cls(**json.load(f))

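
# --- Usage sketch (not part of the original module) ---------------------------------
# A minimal illustration of how a `CameraCorrection` might be created, saved, and
# reloaded. The dark count and linearity coefficients below are made-up example values
# and the file path is hypothetical; substitute values measured for your own camera.
def _exampleCameraCorrectionRoundTrip(filePath: str = 'camera_correction.json') -> CameraCorrection:
    corr = CameraCorrection(darkCounts=100, linearityPolynomial=(1.0, 1e-6))
    corr.toJsonFile(filePath)  # Writes a small JSON file, appending '.json' if needed.
    loaded = CameraCorrection.fromJsonFile(filePath)
    assert loaded == corr  # Dataclasses compare by field values, so the round trip should be lossless.
    return loaded
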
class Roi:
    """This class represents a single Roi used to select a specific region of an image. The Roi consists of a `mask`
    (a boolean array specifying which pixels are included in the Roi) and a set of `vertices` (an N x 2 array
    specifying the vertices of the polygon enclosing the mask). The vertices are useful if you want to adjust the Roi
    later. Rather than calling the constructor directly you will generally create one of these objects through one of
    the `class methods` that construct one for you.

    Args:
        mask: A 2D boolean array where the True values indicate pixels that are within the ROI.
        verts: Can be a sequence of 2D (x, y) coordinates indicating the border of the ROI or a shapely `Polygon`.
            If an array of coordinates is used then it will be converted to the shell of a shapely polygon
            internally. While this information is partially redundant with the mask it is useful for many
            applications and can be complicated to calculate from `mask`.
    """
    def __init__(self, mask: np.ndarray, verts: t_.Union[np.ndarray, geometry.Polygon]):
        assert isinstance(mask, np.ndarray), f"Mask data is of type: {type(mask)}. Must be numpy array."
        assert len(mask.shape) == 2
        assert mask.dtype == np.bool_
        self.polygon: geometry.Polygon
        if isinstance(verts, geometry.MultiPolygon):
            # I'm not sure how but it is possible to get a multipolygon. In this case just select the first polygon.
            verts = verts[0]
        if isinstance(verts, geometry.Polygon):
            self.polygon = verts
        else:
            assert len(verts.shape) == 2
            assert verts.shape[1] == 2
            self.polygon = geometry.Polygon(shell=verts)
        self.polygon = self.polygon.buffer(0)  # This little trick `normalizes` the format of the polygon so that holes will plot properly. https://gis.stackexchange.com/questions/374001/plotting-shapely-polygon-with-holes-does-not-plot-all-holes
        self.mask = mask

    @property
    def verts(self) -> np.ndarray:
        """An array of vertices for the outer ring of the polygon. Most ROIs only have an outer ring anyway."""
        return np.array(self.polygon.exterior.coords)

    @classmethod
    def fromVerts(cls, verts: np.ndarray, dataShape: t_.Tuple[int, int]) -> Roi:
        """
        Automatically generate the mask for an Roi using just the vertices of an enclosing polygon.

        Args:
            verts: A sequence of 2D (x, y) coordinates indicating the border of the ROI.
            dataShape: A tuple giving the shape of the array that this Roi is associated with.

        Returns:
            A new instance of `Roi`

        Examples:
            myRoi = Roi.fromVerts(polyVerts, (1024, 1024))
        """
        assert isinstance(verts, np.ndarray)
        assert isinstance(dataShape, tuple)
        assert len(dataShape) == 2
        assert verts.shape[1] == 2
        assert len(verts.shape) == 2
        iVerts = np.rint([verts]).astype(np.int32)  # The brackets here convert to a 3d array which is what cv2.fillPoly expects. We have to round to integers for cv2 to work.
        mask = np.zeros(dataShape, dtype=np.int32)
        cv2.fillPoly(mask, iVerts, 1)
        mask = mask.astype(bool)
        return cls(mask, verts)

    @classmethod
    def fromMask(cls, mask: np.ndarray) -> Roi:
        """
        Use rasterio to find the vertices of a mask.

        Args:
            mask: A boolean array. The mask should have only one contiguous `True` region.

        Returns:
            A new instance of `Roi`

        TODO: This function doesn't work properly if there is a `False` region of `mask` completely enclosed by a
            `True` region of `mask`.
        """
        all_polygons = []
        for shape, value in features.shapes(mask.astype(np.uint8), mask=mask):
            all_polygons.append(shapely.geometry.shape(shape))
        poly = sorted(all_polygons, key=lambda ply: ply.area)[-1]  # Return the biggest found polygon
        return cls(mask=mask, verts=poly)

    def transform(self, matrix: np.ndarray) -> Roi:
        """Return a copy of this Roi that has been transformed by an affine transform matrix like the one returned by
        opencv.estimateRigidTransform. This can be obtained using the functions in the utility.machineVision module.

        Args:
            matrix: A 2x3 numpy array representing an affine transformation.

        Returns:
            A new instance of Roi representing this Roi after transformation.
        """
        mask = cv2.warpAffine(self.mask.astype(np.uint8), matrix, self.mask.shape).astype(bool)
        verts = cv2.transform(self.verts[None, :, :], matrix)[0, :, :]  # For some reason this needs to be 3d for opencv to work.
        return Roi(mask=mask, verts=verts)

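
# --- Usage sketch (not part of the original module) ---------------------------------
# A minimal illustration of constructing an `Roi` from polygon vertices and recovering
# an enclosing polygon from a mask. The square coordinates and 256x256 image shape are
# arbitrary example values, not anything required by the class.
def _exampleRoiConstruction() -> Roi:
    square = np.array([[10, 10], [10, 50], [50, 50], [50, 10]])  # (x, y) corners of a square region.
    roi = Roi.fromVerts(square, dataShape=(256, 256))  # Rasterizes the polygon into a boolean mask.
    assert roi.mask.shape == (256, 256) and roi.mask.any()
    recovered = Roi.fromMask(roi.mask)  # Traces the mask back into an enclosing polygon.
    return recovered
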
class RoiFile:
    """This class represents a single Roi file used to save and load an ROI. Each Roi file is identified by a `name`
    and a `number`. The recommended file format is HDF2; in this format multiple ROIs of the same name but differing
    numbers can be saved in a single HDF file.

    Args:
        name: The name used to identify this ROI. Multiple ROIs can share the same name but must have unique numbers.
        number: The number used to identify this ROI. Each ROI with the same name must have a unique number.
        roi: The ROI object associated with this file.
        filePath: The path to the file that this object was loaded from.
        fileFormat: The format of the file that this object was loaded from.
        acquisition: The acquisition object that this ROI belongs to.
    """
    class FileFormats(Enum):
        """An enumerator of the different file formats that an ROI can be saved to."""
        MAT = auto()  # The oldest file format. Each ROI was saved to its own MATLAB .mat file as a boolean mask.
        HDF = auto()  # This was originally the default file format of this Python software. Each ROI of the same name was saved as a dataset in an HDF file. The dataset contained the boolean mask.
        HDF2 = auto()  # For a long time this was the default. Each ROI of the same name is saved as an h5py.Group in an HDF file. Each ROI group contains a dataset for the boolean mask as well as a dataset for the XY coordinates of the enclosing polygon. This saves us from having to constantly recalculate the outline of the ROI for processing purposes.
        HDF3 = auto()  # On 3/26/2021 we switched to this from HDF2. Rather than verts we now store the 'wkb' bytes of the underlying shapely polygon. This allows for ROIs with holes and other more complex situations.

    def __init__(self, name: str, number: int, roi: Roi, filePath: str, fileFormat: RoiFile.FileFormats,
                 acquisition: metadata.Acquisition):
        self._roi = roi
        self.name = name
        self.number = number
        self.filePath = filePath
        self.fformat = fileFormat
        self.acquisition = acquisition
        self.isDeleted = False

    def getRoi(self) -> Roi:
        """Return the ROI object associated with this file.

        Returns:
            The `Roi` object containing geometry information.
        """
        return copy.deepcopy(self._roi)  # Rois are mutable so return a copy.

    def __repr__(self):
        return f"RoiFile({self.name}, {self.number})"

    @staticmethod
    def getValidRoisInPath(path: str) -> t_.List[t_.Tuple[str, int, RoiFile.FileFormats]]:
        """Search `path` for valid RoiFile files and return the detected ROIs as a list of tuples where each tuple
        contains the `name`, `number`, and file format of the Roi.

        Args:
            path: The path to the folder containing the Roi files.

        Returns:
            A list of tuples containing:
                name: The detected Roi name
                number: The detected Roi number
                fformat: The file format of the file that the Roi is stored in
        """
        patterns = [('BW*_*.mat', RoiFile.FileFormats.MAT), ('ROI_*.h5', RoiFile.FileFormats.HDF)]
        files = {fformat: glob(os.path.join(path, p)) for p, fformat in patterns}  # Lists of the found files keyed by file format
        ret = []
        for fformat, fileNames in files.items():
            if fformat == RoiFile.FileFormats.HDF:  # Could still technically be HDF2 or HDF3
                for i in fileNames:
                    with h5py.File(i, 'r') as hf:  # Making sure to open this file in read mode makes the function way faster!
                        for g in hf.keys():
                            if isinstance(hf[g], h5py.Group):  # HDF2 or HDF3 file format
                                if 'fileFormat' in hf[g].attrs:  # HDF3 format
                                    assert 'wkb' in hf[g]
                                    assert 'ROI_' in i
                                    name = i.split("ROI_")[-1][:-3]
                                    try:
                                        ret.append((name, int(g), RoiFile.FileFormats.HDF3))
                                    except ValueError:
                                        logging.getLogger(__name__).warning(f"File {i} contains uninterpretable dataset named {g}")
                                else:  # HDF2 did not have this fileFormat attribute
                                    if 'mask' in hf[g] and 'verts' in hf[g]:
                                        assert 'ROI_' in i
                                        name = i.split("ROI_")[-1][:-3]
                                        try:
                                            ret.append((name, int(g), RoiFile.FileFormats.HDF2))
                                        except ValueError:
                                            logging.getLogger(__name__).warning(f"File {i} contains uninterpretable dataset named {g}")
                                    else:
                                        raise ValueError("File is missing datasets")
                            elif isinstance(hf[g], h5py.Dataset):  # Legacy format
                                assert 'roi_' in i
                                name = i.split('roi_')[-1][:-3]  # Old files used lower case "roi_" rather than "ROI_"
                                try:
                                    ret.append((name, int(g), RoiFile.FileFormats.HDF))
                                except ValueError:
                                    logging.getLogger(__name__).warning(f"File {i} contains uninterpretable dataset named {g}")
            elif fformat == RoiFile.FileFormats.MAT:
                for i in fileNames:
                    i = os.path.split(i)[-1]
                    if len(i.split("_")) != 2:  # Some old data has files that are not ROIs but are named almost identically; this helps us avoid bugs with them.
                        continue
                    num = int(i.split('_')[0][2:])
                    name = i.split('_')[1][:-4]
                    ret.append((name, num, RoiFile.FileFormats.MAT))
        return ret

    @staticmethod
    def deleteRoi(directory: str, name: str, number: int, fformat: t_.Optional[RoiFile.FileFormats] = None):
        """Delete the dataset associated with the Roi object specified by `name` and `number`.

        Args:
            directory: The path to the folder containing the Roi file.
            name: The name used to identify this ROI.
            number: The number used to identify this ROI.
            fformat: The format of the file.

        Raises:
            FileNotFoundError: If the file isn't found.
        """
        assert os.path.isdir(directory)
        if fformat is RoiFile.FileFormats.MAT:
            path = os.path.join(directory, f"BW{number}_{name}.mat")
        elif fformat in [RoiFile.FileFormats.HDF, RoiFile.FileFormats.HDF2, RoiFile.FileFormats.HDF3]:
            path = os.path.join(directory, f"ROI_{name}.h5")
        elif fformat is None:  # Auto-detect the file format
            try:
                RoiFile.deleteRoi(directory, name, number, fformat=RoiFile.FileFormats.MAT)
                return
            except FileNotFoundError:
                try:
                    RoiFile.deleteRoi(directory, name, number, fformat=RoiFile.FileFormats.HDF)
                    return
                except FileNotFoundError as e:
                    raise e
        else:
            raise Exception(f"fformat of {fformat} is not accepted")

        if not os.path.exists(path):
            raise FileNotFoundError(f"The ROI file {name},{number} and format {fformat} was not found in {directory}.")

        if fformat in [RoiFile.FileFormats.HDF, RoiFile.FileFormats.HDF2, RoiFile.FileFormats.HDF3]:
            with h5py.File(path, 'a') as hf:
                if np.string_(str(number)) not in hf.keys():
                    raise ValueError(f"The file {path} does not contain ROI number {number}.")
                del hf[np.string_(str(number))]
                remaining = len(list(hf.keys()))
            if remaining == 0:  # If the file is empty then remove it.
                os.remove(path)
        elif fformat is RoiFile.FileFormats.MAT:
            os.remove(path)
        else:
            raise Exception("Programming error.")

    @classmethod
    def fromHDF_legacy_legacy(cls, directory: str, name: str, number: int, acquisition: metadata.Acquisition = None) -> RoiFile:
        """Load an Roi from an older version of the HDF file format which did not include the vertices parameter.

        Args:
            directory: The path to the directory containing the HDF file.
            name: The name used to identify this ROI.
            number: The number used to identify this ROI.

        Raises:
            OSError: If the file was not found.

        Returns:
            A new instance of Roi loaded from file.
        """
        path = os.path.join(directory, f'ROI_{name}.h5')
        if not os.path.exists(path):
            raise OSError(f"File {path} does not exist.")
        with h5py.File(path, 'r') as hf:
            roi = Roi.fromMask(np.array(hf[str(number)]).astype(bool))
            return cls(name, number, roi, filePath=path, fileFormat=RoiFile.FileFormats.HDF, acquisition=acquisition)

    @classmethod
    def fromHDF_legacy(cls, directory: str, name: str, number: int, acquisition: metadata.Acquisition = None) -> RoiFile:
        """Load an Roi from an HDF file. Uses the old HDF2 format.

        Args:
            directory: The path to the directory containing the HDF file.
            name: The name used to identify this ROI.
            number: The number used to identify this ROI.

        Raises:
            OSError: If the file was not found.

        Returns:
            A new instance of Roi loaded from file.

        Examples:
            myRoiFile = RoiFile.fromHDF_legacy('~/Desktop', 'nucleus', 1)
        """
        path = os.path.join(directory, f'ROI_{name}.h5')
        if not os.path.exists(path):
            raise OSError(f"File {path} does not exist.")
        with h5py.File(path, 'r') as hf:
            dset = hf[str(number)]
            verts = dset['verts']
            if verts.shape is None:
                roi = Roi.fromMask(np.array(dset['mask']).astype(bool))  # Some old files could be saved without verts. Allow loading them.
            else:
                roi = Roi(np.array(dset['mask']).astype(bool), verts=np.array(verts))
            return cls(name, number, roi, filePath=path, fileFormat=RoiFile.FileFormats.HDF2, acquisition=acquisition)

    @classmethod
    def fromHDF(cls, directory: str, name: str, number: int, acquisition: metadata.Acquisition = None) -> RoiFile:
        """Load an Roi from the newest ROI format of HDF file.

        Args:
            directory: The path to the directory containing the HDF file.
            name: The name used to identify this ROI.
            number: The number used to identify this ROI.

        Raises:
            OSError: If the file was not found.

        Returns:
            A new instance of Roi loaded from file.

        Examples:
            myRoiFile = RoiFile.fromHDF('~/Desktop', 'nucleus', 1)
        """
        path = os.path.join(directory, f'ROI_{name}.h5')
        if not os.path.exists(path):
            raise OSError(f"File {path} does not exist.")
        with h5py.File(path, 'r') as hf:
            group = hf[str(number)]
            assert 'fileFormat' in group.attrs, "No fileFormat attribute found for the ROI file. Try using one of the legacy ROI loading methods."
            assert group.attrs['fileFormat'] == RoiFile.FileFormats.HDF3.name, f'Only HDF3 format is supported by this loading method, not {group.attrs["fileFormat"]}'
            wkbBytes = bytes(group['wkb'][()])
            polygon = wkb.loads(wkbBytes)
            mask = np.array(group['mask']).astype(bool)
            roi = Roi(mask, verts=polygon)
            return cls(name, number, roi, filePath=path, fileFormat=RoiFile.FileFormats.HDF3, acquisition=acquisition)

    @classmethod
    def fromMat(cls, directory: str, name: str, number: int, acquisition: metadata.Acquisition = None) -> RoiFile:
        """Load an Roi from a .mat file saved in MATLAB. This file format is not recommended as it does not include
        the `vertices` parameter which is useful for visually rendering and readjusting the Roi.

        Args:
            directory: The path to the directory containing the .mat file.
            name: The name used to identify this ROI.
            number: The number used to identify this ROI.

        Returns:
            A new instance of Roi loaded from file.
        """
        filePath = os.path.join(directory, f'BW{number}_{name}.mat')
        spFile = spio.loadmat(filePath)
        if 'BW' in spFile.keys():
            mask = spFile['BW'].astype(bool)
        elif 'mask' in spFile.keys():
            mask = spFile['mask'].astype(bool)
        else:
            raise KeyError(f"A `mask` was not found in the `mat` file: {filePath}")
        roi = Roi.fromMask(mask)
        return cls(name, number, roi, filePath=filePath, fileFormat=RoiFile.FileFormats.MAT, acquisition=acquisition)

    @classmethod
    def loadAny(cls, directory: str, name: str, number: int, acquisition: metadata.Acquisition = None) -> RoiFile:
        """Attempt loading any of the known file formats.

        Args:
            directory: The path to the directory containing the ROI file.
            name: The name used to identify this ROI.
            number: The number used to identify this ROI.

        Returns:
            A new instance of Roi loaded from file.
        """
        try:
            return RoiFile.fromHDF(directory, name, number, acquisition=acquisition)
        except:
            try:
                return RoiFile.fromHDF_legacy(directory, name, number, acquisition=acquisition)
            except:
                try:
                    return RoiFile.fromHDF_legacy_legacy(directory, name, number, acquisition=acquisition)
                except OSError:  # For backwards compatibility purposes
                    return RoiFile.fromMat(directory, name, number, acquisition=acquisition)

    @classmethod
    def toHDF(cls, roi: Roi, name: str, number: int, directory: str, overwrite: t_.Optional[bool] = False,
              acquisition: metadata.Acquisition = None) -> RoiFile:
        """
        Save the Roi to an HDF file in the specified directory. The filename is automatically chosen based on the
        `name` parameter of the Roi. Multiple Rois with the same `name` will be saved into the same file if they have
        differing `number` parameters. If `overwrite` is true then any existing dataset will be replaced, otherwise
        an error will be raised.

        Args:
            roi: The ROI to save.
            name: The name to save as. This will be part of the file name.
            number: The ROI number to save as. Multiple ROIs of the same name can be saved to the same file but the
                numbers must be unique.
            directory: The path of the folder to save the new HDF file to. The file will be named automatically based
                on the `name` attribute of the Roi.
            overwrite: If True then if an Roi with the same `number` as this Roi is found it will be overwritten.
        """
        savePath = os.path.join(directory, f'ROI_{name}.h5')
        numStr = np.string_(str(number))
        mask = roi.mask.astype(np.uint8)
        with h5py.File(savePath, 'a') as hf:
            if numStr in hf.keys():
                if overwrite:
                    del hf[numStr]
                else:
                    raise OSError(f"The Roi file {savePath} already contains a dataset {number}")
            g = hf.create_group(numStr)
            g.attrs['fileFormat'] = RoiFile.FileFormats.HDF3.name
            g.create_dataset(np.string_("wkb"), data=np.void(roi.polygon.wkb))  # np.void is required here so we can save a byte array with `null` in it.
            g.create_dataset(np.string_("mask"), data=mask, compression=5)
        return cls(name, number, roi, filePath=savePath, fileFormat=RoiFile.FileFormats.HDF2, acquisition=acquisition)

    def delete(self):
        """
        Delete the dataset associated with the Roi object.
        """
        self.deleteRoi(os.path.split(self.filePath)[0], self.name, self.number)
        self.isDeleted = True

    def update(self, roi: Roi):
        """
        Save a new ROI to the existing file.

        Args:
            roi: The updated ROI to save.
        """
        assert not self.isDeleted
        if self.fformat not in [RoiFile.FileFormats.HDF2, RoiFile.FileFormats.HDF3]:
            raise NotImplementedError(f"RoiFile of format: {self.fformat} cannot be updated.")
        self.toHDF(roi, self.name, self.number, os.path.split(self.filePath)[0], overwrite=True)
        self._roi = copy.deepcopy(roi)  # We don't want to use the same object, which might still have external mutable references.
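
# --- Usage sketch (not part of the original module) ---------------------------------
# A minimal end-to-end example of saving an ROI to an HDF file, discovering it on disk,
# and loading it back. The folder path and the 'nucleus' name/number are hypothetical
# example values; `acquisition` is left at its default of None for simplicity.
def _exampleRoiFileRoundTrip(folder: str = '.') -> Roi:
    verts = np.array([[5, 5], [5, 40], [40, 40], [40, 5]])
    roi = Roi.fromVerts(verts, dataShape=(128, 128))
    RoiFile.toHDF(roi, 'nucleus', 1, folder, overwrite=True)  # Writes ROI_nucleus.h5 in the HDF3 layout.
    found = RoiFile.getValidRoisInPath(folder)
    assert ('nucleus', 1, RoiFile.FileFormats.HDF3) in found
    roiFile = RoiFile.loadAny(folder, 'nucleus', 1)  # Falls back through the legacy formats if needed.
    return roiFile.getRoi()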