Source code for pywsi.normalization.macenko

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import matplotlib
matplotlib.use('Agg', warn=False)
import matplotlib.pyplot as plt

from ..io.operations import read_as_rgb
from .color_conversion import RGB2OD, OD2RGB, get_nonwhite_mask, normalize_rows
import numpy as np
from numpy import linalg as LA
import six
import spams


[docs]class MacenkoNormalization(object): def __init__(self, beta=0.15, alpha=1, lambda1=0.01, lambda2=0.01, gamma1=0.01, maskout_white=True, nonwhite_threshold=0.8): """Implementation of Macenko's method. See: http://wwwx.cs.unc.edu/~mn/sites/default/files/macenko2009.pdf Input: RGB Slide 1. Convert RGB to OD 2. Remove data with OD intensity less than beta 3. Calculate SVD on the OD tuples 4. Create plane from the SVD directions corresponding to the two largest singular values 5. Project data onto the plane, and normalize to unit length 6. Calculate angle of each point wrt the first SVD direction 7. Find robust extremes (alpha th and (100 - alpha)th percentiles) of the angle 8. Convert extreme values back to OD space More formally, consider I is a n x 3 matrix (n rows, 3 columns for RGB), we look for two matrixes C and S such that: C = n x 2 : Matrix of Concentrations in Channel H[1] and Channel E[2] S = 2 x 3 : Matrix of Stain vectors, 3 channels for H and 3 channels for E This is achieved by doing a non negative matrix factorization of I min_{C,S} ||I-CS||_2^2 """ self.beta = beta self.alpha = alpha self.lambda1 = lambda1 self.lambda2 = lambda2 self.gamma1 = gamma1 self.OD = None self.maskout_white = maskout_white self.nonwhite_threshold = nonwhite_threshold
[docs] def fit(self, target_image): """Fit attributes to target image. Parameters ---------- target_image: """ if isinstance(target_image, six.string_types): target_image = read_as_rgb(target_image) self.h, self.w, self.c = target_image.shape self.target_stain_matrix = self.get_stain_matrix(target_image) self.target_concentrations = self.get_concentrations( target_image, self.target_stain_matrix)
[docs] def get_stain_matrix(self, source_image): """ OD = SV. Get V Parameters ---------- source_image: array_like np.unit8 array of rgb values Returns ------- stain_matrix: array_like 2 x M matrix for a N x M matrix """ OD = RGB2OD(source_image) OD = OD.reshape((-1, 3)) if self.maskout_white: nonwhite_mask = get_nonwhite_mask( source_image, self.nonwhite_threshold).reshape((-1, )) OD = OD[nonwhite_mask] OD = (OD[(OD > self.beta).any(axis=1), :]) self.OD = OD # do-PCA OD_cov = np.cov(OD, rowvar=False) w, v = LA.eigh(OD_cov) # Project OD into first two directions v_first_two = v[:, [1, 2]] projected = np.dot(OD, v_first_two) # find the min and max vectors and project back to OD space angle = np.arctan2(projected[:, 1], projected[:, 0]) min_angle = np.percentile(angle, self.alpha) max_angle = np.percentile(angle, 100 - self.alpha) Vmin = np.dot(v_first_two, np.array([np.cos(min_angle), np.sin(min_angle)])) Vmax = np.dot(v_first_two, np.array([np.cos(max_angle), np.sin(max_angle)])) # For the sake of consistency, # we assume that the first vector with more concentation is the # Hematoxylin vector. The motivation for this is our visual understanding # that the purple Hematoxylin generally appears to be dominant over # the pink Eosin. if Vmax[0] < 0: Vmax[0] *= -1 if Vmin[0] < 0: Vmin[0] *= -1 if Vmax[0] > Vmin[0]: HE = np.array([Vmax, Vmin]) else: HE = np.array([Vmin, Vmax]) HE = HE / np.linalg.norm(HE, axis=1)[:, None] return HE
[docs] def get_concentrations(self, image, stain_matrix): """Get concentration matrix. Parameters ---------- image: array_like rgb Returns ------- concentration: array_like N x 2 matrix for an N x M case. """ OD = RGB2OD(image).reshape((-1, 3)) if self.maskout_white: nonwhite_mask = get_nonwhite_mask( image, self.nonwhite_threshold).reshape((-1, )) OD = OD[nonwhite_mask] coefs = spams.lasso( OD.T, D=stain_matrix.T, mode=2, lambda1=self.lambda1, pos=True).toarray().T return coefs
[docs] def transform(self, source_images): """Transform source image to target. Parameters ---------- source_image: list(array_like) np.unit8 rgb input Returns ------- reconstructed: array_like np.uint8 transformed image """ if not isinstance(source_images, list): source_images = [source_images] normalized_images = [] for source_image in source_images: source_stain_matrix = self.get_stain_matrix(source_image) source_concentrations = self.get_concentrations( source_image, source_stain_matrix) maxC_source = np.percentile( source_concentrations, 99, axis=0).reshape((1, 2)) maxC_target = np.percentile( self.target_concentrations, 99, axis=0).reshape((1, 2)) source_concentrations *= (maxC_target / maxC_source) reconstructed = np.dot(source_concentrations, self.target_stain_matrix).reshape( source_image.shape) reconstructed = OD2RGB(reconstructed).reshape( source_image.shape).astype(np.uint8) normalized_images.append(reconstructed) return normalized_images
[docs] def get_hematoxylin_channel(self, source_image=None, total=True): """Get hematoxylin channel concentration of source or target image. If source image is providedd, then the returned value represents the target stain vector remains the same, the source image concentration is then calculated by an inverse calculation C = IS^{-1} If no source image is provided, it simply returns the concentration channel of the target image, first column in this case. """ if source_image is not None: h, w, _ = source_image.shape stain_matrix_source = self.get_stain_matrix(source_image) concentrations = self.get_concentrations(source_image, stain_matrix_source) else: h, w, _ = self.h, self.w, self.c concentrations = self.target_concentrations if total: H = np.multiply( np.array(concentrations[:, 0], ndmin=2).T, np.reshape(self.target_stain_matrix[0, :], (1, 3))).reshape( h, w, 3) else: H = concentrations[:, 0].reshape(h, w) H = OD2RGB(H) return H
[docs] def get_eosin_channel(self, source_image=None, total=True): """Get eosin channel concentration of source or target image. If source image is providedd, then the returned value represents the target stain vector remains the same, the source image concentration is then calculated by an inverse calculation C = IS^{-1} If no source image is provided, it simply returns the concentration channel of the target image, second column in this case. """ if source_image is not None: h, w, _ = source_image.shape stain_matrix_source = self.get_stain_matrix(source_image) concentrations = self.get_concentrations(source_image, stain_matrix_source) else: h, w, _ = self.h, self.w, self.c concentrations = self.target_concentrations if total: E = np.multiply( np.array(concentrations[:, 1], ndmin=2).T, np.reshape(self.target_stain_matrix[1, :], (1, 3))).reshape( h, w, 3) else: E = concentrations[:, 1].reshape(h, w) E = OD2RGB(E) return E
[docs] def get_hematoxylin_stain(self, vis=True): """ Get the 3 channel values belonging to the hematoxylin stain Parameters ---------- vis: bool Should visualize? Returns ------- H: array_like A length 3 vector of RGB values """ H = self.target_stain_matrix[0, :] H = OD2RGB(H) if vis: fig, ax = plt.subplots(figsize=(8, 8)) ax.set_axis_off() ax.plot([0, 1], [1, 1], c=H / 255, linewidth=40) ax.set_aspect('equal') fig.tight_layout() return H
[docs] def get_eosin_stain(self, vis=True): """ Get the 3 channel values belonging to the eosin stain Parameters ---------- vis: bool Should visualize? Returns ------- E: array_like A length 3 vector of RGB values """ E = self.target_stain_matrix[1, :] E = OD2RGB(E) if vis: fig, ax = plt.subplots(figsize=(8, 8)) ax.set_axis_off() ax.set_aspect('equal') ax.plot([0, 1], [1, 1], c=E / 255, linewidth=40) fig.tight_layout() return E
[docs] def get_both_stains(self, vis=True): """ Get the 3 channel values belonging to both stainsn Parameters ---------- vis: bool Should visualize? """ H = self.target_stain_matrix[0, :] H = OD2RGB(H) E = self.target_stain_matrix[1, :] E = OD2RGB(E) if vis: fig = plt.figure(figsize=(8, 4)) ax = plt.subplot(121) ax.set_axis_off() ax.set_aspect('equal') ax.plot([0, 1], [1, 1], c=H / 255, linewidth=40) ax = plt.subplot(122) ax.set_axis_off() ax.set_aspect('equal') ax.plot([0, 1], [1, 1], c=E / 255, linewidth=40) fig.tight_layout()