"""
@author: Alex Kostenko
This module contains data analysis routines.
"""
# >>>>>>>>>>>>>>>>>>>>>>>>>>>> Imports >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
import os
import numpy
from scipy import ndimage
from scipy import signal
from skimage.filters import threshold_otsu
from scipy.interpolate import interp1d
from flexdata import display
from flexdata import data
from flextomo import phantom
from flextomo import projector
from flextomo import model
from flexdata.data import logger
# >>>>>>>>>>>>>>>>>>>>>>>>>>>> Methods >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def get_background(array, mode = 'histogram'):
    '''
    Estimate the background (air) intensity of a 3D dataset.

    Args:
        array: 3D data array
        mode (str): 'histogram' - use the position of the highest histogram
            peak (assumed to be air); 'sides' - interpolate linearly between
            the mean intensities of the first and last 10 columns.

    Returns:
        Background intensity: a scalar in 'histogram' mode, or a 2D map of
        shape (array.shape[0], array.shape[2]) in 'sides' mode.

    Raises:
        ValueError: if mode is not recognized.
    '''
    if mode == 'histogram':
        # Subsampled histogram; the air peak is assumed to be the dominant mode:
        x, y = histogram(array[::2,::2,::2], log = True, plot = False)

        # Make sure there are no 0s before log-scaling:
        y = numpy.log(y + 1)
        # NOTE: ndimage.filters is deprecated; call gaussian_filter1d directly
        # (consistent with the ndimage.gaussian_filter use elsewhere in this file).
        y = ndimage.gaussian_filter1d(y, sigma = 1)

        # Find air maximum:
        air_index = numpy.argmax(y)
        flat = x[air_index]

    elif mode == 'sides':
        # Mean intensity of the first and last 10 columns:
        left = array[:, :, :10].mean(2)
        # BUG FIX: the right side used to be sampled from array[:, :, 0:10]
        # (the left slice again).
        right = array[:, :, -10:].mean(2)

        left = left.max(1)
        # BUG FIX: used to be computed from 'left' instead of 'right'.
        right = right.max(1)

        # Interpolate linearly between the two sides along dim 2:
        linfit = interp1d([0, 1], numpy.vstack([left, right]).T, axis = 1)
        flat = linfit(numpy.linspace(0, 1, array.shape[2]))
        flat = flat.astype(array.dtype)

    else:
        logger.error('Unknown mode: ' + mode)
        # BUG FIX: 'flat' was unbound here, causing UnboundLocalError on return.
        raise ValueError('Unknown mode: ' + mode)

    return flat
def histogram(data, nbin = 256, rng = None, plot = True, log = False):
    """
    Compute a histogram of the data (subsampled by a factor 2 in every
    dimension to save time).

    Args:
        data : 3D dataset
        nbin (int): number of histogram bins
        rng : [min, max] intensity range; if None or empty (default), use
            [min(data, 0), 99.99th percentile of data]
        plot (bool): if True, display the histogram
        log (bool): if True, plot with a logarithmic y-axis

    Returns:
        (x, y): bin centres and bin counts
    """
    # Subsample to avoid a slow histogram on huge arrays:
    data2 = data[::2, ::2, ::2]

    # BUG FIX: 'rng' used to default to a mutable [] (shared default argument);
    # None is used as the sentinel now, while an explicit [] still works.
    if rng is None or len(rng) == 0:
        # Default range: include zero and cut off the top outliers.
        mi = min(data2.min(), 0)
        ma = numpy.percentile(data2, 99.99)
    else:
        mi = rng[0]
        ma = rng[1]

    y, x = numpy.histogram(data2, bins = nbin, range = [mi, ma])

    # Set bin values to the middle of the bin:
    x = (x[0:-1] + x[1:]) / 2

    if plot:
        display.plot(x, y, semilogy = log, title = 'Histogram')

    return x, y
def intensity_range(data):
    """
    Estimate a characteristic intensity range based on the histogram.

    Returns:
        [a, b, c] where:
            a - position of the highest histogram peak (typically air),
            b - 99.99th percentile of the data,
            c - centre of mass of the histogram between a and b.
    """
    # 256 bins should be sufficient for our dynamic range:
    bins, counts = histogram(data, nbin = 256, plot = False)

    # Log-scale and smooth the counts before peak detection:
    counts = ndimage.filters.gaussian_filter(numpy.log(counts + 0.1), sigma = 1)

    # Air peak:
    a = bins[numpy.argmax(counts)]

    # Most of the other stuff lies below the top percentile:
    b = numpy.percentile(data, 99.99)

    # Centre of mass of the histogram, excluding the high air spike
    # (+10% margin) and outliers:
    mask = (bins > a + (b - a) / 10) & (bins < b)
    c = numpy.sum(counts[mask] * bins[mask]) / numpy.sum(counts[mask])

    return [a, b, c]
def centre(array):
    """
    Compute the centre of mass of the squared intensity.

    Returns:
        [c0, c1, c2]: centre coordinates along each dimension, in
        full-resolution voxels.
    """
    # Subsample by 2 and square the intensity; the factor 2 below scales the
    # subsampled moments back to full resolution:
    squared = array[::2, ::2, ::2].copy().astype('float32') ** 2
    total = squared.sum()

    # Normalized first-order moment along each axis:
    return [moment2(squared, 1, axis) / total * 2 for axis in range(3)]
def moment3(array, order, center = None, subsample = 1):
    '''
    Compute a 3D image moment $mu_{ijk}$.

    Args:
        array: 3D dataset
        order: sequence of three exponents [i, j, k], one per dimension
            (the docstring used to claim an int - a 3-sequence is required)
        center: coordinates of the center; defaults to the origin
        subsample (int): subsampling factor - 1, 2, 4...

    Returns:
        float: image moment
    '''
    # BUG FIX: the default used to be 'center = numpy.zeros(3)' - a mutable
    # array created once at import time and shared between all calls.
    if center is None:
        center = numpy.zeros(3)

    # Create central indexes:
    shape = array.shape

    # Work on a subsampled copy to save time and memory:
    array_ = array[::subsample, ::subsample, ::subsample].copy()

    for dim in range(3):
        if order[dim] > 0:
            # Coordinate axis shifted by the center:
            m = numpy.arange(0, shape[dim], dtype = numpy.float32)
            m -= center[dim]

            # Weight the data along this dimension (in place):
            data.mult_dim(array_, m[::subsample] ** order[dim], dim)

    # Compensate for the subsampling with its volume factor:
    return numpy.sum(array_) * (subsample**3)
def moment2(array, power, dim, centered = True):
    """
    Compute a 2D image moment (weighted sum) of the data:

        sum( (x - x0) ** power * data )

    Args:
        array: 3D dataset
        power (float): power of the image moment
        dim (int): dimension along which to compute the moment
        centered (bool): if True, the coordinate origin is in the middle of
            the array along `dim`; otherwise it is at index 0.

    Returns:
        The weighted sum over the whole array.
    """
    shape = array.shape

    # Coordinate index along the requested dimension:
    x = numpy.arange(0, shape[dim])
    if centered:
        x -= shape[dim] // 2

    # BUG FIX: used to be 'x **= power', which raises for the documented
    # float powers - an in-place power on an integer array cannot cast the
    # float result back. Out-of-place power upcasts as needed.
    x = x ** power

    # Broadcast the coordinate vector along the correct dimension:
    if dim == 0:
        return numpy.sum(x[:, None, None] * array)
    elif dim == 1:
        return numpy.sum(x[None, :, None] * array)
    else:
        return numpy.sum(x[None, None, :] * array)
def bounding_box(array):
    """
    Find a bounding box for the volume based on intensity (use for auto_crop).

    Args:
        array: 3D dataset.

    Returns:
        a, b, c: [min, max] index pairs along dimensions 0, 1 and 2 of the
        full-resolution array.
    """
    # Subsample by 2 to avoid memory overflow; data.bin presumably reduces the
    # resolution by another factor 2 (TODO confirm) - consistent with the
    # factor 4 used when upscaling the result below.
    data2 = array[::2, ::2, ::2].copy().astype('float32')
    data2 = data.bin(data2)

    # Suppress everything below the Otsu threshold:
    data2[data2 < binary_threshold(data2, mode = 'otsu')] = 0

    # Project along dim 0 to find the bounds in dims 1 and 2:
    integral = numpy.float32(data2).sum(0)

    # Filter noise:
    integral = ndimage.gaussian_filter(integral, 3)
    mean = numpy.mean(integral[integral > 0])
    integral[integral < mean / 10] = 0

    # Compute bounding box:
    rows = numpy.any(integral, axis=1)
    cols = numpy.any(integral, axis=0)
    b = numpy.where(rows)[0][[0, -1]]
    c = numpy.where(cols)[0][[0, -1]]

    # Project along dim 1 to find the bounds in dim 0:
    integral = numpy.float32(data2).sum(1)

    # Filter noise:
    integral = ndimage.gaussian_filter(integral, 3)
    mean = numpy.mean(integral[integral > 0])
    integral[integral < mean / 10] = 0

    # Compute bounding box:
    rows = numpy.any(integral, axis=1)
    a = numpy.where(rows)[0][[0, -1]]

    # Add a safe margin of 5% (1/20) of the box size on every side:
    a_int = (a[1] - a[0]) // 20
    b_int = (b[1] - b[0]) // 20
    c_int = (c[1] - c[0]) // 20

    a[0] = a[0] - a_int
    a[1] = a[1] + a_int
    b[0] = b[0] - b_int
    b[1] = b[1] + b_int
    c[0] = c[0] - c_int
    c[1] = c[1] + c_int

    # Scale back to full resolution (factor 4) and clip to the array shape:
    a[0] = max(0, a[0] * 4)
    a[1] = min(array.shape[0], a[1] * 4)
    b[0] = max(0, b[0] * 4)
    b[1] = min(array.shape[1], b[1] * 4)
    c[0] = max(0, c[0] * 4)
    c[1] = min(array.shape[2], c[1] * 4)

    return a, b, c
def binary_threshold(data, mode = 'histogram', threshold = 0):
    '''
    Compute binary threshold. Use 'histogram, 'otsu', or 'constant' mode.

    Args:
        data: 3D dataset
        mode (str): 'otsu' - Otsu threshold on a subsampled volume;
            'histogram' - first minimum / saddle point after the air peak of
            the log-histogram; 'constant' - return `threshold` unchanged.
        threshold (float): value returned as-is in 'constant' mode.

    Returns:
        float: the threshold value.

    Raises:
        ValueError: if mode is not recognized.
    '''
    # NOTE(review): the matplotlib plotting below looks like leftover debug
    # output - 'histogram' mode pops up a blocking figure (plt.show());
    # confirm whether this visualization is intended in library code.
    import matplotlib.pyplot as plt

    print('Applying binary threshold...')

    if mode == 'otsu':
        # Otsu threshold on a subsampled volume (cheaper, same statistics):
        threshold = threshold_otsu(data[::2,::2,::2])

    elif mode == 'histogram':
        x, y = histogram(data[::2,::2,::2], log = True, plot = False)

        # Make sure there are no 0s before taking the log:
        y = numpy.log(y + 1)
        # NOTE(review): ndimage.filters is a deprecated scipy namespace.
        y = ndimage.filters.gaussian_filter1d(y, sigma=1)

        plt.figure()
        plt.plot(x, y)

        # Find air maximum:
        air_index = numpy.argmax(y)

        print('Air found at %0.3f' % x[air_index])

        # Find the first shoulder after the air peak in the histogram spectrum:
        x = x[air_index:]

        # Magnitude of the derivative of the smoothed log-histogram:
        yd = abs(numpy.diff(y))
        yd = yd[air_index:]
        y = y[air_index:]

        # Minimum derivative = saddle point or extremum:
        ind = signal.argrelextrema(yd, numpy.less)[0][0]
        min_ind = signal.argrelextrema(y, numpy.less)[0][0]

        plt.plot(x[ind], y[ind], '+')
        plt.plot(x[min_ind], y[min_ind], '*')
        plt.show()

        # Is it a saddle point or an extremum?
        if abs(ind - min_ind) < 2:
            # A true minimum: use it directly as the threshold.
            threshold = x[ind]
            print('Minimum found next to the air peak at: %0.3f' % x[ind])
        else:
            # Move closer to the air peak since we are looking at some other material
            threshold = x[ind] - abs(x[ind] - x[0]) / 4
            print('Saddle point found next to the air peak at: %0.3f' % x[ind])

    elif mode == 'constant':
        # Keep the user-supplied threshold as-is.
        pass

    else: raise ValueError('Wrong mode parameter. Can be histogram or otsu.')

    print('Threshold value is %0.3f' % threshold)

    return threshold
def find_marker(array, geometry, d = 5):
    """
    Find a marker in 3D volume by applying a circular kernel with an inner diameter d [mm].

    Args:
        array: 3D dataset
        geometry: geometry record used to generate the spherical kernels
        d (float): inner diameter of the marker in mm

    Returns:
        a, b, c: coordinates of the marker in the full-resolution volume.
    """
    # TODO: it fails sometimes when the marker is adjacent to something...

    # First subsample data to avoid memory overflow.
    # NOTE(review): this view aliases the caller's array, so the clipping on
    # the next line modifies the input in place - presumably accepted to save
    # memory (see the commented-out copy in the original); confirm.
    data2 = array[::4, ::4, ::4]
    data2[data2 < 0] = 0

    # Kernel radius, accounting for the 4x subsampling:
    r = d / 4

    # Get areas with significant density:
    t = binary_threshold(data2, mode = 'otsu')

    threshold = numpy.float32(data2 > t)

    # Create a circular kernel (take into account subsampling of data2):
    kernel = -0.5 * phantom.sphere(data2.shape, geometry, r * 2, [0,0,0])
    kernel += phantom.sphere(data2.shape, geometry, r, [0,0,0])
    kernel[kernel > 0] *= (2**3 - 1)

    print('Computing feature sizes...')

    # Map showing the relative size of feature (convolution happens in place):
    data.convolve_kernel(threshold, kernel)

    A = threshold
    A[A < 0] = 0
    A /= A.max()

    display.max_projection(A, dim = 0, title = 'Feature size.')

    print('Estimating local variance...')

    # Now estimate the local variance:
    B = ndimage.filters.laplace(data2) ** 2
    B /= (numpy.abs(data2) + data2.max()/100)

    # Make sure that boundaries don't affect variance estimation:
    threshold = threshold == 0
    threshold = ndimage.morphology.binary_dilation(threshold)
    B[threshold] = 0

    B = numpy.sqrt(B)
    B = ndimage.filters.gaussian_filter(B, 4)
    B /= B.max()

    display.max_projection(B, dim = 0, title = 'Variance.')

    # Compute final weight: large features minus noisy regions.
    A -= B

    # Make it dependent on absolute intensity: (could be dependent on distance from some value....)
    A *= numpy.sqrt(data2)

    print('A.max', A.max())
    print('A.mean', A[A > 0].mean())

    index = numpy.argmax(A)

    # Display:
    display.max_projection(A, dim = 0, title = 'Marker map')

    # Coordinates:
    a, b, c = numpy.unravel_index(index, A.shape)

    # Upsample back to the full resolution:
    a *= 4
    b *= 4
    c *= 4

    print('Found the marker at:', a, b, c)

    return a, b, c
def moments_orientation(data, subsample = 1):
    '''
    Find the center of mass and the intensity axes of the image.

    Args:
        data(array): 3D input
        subsample: subsampling factor to make it faster

    Returns:
        T, R: translation vector to the center of mass and rotation matrix
        to the intensity axes.
    '''
    # Find the centroid:
    m000 = moment3(data, [0, 0, 0], subsample = subsample)
    m100 = moment3(data, [1, 0, 0], subsample = subsample)
    m010 = moment3(data, [0, 1, 0], subsample = subsample)
    m001 = moment3(data, [0, 0, 1], subsample = subsample)

    # Somehow this system of coordinates and the system of ndimage.interpolate require negation of j:
    T = [m100 / m000, m010 / m000, m001 / m000]

    # Find the central (second-order) moments:
    mu200 = moment3(data, [2, 0, 0], T, subsample = subsample) / m000
    mu020 = moment3(data, [0, 2, 0], T, subsample = subsample) / m000
    mu002 = moment3(data, [0, 0, 2], T, subsample = subsample) / m000
    mu110 = moment3(data, [1, 1, 0], T, subsample = subsample) / m000
    mu101 = moment3(data, [1, 0, 1], T, subsample = subsample) / m000
    mu011 = moment3(data, [0, 1, 1], T, subsample = subsample) / m000

    # Construct the covariance matrix:
    M = numpy.array([[mu200, mu110, mu101], [mu110, mu020, mu011], [mu101, mu011, mu002]])

    # Compute eigen vectors of the covariance matrix and sort by eigen values.
    # FIX: the eigen decomposition used to be computed twice; do it once.
    lam, vec = numpy.linalg.eig(M)
    vec = vec.T

    # Here we sort the eigen values:
    ind = numpy.argsort(lam)

    # Matrix R is composed of basis vectors, largest eigenvalue first:
    R = numpy.array(vec[ind[::-1]])

    # Makes sure our basis always winds the same way:
    R[2, :] = numpy.cross(R[0, :], R[1, :])

    # Centroid relative to the centre of the array:
    T = numpy.array(T) - numpy.array(data.shape) // 2

    return T, R
def calibrate_spectrum(projections, volume, geometry, compound = 'Al', density = 2.7, threshold = None, iterations = 1000, n_bin = 10):
    '''
    Use the projection stack of a homogeneous object to estimate system's
    effective spectrum.
    Can be used by process.equivalent_thickness to produce an equivalent
    thickness projection stack.
    Please, use conventional geometry.

    Args:
        projections: log-projection stack of a homogeneous object
        volume: reconstructed volume of the same object
        geometry: geometry record; its 'voltage' field (if present) sets the
            upper end of the energy grid
        compound (str): material of the object (e.g. 'Al')
        density (float): density of the material
        threshold (float): if given, a constant segmentation threshold;
            otherwise Otsu thresholding is used
        iterations (int): number of Expectation-Maximization iterations
        n_bin (int): number of spectrum (energy) bins

    Returns:
        energy, spec: energy grid (5 .. max(100, voltage)) and the
        normalized spectrum.
    '''
    # Segment the object to obtain its binary shape:
    if threshold is not None:
        t = binary_threshold(volume, mode = 'constant', threshold = threshold)
    else:
        t = binary_threshold(volume, mode = 'otsu')

    segmentation = numpy.float32(volume > t)
    display.slice(segmentation, dim=0,title = 'Segmentation')

    # Forward project the shape to get the attenuation length per ray:
    print('Calculating the attenuation length.')

    length = numpy.zeros_like(projections)
    length = numpy.ascontiguousarray(length)
    projector.forwardproject(length, segmentation, geometry)

    # Intensity from the log-projections (clip negatives first):
    projections[projections < 0] = 0
    intensity = numpy.exp(-projections)

    # Crop the central slab to avoid cone artefacts:
    height = intensity.shape[0]//2
    window = 10
    intensity = intensity[height-window:height+window,:,:]
    length = length[height-window:height+window,:,:]

    # Make 1D, keeping only rays that pass through the object:
    intensity = intensity[length > 0].ravel()
    length = length[length > 0].ravel()

    lmax = length.max()
    lmin = length.min()

    print('Maximum reprojected length:', lmax)
    print('Minimum reprojected length:', lmin)

    print('Selecting a random subset of points.')

    print('Computing the intensity-length transfer function.')

    # Bin number for lengths:
    bin_n = 128
    bins = numpy.linspace(lmin, lmax, bin_n)

    # Rebin: assign every ray to a length bin:
    idx = numpy.digitize(length, bins)

    # Rebin length and intensity (median intensity per length bin):
    length_0 = bins + (bins[1] - bins[0]) / 2
    intensity_0 = [numpy.median(intensity[idx==k]) for k in range(bin_n)]

    # In case some bins are empty (median of an empty set is NaN):
    intensity_0 = numpy.array(intensity_0)
    length_0 = numpy.array(length_0)
    length_0 = length_0[numpy.isfinite(intensity_0)]
    intensity_0 = intensity_0[numpy.isfinite(intensity_0)]

    # Get rid of the tails:
    rim = len(length_0) // 20
    length_0 = length_0[rim:-rim]
    intensity_0 = intensity_0[rim:-rim]

    # Don't trust low counts!
    length_0 = length_0[intensity_0 > 0.05]
    intensity_0 = intensity_0[intensity_0 > 0.05]

    # Enforce the zero-length / unit-intensity point at the start:
    length_0 = numpy.insert(length_0, 0, 0)
    intensity_0 = numpy.insert(intensity_0, 0, 1)

    print('Intensity-length curve rebinned.')

    print('Computing the spectrum by Expectation Maximization.')

    # Tube voltage defines the upper end of the energy grid (100 by default):
    volts = geometry.description.get('voltage')
    if not volts: volts = 100

    energy = numpy.linspace(5, max(100, volts), n_bin)

    # System matrix: per-energy attenuation over the binned lengths:
    mu = model.linear_attenuation(energy, compound, density)
    exp_matrix = numpy.exp(-numpy.outer(length_0, mu))

    # Initial guess of the spectrum:
    spec = model.bremsstrahlung(energy, volts)
    spec *= model.scintillator_efficiency(energy, 'Si', rho = 5, thickness = 0.5)
    spec *= model.total_transmission(energy, 'H2O', 1, 1)
    spec *= energy
    spec /= spec.sum()

    norm_sum = exp_matrix.sum(0)
    spec0 = spec.copy()

    # exp_matrix at length == 0 is == 1. Sum of that is n_bin
    # EM-type multiplicative updates:
    for ii in range(iterations):
        frw = exp_matrix.dot(spec)

        # Guard against division by (near) zero:
        epsilon = frw.max() / 100
        frw[frw < epsilon] = epsilon

        spec = spec * exp_matrix.T.dot(intensity_0 / frw) / norm_sum

    # Make sure that the total count of spec is 1 - that means intensity at length = 0 is equal to 1
    spec = spec / spec.sum()

    print('Spectrum computed.')

    # Synthetic intensity for a check:
    _intensity = exp_matrix.dot(spec)

    import matplotlib.pyplot as plt

    # Display the fitted intensity-length curve:
    plt.figure()
    plt.semilogy(length[::200], intensity[::200], 'b.', lw=4, alpha=.8)
    plt.semilogy(length_0, intensity_0, 'g--')
    plt.semilogy(length_0, _intensity, 'r-', lw=3, alpha=.6)
    plt.axis('tight')
    plt.title('Log intensity v.s. absorption length.')
    plt.legend(['raw','binned','solution'])
    plt.show()

    # Display the computed spectrum against the initial guess:
    plt.figure()
    plt.plot(energy, spec, 'b')
    plt.plot(energy, spec0, 'r:')
    plt.axis('tight')
    plt.title('Calculated spectrum')
    plt.legend(['computed','initial guess'])
    plt.show()

    return energy, spec