Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Convert_Wyko_ASCII.py 7.92 KiB
__author__ = "Erwan Plougonven"
import os
import sys
import re
import numpy as np
from scipy.ndimage import generic_filter
from scipy.interpolate import griddata
from skimage.morphology import remove_small_objects
from skimage.restoration import inpaint
from PIL import Image

def main():
    hfld, ifld = extract_Wyko_ASCII_data("profile.asc")
    hfld = removePeaks(hfld) # Remove Dirac noise
    hfld = fill_novals(hfld) # Fill undefined values
    save_PNG(hfld,"profile.png")


##########################
### Internal functions ###
##########################
# Extract data from file
def extract_Wyko_ASCII_data(input_file):
    allLines = [line.rstrip('\r\n') for line in open(input_file)]# if len(line)>0]

    dataFormatKey = "Wyko ASCII Data File Format"
    if dataFormatKey not in allLines[0]:
        print("File "+input_file+" does not appear to be Wyko ASCII file. Exiting.")
        sys.exit("Wrong file format. Header of file "+input_file+" is: \""+allLines[0]+"\". Was looking for: \""+dataFormatKey+"\".")

    codes = [int(i) for i in allLines[0][len(dataFormatKey):].split()]
    # First number is "Array format, 0 for standard, 2 for XYZ Triplet, Real".

    nbPixelsKey = ["X Size","Y Size"]
    dims = [int(allLines[x+1][len(nbPixelsKey[x]):]) for x in [0,1]]

    # Find last line of header
    endHeaderLine = r"OPD"
    for i, line in enumerate(allLines):
        if re.match(endHeaderLine+"[^\w]",line):
            sepChar = line[len(endHeaderLine)]
            break

    data = allLines[i+1:]
    startIntensityLine = r"Intensity" # There are two data sections
    for i, line in enumerate(data):
        if re.match(startIntensityLine,line):
            break
    data_H = data[0:i]
    data_I = data[i+1:]

    # Actual work
    def parse_data(datalines):
        arr = np.full(dims, np.nan)
        if codes[0] == 0:
            #print("Array Format: Standard")
            for j,line in enumerate(datalines):
                lineElems = line[:-1].split(sepChar)  # One too many separators at EOL
                arr[j,:] = [x if x else np.nan for x in lineElems]
            
        if codes[0] == 2:
            #print("Array Format: XYZ Triplet, Real")
            for j in range(0,dims[0]):
                for i in range(0,dims[1]):
                    nums = [float(x) for x in datalines[j*dims[1] + i].split()]
                    if(len(nums) == 3):
                        arr[j,i] = nums[2]
       
        arr = np.transpose(arr)
        return arr

    height_field = parse_data(data_H)
    intensity_field = parse_data(data_I)

    return [height_field,intensity_field]

# Custom function to remove the peaks.
# A peak is a Dirac impulse, in the positive or negative. In other words, a peak is an isolated pixel with a wildly different value compared to its neighbours.
# The basic idea is to look at the 4-neighbourhood of each pixel, and if its value differs from the average of its (defined) neighbours by more than a given threshold diffth, then it's considered a peak and set at the neighbour's average.
# Note: I had to write a custom function because peaks can arise next to or completely around undefined pixels (tried setting undefined to constant value beforehand, tried setting to inf so that median will work, but always an edge case made those inappropriate).
def removePeaks(I,diffth=10):
    # Considering the image as binary (the background being the undefined pixels), start with handling the small CCs (connected components), of size 2 or less.
    A = np.pad(I,1,constant_values=np.nan)
    M = ~np.isnan(A)
    M1 = remove_small_objects(M,2)
    nbIslands = (M!=M1).sum() # Number of CCs of size 1
    M2 = remove_small_objects(M1,3)
    M2_ = (M2!=M1) # Only the CCs of size 2
    nbCC2 = M2_.sum() / 2 # Number of CCs of size 2
              
    B = np.where(M==M1, A, np.nan) # Removing CCs of size 1. It could be a peak or not, but no way of being sure.
    #  The size 2 ones are handled in the main loop.
                                   
    B_ = B.copy()
    allPeaks = {}
    nbPeaksCC2 = 0
    for j in range(1,B.shape[0]-1):
        for i in range(1,B.shape[1]-1):
            if not np.isnan(B[j,i]):
                nb = np.full(4,np.nan)
                nb[0] = B[j-1,i]
                nb[1] = B[j+1,i]
                nb[2] = B[j,i-1]
                nb[3] = B[j,i+1]
                if abs(B[j,i] - np.nanmean(nb)) > diffth: # Since there must be at least one defined neighbour (CCs of size 1 were removed), nanmean(nb) is always a number
                    if M2_[j-1,i-1]: # It's part of a CC of size 2, and its value differs from its (one) neighbour by more than diffth, but no way of knowing which one is a peak, so remove both.
                        B_[j,i] = np.nan
                        nbPeaksCC2 = nbPeaksCC2+1
                    else:
                        allPeaks[np.ravel_multi_index((j,i),B.shape)] = B[j,i]

    nbPeaksCC2 = nbPeaksCC2/2 # Counted twice since incremented over each of the two pixels of each CC
    B = B_
    nbPeaks = 0
    for peak in sorted(allPeaks.items(), key=lambda item:item[1],reverse=True): 
    # With a very high peak, it and its neighbours are identified as peaks (the neighbour of a peak will see it's average neighbourhood value influenced by the peak, inducing a large difference).
    # If you start with the one with the largest difference, i.e. the peak, then it will be flattened so when you recheck its neighbours, their respective difference from their average neighbourdhood also flattens out.
        (j,i) = np.unravel_index(peak[0],B.shape)
        nb = np.full(4,np.nan)
        nb[0] = B[j-1,i]
        nb[1] = B[j+1,i]
        nb[2] = B[j,i-1]
        nb[3] = B[j,i+1]
        if abs(B[j,i] - np.nanmean(nb)) > diffth: 
            B[j,i] = np.nanmean(nb)
            nbPeaks = nbPeaks+1
            
    print("Removed "+ str(int(nbPeaks + nbPeaksCC2))+" peaks, "+str(nbIslands)+ " islands, and "+ str(int(nbPeaksCC2)) +" non-peaks.")
    return B[1:-1,1:-1]

# All undefined values are NaN, replace those with a specified method. 
def fill_novals(fld, noval = "inpaint", cval = 0.0):
    
    novals = {"average","constant","inpaint","interpolate"} # Possible methods
    noval_ = noval.lower()
    if noval_ not in novals:
        print("Error: fill method \""+noval+"\" not in list.")
    elif noval_ == "average":
        np.nan_to_num(fld, copy=False, nan = np.nanmean(fld))
    elif noval_ == "constant":
        np.nan_to_num(fld, copy=False, nan = cval)
    elif noval_ == "inpaint":
        fld = inpaint.inpaint_biharmonic(fld, np.isnan(fld))
    elif noval == "interpolate":
        x, y = np.indices(fld.shape)
        # Will only interpolate within the convex hull of the known values. All other at the border are still nan
        fld[np.isnan(fld)] = griddata( (x[~np.isnan(fld)], y[~np.isnan(fld)]), fld[~np.isnan(fld)], (x[np.isnan(fld)], y[np.isnan(fld)]))
 
        def replace_nan_with_mean(values):
            if np.isnan(values[4]):  # If the central pixel is NaN
                return np.nanmean(values)
            else:
                return values[4]  # Return the original value if not NaN

        fld = generic_filter(fld, replace_nan_with_mean, size=3) # Still NaNs?
    
    return fld

def save_PNG(fld,output_file):
    normed = (fld - np.nanmin(fld)) / (np.nanmax(fld) - np.nanmin(fld))
    normed = np.nan_to_num(normed)
    image = (255*normed).astype(np.uint8)
    Image.fromarray(image).save(output_file)

# Export data to Avizo format
def save_Avizo_ASCII(fld,output_file):
    dims = fld.shape[::-1]
    outputfile_header = """# Avizo 3D ASCII 2.0 

define Lattice {X} {Y} 1
Parameters {{
    Content "{X}x{Y}x1 float, uniform coordinates",
    BoundingBox 0 {BBx} 0 {BBy} 0 0.1,
    CoordType "uniform"
}}

Lattice {{ float Data }} @1

# Data section follows
@1
""".format(X = dims[0], Y = dims[1], BBx = dims[0]-1, BBy = dims[1]-1)

    ofh = open(output_file, 'w')
    ofh.write(outputfile_header)
    np.savetxt(ofh,fld,fmt = "%f",delimiter = ' ')
    ofh.close()

if __name__ == '__main__':
    main()