Update Convert_Wyko_ASCII.py

8d13df1a · Plougonven Erwan · ef786487 · 8d13df1a
Commit 8d13df1a authored 1 year ago by Plougonven Erwan
--- a/Convert_Wyko_ASCII.py
+++ b/Convert_Wyko_ASCII.py
@@ -3,25 +3,23 @@ import os
 import sys
 import re
 import numpy as np
-from scipy.ndimage import median_filter
+from scipy.ndimage import generic_filter
+from scipy.interpolate import griddata
+from skimage.morphology import remove_small_objects
+from skimage.restoration import inpaint
+from PIL import Image

 def main():
-    input_file = "particle 2 a.asc"
-    output_file_h = "particle2a_height.am"
-    output_file_i = "particle2a_intensity.am"
-
-    dims, hfld, ifld = extract_Wyko_ASCII_data(input_file)
-
-    # Some spikes in height field raw data, removed with median filter
-    hfld = median_filter(hfld, footprint = np.array([[0,1,0],[1,1,1],[0,1,0]]))
-    
-    create_Avizo_ASCII(dims,hfld,output_file_h) 
-    create_Avizo_ASCII(dims,ifld,output_file_i)
+    hfld, ifld = extract_Wyko_ASCII_data("profile.asc")
+    hfld = removePeaks(hfld) # Remove Dirac noise
+    hfld = fill_novals(hfld) # Fill undefined values
+    save_PNG(hfld,"profile.png")


 ##########################
 ### Internal functions ###
 ##########################
+# Extract data from file
 def extract_Wyko_ASCII_data(input_file):
    allLines = [line.rstrip('\r\n') for line in open(input_file)]# if len(line)>0]

@@ -40,7 +38,6 @@ def extract_Wyko_ASCII_data(input_file):
    endHeaderLine = r"OPD"
    for i, line in enumerate(allLines):
        if re.match(endHeaderLine+"[^\w]",line):
-            #print(line)
            sepChar = line[len(endHeaderLine)]
            break

@@ -48,11 +45,11 @@ def extract_Wyko_ASCII_data(input_file):
    startIntensityLine = r"Intensity" # There are two data sections
    for i, line in enumerate(data):
        if re.match(startIntensityLine,line):
-            #print(line)
            break
    data_H = data[0:i]
    data_I = data[i+1:]

+    # Actual work
    def parse_data(datalines):
        arr = np.full(dims, np.nan)
        if codes[0] == 0:
@@ -68,18 +65,106 @@ def extract_Wyko_ASCII_data(input_file):
                    nums = [float(x) for x in datalines[j*dims[1] + i].split()]
                    if(len(nums) == 3):
                        arr[j,i] = nums[2]
-
-        np.nan_to_num(arr, copy=False, nan = np.nanmean(arr))
+       
        arr = np.transpose(arr)
        return arr

    height_field = parse_data(data_H)
    intensity_field = parse_data(data_I)

-    return [dims,height_field,intensity_field]
+    return [height_field,intensity_field]
+
+# Custom function to remove the peaks.
+# A peak is a Dirac impulse, in the positive or negative. In other words, a peak is an isolated pixel with a wildly different value compared to its neighbours.
+# The basic idea is to look at the 4-neighbourhood of each pixel, and if its value differs from the average of its (defined) neighbours by more than a given threshold diffth, then it's considered a peak and set at the neighbour's average.
+# Note: I had to write a custom function because peaks can arise next to or completely around undefined pixels (tried setting undefined to constant value beforehand, tried setting to inf so that median will work, but always an edge case made those inappropriate).
+def removePeaks(I,diffth=10):
+    # Considering the image as binary (the background being the undefined pixels), start with handling the small CCs (connected components), of size 2 or less.
+    A = np.pad(I,1,constant_values=np.nan)
+    M = ~np.isnan(A)
+    M1 = remove_small_objects(M,2)
+    nbIslands = (M!=M1).sum() # Number of CCs of size 1
+    M2 = remove_small_objects(M1,3)
+    M2_ = (M2!=M1) # Only the CCs of size 2
+    nbCC2 = M2_.sum() / 2 # Number of CCs of size 2
+              
+    B = np.where(M==M1, A, np.nan) # Removing CCs of size 1. It could be a peak or not, but no way of being sure.
+    #  The size 2 ones are handled in the main loop.
+                                   
+    B_ = B.copy()
+    allPeaks = {}
+    nbPeaksCC2 = 0
+    for j in range(1,B.shape[0]-1):
+        for i in range(1,B.shape[1]-1):
+            if not np.isnan(B[j,i]):
+                nb = np.full(4,np.nan)
+                nb[0] = B[j-1,i]
+                nb[1] = B[j+1,i]
+                nb[2] = B[j,i-1]
+                nb[3] = B[j,i+1]
+                if abs(B[j,i] - np.nanmean(nb)) > diffth: # Since there must be at least one defined neighbour (CCs of size 1 were removed), nanmean(nb) is always a number
+                    if M2_[j-1,i-1]: # It's part of a CC of size 2, and its value differs from its (one) neighbour by more than diffth, but no way of knowing which one is a peak, so remove both.
+                        B_[j,i] = np.nan
+                        nbPeaksCC2 = nbPeaksCC2+1
+                    else:
+                        allPeaks[np.ravel_multi_index((j,i),B.shape)] = B[j,i]
+
+    nbPeaksCC2 = nbPeaksCC2/2 # Counted twice since incremented over each of the two pixels of each CC
+    B = B_
+    nbPeaks = 0
+    for peak in sorted(allPeaks.items(), key=lambda item:item[1],reverse=True): 
+    # With a very high peak, it and its neighbours are identified as peaks (the neighbour of a peak will see it's average neighbourhood value influenced by the peak, inducing a large difference).
+    # If you start with the one with the largest difference, i.e. the peak, then it will be flattened so when you recheck its neighbours, their respective difference from their average neighbourdhood also flattens out.
+        (j,i) = np.unravel_index(peak[0],B.shape)
+        nb = np.full(4,np.nan)
+        nb[0] = B[j-1,i]
+        nb[1] = B[j+1,i]
+        nb[2] = B[j,i-1]
+        nb[3] = B[j,i+1]
+        if abs(B[j,i] - np.nanmean(nb)) > diffth: 
+            B[j,i] = np.nanmean(nb)
+            nbPeaks = nbPeaks+1
+            
+    print("Removed "+ str(int(nbPeaks + nbPeaksCC2))+" peaks, "+str(nbIslands)+ " islands, and "+ str(int(nbPeaksCC2)) +" non-peaks.")
+    return B[1:-1,1:-1]
+
+# All undefined values are NaN, replace those with a specified method. 
+def fill_novals(fld, noval = "inpaint", cval = 0.0):
+    
+    novals = {"average","constant","inpaint","interpolate"} # Possible methods
+    noval_ = noval.lower()
+    if noval_ not in novals:
+        print("Error: fill method \""+noval+"\" not in list.")
+    elif noval_ == "average":
+        np.nan_to_num(fld, copy=False, nan = np.nanmean(fld))
+    elif noval_ == "constant":
+        np.nan_to_num(fld, copy=False, nan = cval)
+    elif noval_ == "inpaint":
+        fld = inpaint.inpaint_biharmonic(fld, np.isnan(fld))
+    elif noval == "interpolate":
+        x, y = np.indices(fld.shape)
+        # Will only interpolate within the convex hull of the known values. All other at the border are still nan
+        fld[np.isnan(fld)] = griddata( (x[~np.isnan(fld)], y[~np.isnan(fld)]), fld[~np.isnan(fld)], (x[np.isnan(fld)], y[np.isnan(fld)]))
+ 
+        def replace_nan_with_mean(values):
+            if np.isnan(values[4]):  # If the central pixel is NaN
+                return np.nanmean(values)
+            else:
+                return values[4]  # Return the original value if not NaN
+
+        fld = generic_filter(fld, replace_nan_with_mean, size=3) # Still NaNs?
+    
+    return fld

+def save_PNG(fld,output_file):
+    normed = (fld - np.nanmin(fld)) / (np.nanmax(fld) - np.nanmin(fld))
+    normed = np.nan_to_num(normed)
+    image = (255*normed).astype(np.uint8)
+    Image.fromarray(image).save(output_file)

-def create_Avizo_ASCII(dims,fld,output_file):
+# Export data to Avizo format
+def save_Avizo_ASCII(fld,output_file):
+    dims = fld.shape[::-1]
    outputfile_header = """# Avizo 3D ASCII 2.0 

 define Lattice {X} {Y} 1
@@ -101,4 +186,4 @@ Lattice {{ float Data }} @1
    ofh.close()

 if __name__ == '__main__':
-    main() # Defined this way so that user-defined parameters are at the beginning of the file.
\ No newline at end of file
+    main()
\ No newline at end of file