### This is a script containing the code used to generate Figure 5. in the article ... ### The starting point is the data of the FPA generated image as a matrix (IR.Image) : ### Each row of the matrix holds the spectra of a given pixel; ### each colum contains the absorbance at a given wavenumber for all the pixels. ### The pixels are ordered in row major format (that is, pixel on the same row on the image are on consecutive rows of the matrix). ### To help with the input format here is how, in our particular case, the image was loaded into R, starting from a binary file in an intermediate file format : MapFile <- file(description="BOC1-C_block-02.bin", open="rb") IR.Image <- matrix(data=readBin(con=MapFile, what="numeric", n=4096*1361, size=4), nrow=4096, ncol=1361, byrow=TRUE) close(MapFile) rm(MapFile) ### Preparing the coordinates of each pixels as labels for each row of the matrix : pixel.list <- c() for ( j in 1:64 ) { for ( i in 1:64 ) { pixel.list[i + (j-1)*64] <- sprintf("%03i_%03i", i, j) } } ### Adding the wavenumber as labels for each column of the matrix : dimnames(IR.Image) <- list(pixel.list, seq(from=4000,to=500, length.out=1361)) rm(i, j, pixel.list) ### In the current case, the raw data goes from 4000 to 500 cm-1, which indeed we have to "slice" to limit to the reasonnable data (in term of signal to noise ratio) ### that is from 3900 to 900 cm-1 : IR.Image <- IR.Image[, (seq(from=4000,to=500, length.out=1361) >= 900) & (seq(from=4000,to=500, length.out=1361) <= 3900)] ### Converting to absorbance : IR.Image <- -log10(IR.Image) ### In this case we applied a basline correction, based on the large number of spectrum, including spectra taken on the MirIR microscope slide ### we assumed that for each wave-number, the base line is the minimum value of attained at this wave-number taken on the 4096 collected spectra. IR.Image.BL <- apply(X = IR.Image, MARGIN = 2, FUN=function(x) {x - min(x); }) ################### This is the start of the clustering algorithm itself : ################### ### Computing all the pairwise "distance" between the spectra : IR.Image.cor <- cor(t(IR.Image.BL), use = "complete.obs") IR.Image.dist <- 1 - IR.Image.cor ### Performing the hierarchical clustering, using Ward's criteria : ## Formatting for hierarchical cluster (hclust) input : IR.Image.dist <- as.vector(IR.Image.dist[lower.tri(IR.Image.dist)]) class(IR.Image.dist) <- "dist" attr(IR.Image.dist, "Size") <- nrow(IR.Image) attr(IR.Image.dist, "Labels") <- dimnames(IR.Image)[[1]] ## Running hclust : IR.Image.tree <- hclust(IR.Image.dist, method="ward") #### plotting the "total variance" (ESS in Ward's parliance) as a function of retained number of clusters : IR.Image.ward.data <- data.frame(num.clusters=4096-(1:4095), height=IR.Image.tree$height) ### Performing a line fit on the part from 1 to 200 clusters : (finding the power law that govern this part of the clustering loss) IR.Image.tree.power.law <- lm(log(height) ~ log(num.clusters), data=IR.Image.ward.data, subset=(num.clusters < 100)) ################### Finally performing some of the plotting : ################### ### Displaying the evolution of the objecive function (ESS) as function of the number of classes : plot(x=IR.Image.ward.data$num.clusters, y=IR.Image.ward.data$height, log="xy", type="l", xlab="Number of classes", ylab="Clustering objective function", xlim=c(4,64), ylim=range(IR.Image.ward.data$height[4096 - (4:64)])) plot(function(x){ return(exp(IR.Image.tree.power.law$coef[[1]]) * x^(IR.Image.tree.power.law$coef[[2]])) }, add=TRUE, from=1, to=100, col="red") ### For a selected number of classes, display the class assigned to each pixel : #### To ease this plotting we define it as a function of the number of selected classes : clust.plot <- function(k) { #### In our case, the spacing between each pixel is 2 microns, hence the computation of xvalues and yvalues : xvalues <- (1:64) * 2 yvalues <- (1:64) * 2 #### We cut the hierarchical tree such that we get exactly k classes : seg.data <- cutree(IR.Image.tree, k=k) #### We convert the result of the clustering into a matrix representing the class of each pixel : seg.image <- matrix(data=seg.data, nrow=64, ncol=64, byrow=FALSE) #### And finally performing the display : image(x=xvalues, y=yvalues, z=seg.image, asp=1, col=rainbow(k, s=1, v=1, start=0, end=0.95)) } clust.plot(6) clust.plot(9) clust.plot(12) clust.plot(17) clust.plot(23)