Introduction
This is the home page of the article "Level set tree methods", written by Jussi Klemelä.
Abstract of the article
Level set trees provide a tool for analyzing multivariate functions. They can be used to visualize and present particularly good properties related to local maxima and local minima of functions. Level set trees can help statistical inference related to estimating probability density functions and regression functions, and they can be used in cluster analysis and function optimization, among other things.
Reproducing the computations
Install software
source("http://jklm.fi/denpro/denpro.R") source("http://jklm.fi/delt/delt.R")
Figure 1
# generate data seed<-1 n<-1000 d<-2 l<-3; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1 sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) paletti<-c("red","blue","green", "orange","yellow","navy") cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(40,d) pcf<-pcf.kern(dendat,h,N) lst<-leafsfirst(pcf) dp<-draw.pcf(pcf) persp(dp$x,dp$y,dp$z,phi=35,theta=20,ticktype="detailed", xlab="X1",ylab="X2",zlab="") title(sub="(a)",cex.sub=cex.sub) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, cex.axis=cex.axis,labcex=1.2) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) title(sub="(b)",cex.sub=cex.sub) text(-2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(2.9,-2.9,"M1",col="blue",cex=1.5) plottree(lst,colo=TRUE,ptext=0.002,ylim=c(0,0.038),paletti=paletti, xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) title(sub="(c)",cex.sub=cex.sub) plotvolu(lst,colo=TRUE,modelabel=TRUE,paletti=paletti, ptext=0.002,ylim=c(0,0.038), cex.axis=cex.axis,cex=cex) title(sub="(d)",cex.sub=cex.sub) plotbary(lst,title=FALSE, modelabel=TRUE,paletti=paletti, ptext=0.002,ylim=c(0,0.038),xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) mtext("X1",side=1,cex=1.2,line=2.4) title(sub="(e)",cex.sub=cex.sub) plotbary(lst,coordi=2,title=FALSE, modelabel=TRUE,paletti=paletti, ptext=0.002,ylim=c(0,0.038),xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) mtext("X2",side=1,cex=1.2,line=2.4) title(sub="(f)",cex.sub=cex.sub)
Figure 2
# generate data seed<-1 n<-1000 d<-3 l<-4; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[2,3]<--c M[3,1]<--c/1.1; M[3,2]<-c/1.1; M[3,3]<-c/1.1 M[4,1]<-c; M[4,2]<--c; M[4,3]<-c sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) d<-3 h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(60,d) pcfb<-pcf.kern(dendat,h,N) paletti<-c("red","blue","green", "orange","yellow","navy","violet","darkgreen") cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 d<-2 dendat1<-dendat[,c(1,2)] h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat1,2,sd) N<-rep(50,d) pcf1<-pcf.kern(dendat1,h,N) dp<-draw.pcf(pcf1) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, xlim=c(-5,5.5),ylim=c(-5.5,5), cex.axis=cex.axis,labcex=1.2) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) text(-2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(2.9,-2.9,"M1",col="blue",cex=1.5) text(2.7,-2.7,"M4",col="orange",cex=1.5) title(sub="(a)",cex.sub=cex.sub) pcf12<-slicing(pcfb,vecci=c(0),d1=1,d2=2) dp<-draw.pcf(pcf12) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, xlim=c(-5,5),ylim=c(-5,5),cex.axis=cex.axis,labcex=1.2) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) text(-2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(2.9,-2.9,"M1",col="blue",cex=1.5) text(2.7,-2.7,"M4",col="orange",cex=1.5) title(sub="(b)",cex.sub=cex.sub) d<-2 dendat1<-dendat[,c(1,3)] h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat1,2,sd) N<-rep(50,d) pcf1<-pcf.kern(dendat1,h,N) dp<-draw.pcf(pcf1) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, xlim=c(-5.5,5.5),ylim=c(-5.5,5.5), cex.axis=cex.axis,labcex=1.2) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X3",side=2,cex=1.2,line=2.4) text(-2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(2.9,-2.9,"M1",col="blue",cex=1.5) text(2.7,2.7,"M4",col="orange",cex=1.5) title(sub="(c)",cex.sub=cex.sub) pcf13<-slicing(pcfb,vecci=c(0),d1=1,d2=3) dp<-draw.pcf(pcf13) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, xlim=c(-5,5),ylim=c(-5,5),cex.axis=cex.axis,labcex=1.2) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X3",side=2,cex=1.2,line=2.4) text(-2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(2.9,-2.9,"M1",col="blue",cex=1.5) text(2.7,2.7,"M4",col="orange",cex=1.5) title(sub="(d)",cex.sub=cex.sub) d<-2 dendat1<-dendat[,c(2,3)] h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat1,2,sd) N<-rep(50,d) pcf1<-pcf.kern(dendat1,h,N) dp<-draw.pcf(pcf1) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, xlim=c(-5.5,5.5),ylim=c(-5.5,5.5), cex.axis=cex.axis,labcex=1.2) mtext("X2",side=1,cex=1.2,line=2.4) mtext("X3",side=2,cex=1.2,line=2.4) text(2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(-2.9,-2.9,"M1",col="blue",cex=1.5) text(-2.8,2.8,"M4",col="orange",cex=1.5) title(sub="(e)",cex.sub=cex.sub) pcf23<-slicing(pcfb,vecci=c(0),d1=2,d2=3) dp<-draw.pcf(pcf23) contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12, xlim=c(-5,5),ylim=c(-5,5),cex.axis=cex.axis,labcex=1.2) mtext("X2",side=1,cex=1.2,line=2.4) mtext("X3",side=2,cex=1.2,line=2.4) text(2.7,2.7,"M2",col="green",cex=1.5) text(0,0,"M3",col="red",cex=1.5) text(-2.9,-2.9,"M1",col="blue",cex=1.5) text(-2.8,2.8,"M4",col="orange",cex=1.5) title(sub="(f)",cex.sub=cex.sub)
Figure 3
# generate data seed<-1 n<-1000 d<-3 l<-4; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[2,3]<--c M[3,1]<--c/1.1; M[3,2]<-c/1.1; M[3,3]<-c/1.1 M[4,1]<-c; M[4,2]<--c; M[4,3]<-c sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) d<-3 h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(20,d) pcf<-pcf.kern(dendat,h,N) lst<-leafsfirst(pcf) paletti<-c("red","blue","green", "orange","yellow","navy","violet","darkgreen") cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 plottree(lst,colo=TRUE,paletti=paletti, ptext=0.0005,ylim=c(0,0.007), xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) title(sub="(a)",cex.sub=cex.sub) plotvolu(lst,colo=TRUE,paletti=paletti,#modelabel=TRUE, cex.axis=cex.axis,cex=cex) mtext("volume",side=1,cex=1.2,line=2.4) title(sub="(b)",cex.sub=cex.sub) plotvolu(lst,cutlev=0.002, colo=TRUE,paletti=paletti,modelabel=TRUE, ptext=0.0005,ylim=c(0.002,0.007), cex.axis=cex.axis,cex=cex) mtext("volume",side=1,cex=1.2,line=2.4) title(sub="(c)",cex.sub=cex.sub) plotbary(lst,title=FALSE, modelabel=TRUE,paletti=paletti, ptext=0.0005,ylim=c(0,0.007), xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) mtext("X1",side=1,cex=1.2,line=2.4) title(sub="(d)",cex.sub=cex.sub) plotbary(lst,coordi=2,title=FALSE, modelabel=TRUE,paletti=paletti, ptext=0.0005,ylim=c(0,0.007), xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) mtext("X2",side=1,cex=1.2,line=2.4) title(sub="(e)",cex.sub=cex.sub) plotbary(lst,coordi=3,title=FALSE, modelabel=TRUE,paletti=paletti, ptext=0.0005,ylim=c(0,0.007), xmargin=0.04,xmarginleft=0.04, cex.axis=cex.axis,cex=cex) mtext("X3",side=1,cex=1.2,line=2.4) title(sub="(f)",cex.sub=cex.sub)
Figure 4: Scatter plots
# generate data seed<-1 n<-1000 d<-2 l<-3; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1 sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(40,d) pcf<-pcf.kern(dendat,h,N) lst<-leafsfirst(pcf) cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 # 1 plot(dendat, xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=cex,line=2.4) mtext("X2",side=2,cex=cex,line=2.4) title(sub="(a)",cex.sub=cex.sub) # 2 colored scatter plot cd<-colors2data(dendat,pcf,lst,paletti=paletti) plot(dendat[cd$ord,1],dendat[cd$ord,2],col=cd$datacolo[cd$ord], xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=cex,line=2.4) mtext("X2",side=2,cex=cex,line=2.4) title(sub="(b)",cex.sub=cex.sub)
Figure 5: Clustering
# generate data seed<-1 n<-1000 d<-2 l<-3; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1 sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(40,d) pcf<-pcf.kern(dendat,h,N) lst<-leafsfirst(pcf) cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 # 1 plotvolu(lst,colo=TRUE,modelabel=FALSE,paletti=paletti, ptext=0.002,ylim=c(0,0.038), cex.axis=cex.axis,cex=1.3) lambda<-0.02 segments(-1,lambda,200,lambda) title(sub="(a)",cex.sub=cex.sub) # 2 partial clustering with a fixed level cl<-cluster.lst(dendat,h,N=N,labels="colors",paletti=paletti, type="grid",lambda=lambda) plot(dendat,col=cl, xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) title(sub="(b)",cex.sub=cex.sub) # 3 plotvolu(lst,colo=TRUE,modelabel=FALSE,paletti=paletti, ptext=0.002,ylim=c(0,0.038), cex.axis=cex.axis,cex=1.3) lambda<-0.028 segments(-1,lambda,200,lambda) title(sub="(c)",cex.sub=cex.sub) # 4 partial clustering with a fixed level cl<-cluster.lst(dendat,h,N=N,labels="colors",paletti=paletti, type="grid",lambda=lambda) plot(dendat,col=cl, xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) title(sub="(d)",cex.sub=cex.sub)
Figure 6: Clustering: Changing levels
# generate data seed<-1 n<-1000 d<-2 l<-3; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1 sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(40,d) pcf<-pcf.kern(dendat,h,N) lst<-leafsfirst(pcf) cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 # 1 partial clustering with locally changing levels nodes<-findbnodes(lst,modenum=3) plotvolu(lst,colo=TRUE,nodes=nodes, modelabel=FALSE,paletti=paletti, ptext=0.002,ylim=c(0,0.038), cex.axis=cex.axis,cex=1.2) title(sub="(a)",cex.sub=cex.sub) # 2 cd<-colors2data(dendat,pcf,lst,nodes=nodes) plot(dendat,col=cd$datacolo, xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) title(sub="(b)",cex.sub=cex.sub)
Figure 7: Clustering: Complete
# generate data seed<-1 n<-1000 d<-2 l<-3; D<-4; c<-D/sqrt(2) M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1 sig<-matrix(1,l,d) p<-rep(1/l,l) dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed) h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd) N<-rep(40,d) pcf<-pcf.kern(dendat,h,N) lst<-leafsfirst(pcf) cex.lab<-1.5 cex.axis<-1.5 cex.sub<-1.5 cex<-1.5 # 1 complete clustering with a fixed level lambda<-0.02 cl<-cluster.lst(dendat,h,N=N,complete=TRUE,labels="colors",type="grid", paletti=paletti,lambda=lambda) plot(dendat,col=cl, xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) title(sub="(a)",cex.sub=cex.sub) # 2 complete clustering with a fixed level lambda<-0.028 cl<-cluster.lst(dendat,h,N=N,complete=TRUE,labels="colors",type="grid", paletti=paletti,lambda=lambda) plot(dendat,col=cl, xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab) mtext("X1",side=1,cex=1.2,line=2.4) mtext("X2",side=2,cex=1.2,line=2.4) title(sub="(b)",cex.sub=cex.sub)