Level set tree methods

Introduction

This is the home page of the article "Level set tree methods", written by Jussi Klemelä.

Abstract of the article

Level set trees provide a tool for analyzing multivariate functions. They can be used to visualize and present particularly good properties related to local maxima and local minima of functions. Level set trees can help statistical inference related to estimating probability density functions and regression functions, and they can be used in cluster analysis and function optimization, among other things.

Reproducing the computations

Install software

source("http://jklm.fi/denpro/denpro.R")
source("http://jklm.fi/delt/delt.R")

Figure 1

# generate data
seed<-1
n<-1000
d<-2
l<-3; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)

paletti<-c("red","blue","green", "orange","yellow","navy")
	 
cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(40,d)
pcf<-pcf.kern(dendat,h,N)
lst<-leafsfirst(pcf)

dp<-draw.pcf(pcf)    
persp(dp$x,dp$y,dp$z,phi=35,theta=20,ticktype="detailed", 
xlab="X1",ylab="X2",zlab="")
title(sub="(a)",cex.sub=cex.sub)
     
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
cex.axis=cex.axis,labcex=1.2)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
title(sub="(b)",cex.sub=cex.sub)
text(-2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(2.9,-2.9,"M1",col="blue",cex=1.5)     
     
plottree(lst,colo=TRUE,ptext=0.002,ylim=c(0,0.038),paletti=paletti,
xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
title(sub="(c)",cex.sub=cex.sub)

plotvolu(lst,colo=TRUE,modelabel=TRUE,paletti=paletti,
ptext=0.002,ylim=c(0,0.038),
cex.axis=cex.axis,cex=cex)
title(sub="(d)",cex.sub=cex.sub)
     
plotbary(lst,title=FALSE,
modelabel=TRUE,paletti=paletti,
ptext=0.002,ylim=c(0,0.038),xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
mtext("X1",side=1,cex=1.2,line=2.4)
title(sub="(e)",cex.sub=cex.sub)

plotbary(lst,coordi=2,title=FALSE,
modelabel=TRUE,paletti=paletti,
ptext=0.002,ylim=c(0,0.038),xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
mtext("X2",side=1,cex=1.2,line=2.4)
title(sub="(f)",cex.sub=cex.sub)
     

Figure 2

# generate data
seed<-1
n<-1000
d<-3
l<-4; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d);
M[2,1]<-c; M[2,2]<--c; M[2,3]<--c
M[3,1]<--c/1.1; M[3,2]<-c/1.1; M[3,3]<-c/1.1
M[4,1]<-c; M[4,2]<--c; M[4,3]<-c
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)

d<-3      
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(60,d)
pcfb<-pcf.kern(dendat,h,N)

paletti<-c("red","blue","green", "orange","yellow","navy","violet","darkgreen")
       
cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

d<-2
dendat1<-dendat[,c(1,2)]
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat1,2,sd)
N<-rep(50,d)
pcf1<-pcf.kern(dendat1,h,N)
dp<-draw.pcf(pcf1)  
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
xlim=c(-5,5.5),ylim=c(-5.5,5),
cex.axis=cex.axis,labcex=1.2)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
text(-2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(2.9,-2.9,"M1",col="blue",cex=1.5)     
text(2.7,-2.7,"M4",col="orange",cex=1.5)     
title(sub="(a)",cex.sub=cex.sub)
     
pcf12<-slicing(pcfb,vecci=c(0),d1=1,d2=2)     
dp<-draw.pcf(pcf12)         
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
xlim=c(-5,5),ylim=c(-5,5),cex.axis=cex.axis,labcex=1.2)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
text(-2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(2.9,-2.9,"M1",col="blue",cex=1.5)     
text(2.7,-2.7,"M4",col="orange",cex=1.5)     
title(sub="(b)",cex.sub=cex.sub)
     
d<-2
dendat1<-dendat[,c(1,3)]
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat1,2,sd)
N<-rep(50,d)
pcf1<-pcf.kern(dendat1,h,N)
dp<-draw.pcf(pcf1)  
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
xlim=c(-5.5,5.5),ylim=c(-5.5,5.5),
cex.axis=cex.axis,labcex=1.2)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X3",side=2,cex=1.2,line=2.4)
text(-2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(2.9,-2.9,"M1",col="blue",cex=1.5)     
text(2.7,2.7,"M4",col="orange",cex=1.5)     
title(sub="(c)",cex.sub=cex.sub)
     
pcf13<-slicing(pcfb,vecci=c(0),d1=1,d2=3)
dp<-draw.pcf(pcf13)         
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
xlim=c(-5,5),ylim=c(-5,5),cex.axis=cex.axis,labcex=1.2)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X3",side=2,cex=1.2,line=2.4)
text(-2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(2.9,-2.9,"M1",col="blue",cex=1.5)     
text(2.7,2.7,"M4",col="orange",cex=1.5)     
title(sub="(d)",cex.sub=cex.sub)
     
d<-2
dendat1<-dendat[,c(2,3)]
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat1,2,sd)
N<-rep(50,d)
pcf1<-pcf.kern(dendat1,h,N)
dp<-draw.pcf(pcf1)  
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
xlim=c(-5.5,5.5),ylim=c(-5.5,5.5),
cex.axis=cex.axis,labcex=1.2)
mtext("X2",side=1,cex=1.2,line=2.4)
mtext("X3",side=2,cex=1.2,line=2.4)
text(2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(-2.9,-2.9,"M1",col="blue",cex=1.5)     
text(-2.8,2.8,"M4",col="orange",cex=1.5)     
title(sub="(e)",cex.sub=cex.sub)

pcf23<-slicing(pcfb,vecci=c(0),d1=2,d2=3)     
dp<-draw.pcf(pcf23)         
contour(dp$x,dp$y,dp$z,xlab="",ylab="",nlevels=12,
xlim=c(-5,5),ylim=c(-5,5),cex.axis=cex.axis,labcex=1.2)
mtext("X2",side=1,cex=1.2,line=2.4)
mtext("X3",side=2,cex=1.2,line=2.4)
text(2.7,2.7,"M2",col="green",cex=1.5)
text(0,0,"M3",col="red",cex=1.5)
text(-2.9,-2.9,"M1",col="blue",cex=1.5)     
text(-2.8,2.8,"M4",col="orange",cex=1.5)     
title(sub="(f)",cex.sub=cex.sub)
     

Figure 3

# generate data
seed<-1
n<-1000
d<-3
l<-4; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d);
M[2,1]<-c; M[2,2]<--c; M[2,3]<--c
M[3,1]<--c/1.1; M[3,2]<-c/1.1; M[3,3]<-c/1.1
M[4,1]<-c; M[4,2]<--c; M[4,3]<-c
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)
	 
d<-3
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(20,d)
pcf<-pcf.kern(dendat,h,N)

lst<-leafsfirst(pcf)

paletti<-c("red","blue","green", "orange","yellow","navy","violet","darkgreen")
      
cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

plottree(lst,colo=TRUE,paletti=paletti,
ptext=0.0005,ylim=c(0,0.007),
xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
title(sub="(a)",cex.sub=cex.sub)

plotvolu(lst,colo=TRUE,paletti=paletti,#modelabel=TRUE,
cex.axis=cex.axis,cex=cex)
mtext("volume",side=1,cex=1.2,line=2.4)
title(sub="(b)",cex.sub=cex.sub)

plotvolu(lst,cutlev=0.002, colo=TRUE,paletti=paletti,modelabel=TRUE,
ptext=0.0005,ylim=c(0.002,0.007),
cex.axis=cex.axis,cex=cex)
mtext("volume",side=1,cex=1.2,line=2.4)      
title(sub="(c)",cex.sub=cex.sub)
   
plotbary(lst,title=FALSE,
modelabel=TRUE,paletti=paletti,
ptext=0.0005,ylim=c(0,0.007),
xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
mtext("X1",side=1,cex=1.2,line=2.4)
title(sub="(d)",cex.sub=cex.sub)

plotbary(lst,coordi=2,title=FALSE,
modelabel=TRUE,paletti=paletti,
ptext=0.0005,ylim=c(0,0.007),
xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
mtext("X2",side=1,cex=1.2,line=2.4)
title(sub="(e)",cex.sub=cex.sub)

plotbary(lst,coordi=3,title=FALSE,
modelabel=TRUE,paletti=paletti,
ptext=0.0005,ylim=c(0,0.007),
xmargin=0.04,xmarginleft=0.04,
cex.axis=cex.axis,cex=cex)
mtext("X3",side=1,cex=1.2,line=2.4)
title(sub="(f)",cex.sub=cex.sub)

Figure 4: Scatter plots

# generate data
seed<-1
n<-1000
d<-2
l<-3; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)
	 
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(40,d)
pcf<-pcf.kern(dendat,h,N)
lst<-leafsfirst(pcf)
	 
cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

# 1
plot(dendat,
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=cex,line=2.4)
mtext("X2",side=2,cex=cex,line=2.4)
title(sub="(a)",cex.sub=cex.sub)

# 2 colored scatter plot
cd<-colors2data(dendat,pcf,lst,paletti=paletti)
plot(dendat[cd$ord,1],dendat[cd$ord,2],col=cd$datacolo[cd$ord],
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=cex,line=2.4)
mtext("X2",side=2,cex=cex,line=2.4)
title(sub="(b)",cex.sub=cex.sub)

Figure 5: Clustering

# generate data
seed<-1
n<-1000
d<-2
l<-3; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)
	 
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(40,d)
pcf<-pcf.kern(dendat,h,N)
lst<-leafsfirst(pcf)
  
cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

# 1
plotvolu(lst,colo=TRUE,modelabel=FALSE,paletti=paletti,
ptext=0.002,ylim=c(0,0.038),
cex.axis=cex.axis,cex=1.3)
lambda<-0.02
segments(-1,lambda,200,lambda)	      
title(sub="(a)",cex.sub=cex.sub)

# 2 partial clustering with a fixed level
cl<-cluster.lst(dendat,h,N=N,labels="colors",paletti=paletti,
type="grid",lambda=lambda)
plot(dendat,col=cl,
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
title(sub="(b)",cex.sub=cex.sub)
   
# 3
plotvolu(lst,colo=TRUE,modelabel=FALSE,paletti=paletti,
ptext=0.002,ylim=c(0,0.038),
cex.axis=cex.axis,cex=1.3)
lambda<-0.028
segments(-1,lambda,200,lambda)	      
title(sub="(c)",cex.sub=cex.sub)

# 4 partial clustering with a fixed level
cl<-cluster.lst(dendat,h,N=N,labels="colors",paletti=paletti,
type="grid",lambda=lambda)
plot(dendat,col=cl,
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
title(sub="(d)",cex.sub=cex.sub)
     

Figure 6: Clustering: Changing levels

# generate data
seed<-1
n<-1000
d<-2
l<-3; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)
	 
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(40,d)
pcf<-pcf.kern(dendat,h,N)
lst<-leafsfirst(pcf)

cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

# 1 partial clustering with locally changing levels
nodes<-findbnodes(lst,modenum=3)
plotvolu(lst,colo=TRUE,nodes=nodes,
modelabel=FALSE,paletti=paletti,
ptext=0.002,ylim=c(0,0.038),
cex.axis=cex.axis,cex=1.2)
title(sub="(a)",cex.sub=cex.sub)
	
# 2
cd<-colors2data(dendat,pcf,lst,nodes=nodes)
plot(dendat,col=cd$datacolo,
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
title(sub="(b)",cex.sub=cex.sub)
  

Figure 7: Clustering: Complete

# generate data
seed<-1
n<-1000
d<-2
l<-3; D<-4; c<-D/sqrt(2)
M<-matrix(0,l,d); M[2,1]<-c; M[2,2]<--c; M[3,1]<--c/1.1; M[3,2]<-c/1.1
sig<-matrix(1,l,d)
p<-rep(1/l,l)
dendat<-sim.data(type="mixt",n=n,M=M,sig=sig,p=p,seed=seed)
	 
h<-(4/(d+2))^(1/(d+4))*n^(-1/(d+4))*apply(dendat,2,sd)
N<-rep(40,d)
pcf<-pcf.kern(dendat,h,N)
lst<-leafsfirst(pcf)

cex.lab<-1.5
cex.axis<-1.5
cex.sub<-1.5
cex<-1.5

# 1 complete clustering with a fixed level
lambda<-0.02
cl<-cluster.lst(dendat,h,N=N,complete=TRUE,labels="colors",type="grid",
paletti=paletti,lambda=lambda)
plot(dendat,col=cl,
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
title(sub="(a)",cex.sub=cex.sub)
  
# 2 complete clustering with a fixed level
lambda<-0.028
cl<-cluster.lst(dendat,h,N=N,complete=TRUE,labels="colors",type="grid",
paletti=paletti,lambda=lambda)
plot(dendat,col=cl,
xlab="",ylab="",cex.axis=cex.axis,cex.lab=cex.lab)
mtext("X1",side=1,cex=1.2,line=2.4)
mtext("X2",side=2,cex=1.2,line=2.4)
title(sub="(b)",cex.sub=cex.sub)
  

February 2018