###############################################################################
#									      #
# 			Mode Tree S-plus Functions			      #
#									      #
# These functions, together with the FORTRAN code in the companion file       #
# "modetree.f", will allow the user to generate univariate mode trees, as     #
# described in Minnotte and Scott, (1993) "The Mode Tree: A Tool for          #
# Visualization of Nonparametric Density Features," Journal of Computational  #
# and Graphical Statistics, 2, 51-68.					      #
#									      #
# The primary functions are the first four.  				      #
#									      #
# modetree - generates a basic mode tree including only modes and branchings  #
# enhmtree - generates an enhanced mode tree including modes, branchings,     #
#	mode sizes, bumps, and antimodes				      #
# modetreeplot - plots a basic mode tree (if doplot=T, called automatically   #
#	from modetree)							      #
# enhmtreeplot - plots an enhanced mode tree (if doplot=T, called             #
#	automatically from enhmtree)					      #
#									      #
# The remaining functions exist primarily to be called from the above         #
# functions, but may be useful themselves.				      #
#									      #
# bin1 - bins a data set into nbin bins for use in nash1		      #
# nash1 - approximates a kernel density estimate with normal kernel using     #
#	an appropriately weighted average shifted histogram (ASH)             #
# nicerange - range of a data set extended on each side (default: 10%)	      #
# matcher - matches elements from two lists as described in the appendix      #
#	to Minnotte and Scott						      #
# quadest - given three ordered pairs, finds the extremum of the interpolated #
#	parabola							      #
# findmodes - given sequence of values representing a density function,       #
# 	identify number of modes, locations of modes, and indices of modes    #
#	and antimodes							      #
# findbumps - given sequence of values representing a density function,       #
# 	identify bumps (regions of negative second derivative)		      #
#									      #
# All functions are copyright 1993, Michael C. Minnotte except bin1 and       #
# nicerange, copyright 1986, David W. Scott, and nash1, copyright 1993 David  #
# W. Scott and Michael  C. Minnotte.					      #
#									      #
# The authors may be contacted at minnotte@math.usu.edu and scottdw@rice.edu .#
###############################################################################

modetree<-
function(X, ran = c(diff(nicerange(X))/4, diff(nicerange(X))/500), 
	maintit = "", xlb="", nbin = 500, nh = 200, doplot = T, docat=T)
# modetree - generates a basic mode tree including only modes and branchings  
# X - raw (unbinned) data; ran - largest and smallest bandwidths; maintit -
# main title on plot; xlb - x label on plot; nbin - number of bins for data
# and ASH; nh - number of different bandwidths; doplot - plot tree on 
# current device if TRUE; docat - indicate status if TRUE
# Copyright 1993, Michael C. Minnotte
{
        tree <- NULL
        modes <- NULL
        bins <- bin1(X, nbin = nbin)
        maxh <- ran[1]
        minh <- ran[2]
        loghlist <- seq(log(maxh), log(minh), length = nh)
        n <- length(X)
        hlist <- exp(loghlist)
	dh<- hlist[2]/hlist[1]
        est <- nash1(bins, hlist[1])
        oldmodes <- findmodes(est$x, est$y)
        tree <- cbind(oldmodes$modes[, 1], rep(hlist[1], oldmodes$nm), rep(
                0, oldmodes$nm), rep(0, oldmodes$nm))
        count <- 0
        for(i in 2:length(hlist)) {
                if(docat)
                        cat("h number ", i, " starting.\n")
                est <- nash1(bins, hlist[i])
                newmodes <- findmodes(est$x, est$y)
                mout <- matcher(newmodes$modes[, 1], oldmodes$modes[, 1])
                for(j in 1:newmodes$nm) {
                        if(mout[j, 1] <= oldmodes$nm)
                                tree <- rbind(tree, c(newmodes$modes[j, 1],
                                        hlist[i], count + mout[j, 1], 1))
                        else {
                                oldfrom <- (1:oldmodes$nm)[newmodes$modes[
                                        j, 2] > oldmodes$am[, 1] & newmodes$
                                        modes[j, 2] < oldmodes$am[, 2]]
                                newfrom <- mout[oldfrom, 2]
                                tree <- rbind(tree, c(newmodes$modes[j, 1],
                                        hlist[i], count + oldmodes$nm + newfrom,
                                        2))
                        }
                }
                count <- count + oldmodes$nm
                oldmodes <- newmodes
        }
        if(doplot)
                modetreeplot(tree, maintit, xlb)
        return(tree)
}

###############################################################################

enhmtree<-
function(X, ran = c(diff(nicerange(X))/4, diff(nicerange(X))/200), 
        maintit = "", xlb = "", nbin = 500, nh = 200, doplot = T, docat = T)
# enhmtree - generates an enhanced mode tree including modes, branchings,     
#	mode sizes, bumps, and antimodes				      
# X - raw (unbinned) data; ran - largest and smallest bandwidths; maintit -
# main title on plot; xlb - x label on plot; nbin - number of bins for data
# and ASH; nh - number of different bandwidths; doplot - plot tree on 
# current device if TRUE; docat - indicate status if TRUE
# Copyright 1993, Michael C. Minnotte
{
        tree <- NULL
        am <- NULL
        bins <- bin1(X, nbin = nbin)
        maxh <- ran[1]
        minh <- ran[2]
        loghlist <- seq(log(maxh), log(minh), length = nh)
        hlist <- exp(loghlist)
        n <- length(X)
        est <- nash1(bins, hlist[1])
        d <- est$x[2] - est$x[1]
        oldbumps <- findbumps(est$x, est$y)
	nob <- nrow(oldbumps)
        shade <- cbind(oldbumps, rep(hlist[1], nob), rep(0, nob), rep(0, nob))
        oldmodes <- findmodes(est$x, est$y)
        if(oldmodes$nm == 1)
                tree <- c(oldmodes$modes[1, 1], hlist[1], 0, 1)
        else for(i in 1:oldmodes$nm) {
                        temp <- .Fortran("excise",
                                as.double(est$y),
                                g = double(nbin),
                                coeff = double(1),
                                as.integer(nbin),
                                as.integer(oldmodes$am[i, 1]),
                                as.integer(oldmodes$am[i, 2]),
                                as.double(d))
                        wd <- sum(est$y - temp$g) * d
                        tree <- rbind(tree, c(oldmodes$modes[i, 1], hlist[
                                1], 0, wd))
                }
	onm1<-oldmodes$nm-1
        if(oldmodes$nm > 1){
		oldam<-rep(0,onm1)
                for(j in 1:onm1)
                        oldam[j] <- quadest(est$x[(oldmodes$am[j + 1,
                                1] - 1):(oldmodes$am[j + 1, 1] + 1)], est$y[
                                (oldmodes$am[j + 1, 1] - 1):(oldmodes$am[j +
                                1, 1] + 1)])
		am<-cbind(oldam,rep(hlist[1],onm1),rep(0,onm1),
			rep(0,onm1))}
        count <- 0
	countb<-0
        countam <- 0
        for(i in 2:length(hlist)) {
                if(docat)
                        cat("h number ", i, " starting.\n")
                est <- nash1(bins, hlist[i])


                newbumps <- findbumps(est$x, est$y)
		nnb <- nrow(newbumps)
		mblout <- matcher(newbumps[,1],oldbumps[,1])[1:nnb,1]
		mblout[mblout>nob] <- -countb	

		mbrout <- matcher(newbumps[,2],oldbumps[,2])[1:nnb,1]
		mbrout[mbrout>nob] <- -countb	

		newbumps <- cbind(newbumps, rep(hlist[i], nnb), 
			countb + mblout, countb + mbrout)
		shade <- rbind(shade, newbumps)

		countb <- countb + nob
		nob <- nnb
		oldbumps <- newbumps

#                shade <- rbind(shade, cbind(bumps, rep(hlist[i], nrow(bumps))))
                        
                newmodes <- findmodes(est$x, est$y)
                mout <- matcher(newmodes$modes[, 1], oldmodes$modes[, 1])
                for(j in 1:newmodes$nm) {
                        if(newmodes$nm == 1)
                                wd <- 1
                        else {
                                temp <- .Fortran("excise",
                                        as.double(est$y),
                                        g = double(nbin),
                                        coeff = double(1),
                                        as.integer(nbin),
                                        as.integer(newmodes$am[j, 1]),
                                        as.integer(newmodes$am[j, 2]),
                                        as.double(d))
                                wd <- sum(est$y - temp$g) * d
                        }
                        if(mout[j, 1] <= oldmodes$nm)
                                tree <- rbind(tree, c(newmodes$modes[j, 1],
                                        hlist[i], count + mout[j, 1], wd))
                                
                        else {
                                oldfrom <- (1:oldmodes$nm)[newmodes$modes[
                                        j, 2] > oldmodes$am[, 1] & newmodes$
                                        modes[j, 2] < oldmodes$am[, 2]]
                                newfrom <- mout[oldfrom, 2]
                                tree <- rbind(tree, c(newmodes$modes[j, 1],
                                        hlist[i], count + oldmodes$nm + newfrom,
                                        -1 * wd))
                        }
                }
                count <- count + oldmodes$nm
                if(newmodes$nm > 1) {
			nnm1<-newmodes$nm-1
			newam<-rep(0,nnm1)
                        for(j in 1:nnm1)
                        	newam[j] <- quadest(est$x[(newmodes$am[j + 1,
                                1] - 1):(newmodes$am[j + 1, 1] + 1)], est$y[
                                (newmodes$am[j + 1, 1] - 1):(newmodes$am[j +
                                1, 1] + 1)])
			if (is.null(am)) 			
				am<-cbind(newam,rep(hlist[i],nnm1),rep(0,nnm1),
					rep(0,nnm1))
			else {
				mout<-matcher(newam,oldam)
				for (j in 1:nnm1)
					if (mout[j,1]<=onm1)
						am<-rbind(am,c(newam[j],
							hlist[i],countam+
							mout[j,1],1))
					else
						am<-rbind(am,c(newam[j],
							hlist[i],0,0))
			}
			countam<-countam + onm1
			onm1<-nnm1
			oldam<-newam
                }
                oldmodes <- newmodes
        }
        out <- list(tree = tree, shade = shade, am = am)
        if(doplot)
                enhmtreeplot(out, maintit, xlb=xlb)
        return(out)
}

###############################################################################
 
modetreeplot<-
function(tree, maintit = "", xlb="")
# modetreeplot - plots a basic mode tree (if doplot=T, called automatically   
#	from modetree)							      
# tree - mode tree (output from modetree); maintit - main title; xlb - x label
# Copyright 1993, Michael C. Minnotte
{
	nr <- nrow(tree)
	plot(tree[floor(nr/2), 1], tree[floor(nr/2), 2], type = "n", xlim = 
		nicerange(tree[, 1]), ylim = range(tree[, 2]), log =
		"y", xlab = xlb,ylab = "h", main = maintit)
	t1 <- (1:nrow(tree))[tree[, 4] == 1]
	t2 <- (1:nrow(tree))[tree[, 4] == 2]
	segments(tree[t1, 1], tree[t1, 2], tree[tree[t1, 3], 1], tree[tree[
		t1, 3], 2], lty = 1)
	segments(tree[t2, 1], tree[t2, 2], tree[tree[t2, 3], 1], tree[tree[
		t2, 3], 2], lty = 2)
}

###############################################################################

enhmtreeplot<-
function(enhmtree, maintit = "", xlb="", wd = 0, cols=c(16,1,2,1,0,1), amp=3)
# enhmtreeplot - plots an enhanced mode tree (if doplot=T, called             
#	automatically from enhmtree)					      
# tree - mode tree (output from modetree); maintit - main title; xlb - x label;
# wd - width of line indicating mode consisting of entire density (default of 
# 0 calculates so that no mode line goes beyond its bump's shaded area);
# cols - colors: [1] - modes (polygon), [2] - modes (segments), [3] - bumps
# (polygons), [4] - antimodes (points/segments), [5] - background/antibumps
# (polygon), [6] - labels/box (lines/text), selected values look good for
# X11 graphics mode, for openlook cyan-to-red I like c(5,5,2,1,6,1), and
# for (b&w) postscript I like c(1,1,2,1,0,1); amp - antimode points, if
# positive one dot for every amp h values will be plotted, if nonpositive,
# line type -amp will be used.
# Copyright 1993, Michael C. Minnotte
{
	par(las = 1)
        tree <- enhmtree$tree
        shade <- enhmtree$shade
        am <- enhmtree$am
        nr <- nrow(tree)
        plot(tree[floor(nr/2), 1], tree[floor(nr/2), 2], type = "n", xlim = 
                range(shade[, 1:2]), ylim = range(shade[, 3]), log = "y", xlab
                 = xlb, ylab = "h", main = maintit, col=cols[6])
        h <- unique(shade[, 3])
        lh <- length(h)
        logsep <- log(h[2]) - log(h[1])
        logsep2 <- logsep/2
#	factor <- sqrt(h[1]/h[2])
#      	polygon(c(min(shade[, 1]), rep(max(shade[, 2]), 2), min(shade[, 1])),
#                c(rep(min(shade[, 3])/factor, 2), rep(max(shade[, 3]) * factor,
#                2)), border = F, col = cols[5])

      	polygon(c(min(shade[, 1]), rep(max(shade[, 2]), 2), min(shade[, 1])),
                c(rep(min(shade[, 3]), 2), rep(max(shade[, 3]),
                2)), border = F, col = cols[5])
 
	s1 <- (1:nrow(shade))[shade[,4] > 0 & shade[,5] > 0]
        x1 <- cbind(shade[s1, 1], shade[s1, 2], shade[shade[s1,5], 2], 
		shade[shade[s1,4], 1], rep(NA,length(s1)))
	y1 <- cbind(shade[s1,3], shade[s1,3], shade[shade[s1,5],3],
		shade[shade[s1,4],3], rep(NA,length(s1)))
        x2 <- as.vector(t(x1))
        y2 <- as.vector(t(y1))
        polygon(x2, y2, border = F, col = cols[3])

	s1 <- (1:nrow(shade))[shade[,4] > 0 & shade[,5] == 0]
        x1 <- cbind(shade[s1, 1], shade[s1, 2], 
		(shade[s1,2] + shade[s1+1,1])/2,
		shade[shade[s1,4], 1], rep(NA,length(s1)))
	y1 <- cbind(shade[s1,3], shade[s1,3], shade[shade[s1,4],3],
		shade[shade[s1,4],3], rep(NA,length(s1)))
        x2 <- as.vector(t(x1))
        y2 <- as.vector(t(y1))
        polygon(x2, y2, border = F, col = cols[3])

	s1 <- (1:nrow(shade))[shade[,4] == 0 & shade[,5] > 0]
        x1 <- cbind(shade[s1, 1], shade[s1, 2], shade[shade[s1,5],2],
		(shade[s1,1] + shade[s1-1,2])/2, rep(NA,length(s1)))
	y1 <- cbind(shade[s1,3], shade[s1,3], shade[shade[s1,5],3],
		shade[shade[s1,5],3], rep(NA,length(s1)))
        x2 <- as.vector(t(x1))
        y2 <- as.vector(t(y1))
        polygon(x2, y2, border = F, col = cols[3])

        if(wd == 0) {
                gt <- (1:nrow(tree))[tree[, 4] < 0]
                mt <- tree[gt, 3]
                ms <- rep(0, length(mt))
                for(i in 1:length(mt)) {
                        ms[i] <- (1:nrow(shade))[shade[, 3] == tree[mt[i],
                                2] & shade[, 1] < tree[mt[i], 1] & shade[,
                                2] > tree[mt[i], 1]]
                }
                wd <- min((shade[ms, 2] - tree[mt, 1])/abs(tree[mt, 4]), (
                        tree[mt, 1] - shade[ms, 1])/abs(tree[mt, 4])) * .9
        }
        gt <- (1:nrow(tree))[tree[, 4] > 0 & tree[, 3] > 0]
        nt <- length(gt)
        x1 <- cbind(tree[gt, 1] - wd * abs(tree[gt, 4]), tree[gt, 1] + wd * abs(
                tree[gt, 4]), tree[tree[gt, 3], 1] + wd * abs(tree[tree[gt, 3],
                4]), tree[tree[gt, 3], 1] - wd * abs(tree[tree[gt, 3], 4]), rep(
                NA, nt))
        y1 <- cbind(tree[gt, 2], tree[gt, 2], tree[tree[gt, 3], 2], tree[tree[
                gt, 3], 2], rep(NA, nt))
        x2 <- as.vector(t(x1))
        y2 <- as.vector(t(y1))
        polygon(x2, y2, border = F, col = cols[1])
        t2 <- (1:nrow(tree))[tree[, 4] < 0]
        gt <- tree[t2, 3]
        nt <- length(gt)
        x1 <- cbind(tree[gt, 1] - wd * abs(tree[tree[gt, 3], 4]), tree[gt, 1] +
                wd * abs(tree[tree[gt, 3], 4]), tree[tree[gt, 3], 1] + wd * abs(
                tree[tree[gt, 3], 4]), tree[tree[gt, 3], 1] - wd * abs(tree[
                tree[gt, 3], 4]), rep(NA, nt))
        y1 <- cbind(tree[gt, 2], tree[gt, 2], tree[tree[gt, 3], 2], tree[tree[
                gt, 3], 2], rep(NA, nt))
        x2 <- as.vector(t(x1))
        y2 <- as.vector(t(y1))
        polygon(x2, y2, border = F, col = cols[1])
        gt <- (1:nrow(tree))[tree[, 4] > 0 & tree[, 3] > 0]
        segments(tree[gt, 1], tree[gt, 2], tree[tree[gt, 3], 1], 
		tree[tree[gt, 3], 2], col=cols[2])

        segments(tree[t2, 1], tree[t2, 2], tree[tree[t2, 3], 1], tree[tree[
                t2, 3], 2], lty = 3, col=cols[2])

	if (amp > 0) {
		h<-unique(am[,2])
		h<-c(h,rep(0,ceiling(length(h)/amp)*amp-length(h)))
		h<-matrix(h,amp)
		goodh<-h[1,]

		goodam<-NULL
		for (i in 1:length(goodh))
			goodam<-rbind(goodam,am[am[,2]==goodh[i],])
	        points(goodam[, 1], goodam[, 2], pch = ".",col=cols[4])
	} else {
	        a1 <- (1:nrow(am))[am[, 4] == 1]
        	segments(am[a1, 1], am[a1, 2], am[am[a1, 3], 1], 
			am[am[a1, 3], 2],  col = cols[4], lty = -amp)
#	        a2 <- (1:nrow(am))[am[, 2] == am[nrow(am), 2]]
#        	segments(am[a2, 1], am[a2, 2], am[a2, 1], am[a2, 2]/factor, 
#			col = cols[4], lty = -amp)
	}
}

###############################################################################

bin1<-
function(x, ab = nicerange(x), nbin = 500)
# bin1 - bins a data set into nbin bins for use in nash1		      
# x - (unbinned) data; ab - range for bins; nbin - number of bins
# Copyright 1986, David W. Scott
{
        if(ab[1] >= ab[2])
                fatal("Interval vector has negative orientation")
        if(nbin <= 0)
                fatal("Number of bin intervals nonpositive")
        a <- .Fortran("bin1",
                as.double(x),
                as.integer(length(x)),
                as.double(ab),
                as.integer(nbin),
                bc = double(nbin),
                nskip = integer(1))
        return(list(bc = a$bc, ab = ab, nskip = a$nskip))
}

###############################################################################

nash1<-
function(bins, h)
# nash1 - approximates a kernel density estimate with normal kernel using     
#	an appropriately weighted average shifted histogram (ASH)             
# bins - binned data; h - bandwidth (s.d. of normal kernel)
# Copyright 1993, David W. Scott and Michael C. Minnotte
{
        if(h <= 0)
                stop("Smoothing parameter m nonpositive")
        bc <- bins$bc
        ab <- bins$ab
        nbin <- length(bc)
        a <- .Fortran("nash1",
                as.double(h),
                as.double(bc),
                as.integer(nbin),
                as.double(ab),
                t = double(nbin),
                f = double(nbin),
                work = double(nbin))
        return(list(x = a$t, y = a$f, ab = ab, h = h, n = sum(bc), nskip = bins
                $nskip))
}

###############################################################################

nicerange<-
function(x, beta = 0.2)
# nicerange - range of a data set extended on each side (default: 10%)	      
# x - unbinned data; beta - total fraction of data range to add (split between
# the two sides)
# Copyright 1986, David W. Scott
{
	ab <- range(x)	# interval surrounding data
	del <- ((ab[2] - ab[1]) * beta)/2
	return(c(ab + c(.Uminus(del), del)))
}

###############################################################################

matcher<-
function(alist, blist)
# matcher - matches elements from two lists as described in the appendix 
#	to Minnotte and Scott						 
# alist - first vector of sorted values; blist - second vector of sorted 
# values
# Copyright 1993, Michael C. Minnotte
{
        na <- length(alist)
        nb <- length(blist)
        alist <- c(alist, rep(0, 100 - na))
        blist <- c(blist, rep(0, 100 - nb))
        mout <- matrix(0, 100, 2)
        out <- .Fortran("matcher",
                as.double(alist),
                as.double(blist),
                as.integer(na),
                as.integer(nb),
                mout = as.integer(mout))
        return(matrix(out$mout, 100, 2))
}

###############################################################################

quadest<-
function(x, y)
# quadest - given three ordered pairs, finds the extremum of the interpolated 
#	parabola					
# x - 3-vector of x values; y - 3-vector of y values
# Copyright 1993, Michael C. Minnotte
{
	a <- ((y[1] - y[2])/(x[1] - x[2]) - (y[2] - y[3])/(x[2] - x[3]))/(
		x[1] - x[3])
	b <- (y[1] - y[2])/(x[1] - x[2]) - a * (x[1] + x[2])
	quad <-  - b/(2 * a)
	return(quad)
}

###############################################################################

findmodes<-
function(x, y)
# findmodes - given sequence of values representing a density function,       
# 	identify number of modes, locations of modes, and indices of modes    
#	and antimodes
# x - x-values returned by nash1; y - density values returned by nash1
# Copyright 1993, Michael C. Minnotte
{
        nbin <- length(x)
        out <- .Fortran("findmodes",
                as.double(y),
                as.double(x),
                as.integer(nbin),
                rm = double(100),
                mc = integer(100),
                ml = integer(100),
                mr = integer(100),
                nm = integer(1))
        return(nm = out$nm, modes = cbind(out$rm[1:out$nm], out$mc[1:out$nm]),
                am = cbind(out$ml[1:out$nm], out$mr[1:out$nm]))
}

###############################################################################

findbumps<-
function(x, y)
# findbumps - given sequence of values representing a density function,       #
# 	identify bumps (regions of negative second derivative)		      #
# x - x-values returned by nash1; y - density values returned by nash1
# Copyright 1993, Michael C. Minnotte
{
	n <- length(y)
	f2 <- c(1, y[1:(n - 2)] - 2 * y[2:(n - 1)] + y[3:n], 1)
	s <- ifelse(f2 < 0, -1, 1)
	s2 <- s[2:n] - s[1:(n - 1)]
	i1 <- (1:(n - 1))[s2 == -2] + 1
	i2 <- (1:(n - 1))[s2 == 2]
	return(cbind(x[i1], x[i2]))
}