# R functions by (or adapted by) Kyle Gorman <kgorman@ling.upenn.edu>

goodman <- function(x, y) {
    require(Hmisc)
    return(as.numeric(rcorr.cens(x, y, outx=TRUE)['Dxy']))
    detach('package:Hmisc')
}

standardize <- function(x) {
    # standardize the vector
    return((x - mean(x)) / (2 * sd(x)))
}

drop <- function(dat) {
    # given a subset of a data frame, remove all the empty levels
    # based on code by Florian Jaeger
    dat[] <- lapply(dat, function(x) x[, drop=1])
    return(dat)
}

fc.switch <- function() {
    # switches coding of factors between sum and treatment coding
    if(getOption('contrasts')[1] == 'contr.treatment') {
        options(contrasts = c('contr.sum', 'contr.poly'))
        print(noquote('unordered factors set to sum (contrast) coding'))
    }
    else {
        options(contrasts = c('contr.treatment', 'contr.poly'))
        print(noquote('unordered factors set to treatment coding'))
    }
}

degree2rad <- function(x) {
    # convert degrees to radians
  return(x * pi / 180)
}

haversine <- function(lata, lnga, latb, lngb) {
    # compute great-circle distance (in Km) between two points on earth.
    # based on: http://www.movable-type.co.uk/scripts/latlong.html
    cLng <- cos(rad(lngb) - rad(lnga))
    latA <- rad(lata)
    latB <- rad(latb)
    return(acos(sin(latA) * sin(latB) + cos(latA) * cos(latB) * cLng) * 6371)
}

hz2mel <- function(f) {
    # convert Hertz to Mels 
    return(1127.01048 * log(1 + f / 700.))
}

mel2hz <- function(f) {
    # convert Mels to Hertz
    return(700. * (exp(m / 1127.01048) - 1.))
}

pdf2cdf <- function(x) {
    # make a sorted PDF into a sorted CDF
	s <- 0 # sum
	i <- 0 # counter
	for (i in 1:length(x)) {
		x[i] <- x[i] + s
		s <- x[i]
	}
	return(x)
} 

ran.outliers <- function(model, p = .05) {   
    require(arm)
    # identifies random outliers given a p-criterion, from an lmer/lmer2 object
    est <- ranef(model)
    se <- se.ranef(model)
    groupings <- names(est)
    i <- 1 # keep track of where we are grouping
    for(zest in est) { # over groupings
        j <- 1 # keep track of where we are in se's x-given-group
        for(effect in names(zest)) { # over effects in grouping
            k <- 1
            levels <- row.names(zest[effect])    
            print(noquote(paste(effect, groupings[i], sep='|')))
            for (val in zest[effect][,]) {
                pval <- pnorm(val, 0, se[[i]][k, j])
                if (pval < p) { # signif
                    print(noquote(paste('   ', levels[k], val, pval)))
                }
                k <- k + 1
            }
            j <- j + 1
        }
        i <- i + 1
    }
}

tolerance <- function(x, y) {
    # computes Tolerance (after Yang 2005)
    z <- x + y
    return((z / log(z)) > y)
}

height <- function(f1, f2) {
    # computes height (after Labov 1994, p. 458)
    return(sqrt(abs((f2 * f2) - (2 * f1) ^ 2)))
}

peripherality <- function(f2) {
    # computes peripherality (after Labov 1994, p. 458)
    return(sqrt((f2 * f2) + (2 * f2) ^ 2))
}

Zr.nr <- function(r, nr) {
    # compute a smoothed freq distribution using Z_r statistic
    zro <- nr
    zro[1] <- zro[1] / (r[2] - r[1])
    L <- length(nr)
    i <- 2
    while (i < L) {
        zro[i] <- 2 * zro[i] / (r[i + 1] - r[i - 1])
        i <- i + 1
    }
    zro[L] <- zro[L] / (r[L] - r[L - 1])
    return(zro)
}

Zr.f <- function(f) {
    # f is a bowl of frequencies, returns a data frame you can plot
    q  <- rle(sort(f))
    r  <- q$values
    nr <- q$lengths
    Zr <- Zr.nr(r, nr)
    return(data.frame(r, nr, Zr))
}

freq2hit <- function(x) {
    # turn a frequency spectrum into a list of frequency hits
    hitz <- c()
    cour <- 1
    for (i in x) {
        hitz <- c(hitz, rep(cour, i))
        cour <- cour + 1
    }
    return(hitz)
}

spc.frame <- function(observed, expected, m.max=15) {
    # given observed (.spc) and expected (.fzm.spc), creates an data frame
    observed.Vm <- freq2hit(Vm(observed, 1:m.max))
    expected.Vm <- freq2hit(Vm(expected, 1:m.max))
    freq <- c(observed.Vm, expected.Vm)
    Type <- c(rep('Observed', length(observed.Vm)))
    Type <- factor(c(Type, rep('Expected', length(expected.Vm))), levels=c('Observed', 'Expected'))
    return(data.frame(freq, Type))
}

hypot <- function(x, y) {
    # hypotenuse length function that can never overflow, based on libc 
    # function of the same name.
    if (y > x) {
        temp = y
        y = x
        x = temp
    }
    return(abs(x) * sqrt(1. + (y / x) ^ 2))
}

sum.coef <- function(model) {
    # function for computing log-odds. ASSUMES SUM CODING
    if (class(model) %in% c('lme', 'mer')) coefs <- fixef(model)
    else coefs <- coef(model)
    new.names <- vector()
    new.values <- vector()
    n <- names(coefs)
    h <- gsub("\\d", "", n)
    for (hh in unique(h)) {
        new.names <- c(new.names, n[h==hh])
        new.values <- c(new.values, (x <- coefs[h==hh]))
        if (hh == "(Intercept)") next
        new.names <- c(new.names,paste(hh, 0, sep=""))
        new.values <- c(new.values, -sum(x))
    }
    names(new.values) <- new.names
    return(new.values)
}

sf <- function(y) {
    # function for generating proportional odds assumption table, based on 
    # http://www.ats.ucla.edu/stat/r/dae/ologit.htm
    return(c('Y>=0'=qlogis(mean(y >= 0)),'Y>=1'=qlogis(mean(y >= 1)),
      'Y>=2'=qlogis(mean(y >= 2))))
}

IR.scores <- function(tab) {
    # print out classic IR scores
    tp  <- tab[2, 2]
    fp  <- tab[1, 2]
    fn  <- tab[2, 1]
    tn  <- tab[1, 1]
    b   <- max(sum(tab[,1]), sum(tab[, 2])) / sum(tab)
    p   <- tp / (tp + fp)
    r   <- tp / (tp + fn)
    f   <- fp / (tn + fp)
    acc <- (tp + tn) / (tp + tn + fp + fn)
    F1  <- (2. * p * r) / (p + r)
    cat(paste('baseline  = ', format(b, digits=3)), '\n')
    cat(paste('accuracy  = ', format(acc, digits=3)), '\n')
    cat(paste('precision = ', format(p, digits=3)), '\n')
    cat(paste('recall    = ', format(hr, digits=3)), '\n')
    cat(paste('F1        = ', format(F1, digits=3)), '\n')
    cat(paste('MCC       = ', format(MCC(tp, tn, fp, fn), digits=3)), '\n')
    cat(paste('Aprime    = ', format(a.prime(r, f), digits=3)), '\n')
    cat(paste('Bprime    = ', format(b.prime(r, f), digits=3)), '\n')
}

MCC <- function(tp, tn, fp, fn) {
    # compute Matthews correlation coefficient
    prod <- log2(tp + fp) + log2(tp + fn) + log2(tn + fp) + log2(tn + fn)
    return((tp * tn - fp * fn) / sqrt(2 ^ prod))
}

a.prime <- function(hr, fr) {
    # compute A' discriminability statistic (after Grier 1970)
    if (fr > hr) {
        tp <- hr
        hr <- fr
        fr <- tp
   }
   return(.5 + ((hr - fr) * (1. + hr - fr) / (4. * hr * (1. - fr))))
}

b.prime <- function(hr, fr) {
    # compute B' bias stat (after Donaldson 1992)
    if (fr > hr) {
        tp <- hr
        hr <- fr
        fr <- tp
    }
    prod <- hr * fr
    return(((1. - hr) * (1. - fr) - prod) / ((1. - hr) * (1. - fr) + prod))
}

d.prime <- function(hr, fr) {
    # print D' detection statistic (after Macmillan & Creelman 2005)
    if (fr > hr) {
        tp <- hr
        hr <- fr
        fr <- tp
    }
    return(qnorm(hr, 0., 1.) - qnorm(fr, 0., 1.))
}

logx <- function() {
    require(scales)
    return(scale_x_continuous(trans=log10_trans(), breaks=trans_breaks('log10', function(x) 10^x), labels=trans_format('log10', math_format(10^.x))))
}

logy <- function() {
    require(scales)
    return(scale_y_continuous(trans=log10_trans(), breaks=trans_breaks('log10', function(x) 10^x), labels=trans_format('log10', math_format(10^.x))))
}