The oem package provides estimation routines for a variety of penalized linear models using the Orthogonalizing EM (OEM) algorithm. Documentation for the package can be found at the oem site.
Install using the devtools package (RcppEigen must be installed first):

install.packages("RcppEigen")
devtools::install_github("jaredhuling/oem")
or by cloning and building using R CMD INSTALL
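Once installed, a minimal sketch of a lasso fit with oem() looks like the following; the simulated data and dimensions here are purely illustrative:

library(oem)

# simulated data for illustration only
set.seed(1)
x <- matrix(rnorm(100 * 10), 100, 10)
y <- drop(x %*% c(1, -1, rep(0, 8))) + rnorm(100)

# the elastic net penalty with the default mixing parameter
# reduces to the lasso
fit <- oem(x, y, penalty = "elastic.net")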
To cite oem, please use:
Xiong, S., Dai, B., Huling, J., and Qian, P. Z. G. (2016). Orthogonalizing EM: A design-based least squares algorithm. Technometrics, Volume 58, Pages 285-293. URL: http://dx.doi.org/10.1080/00401706.2015.1054436
Huling, J. D. and Chien, P. (2018). Fast Penalized Regression and Cross Validation for Tall Data with the OEM Package. Journal of Statistical Software, to appear. URL: https://arxiv.org/abs/1801.09661
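The citation entries can also be printed from within R once the package is installed:

citation("oem")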
library(microbenchmark)
library(glmnet)
library(oem)

# compute the full solution path, n > p
set.seed(123)
n <- 1000000
p <- 100
m <- 25
b <- matrix(c(runif(m), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

lambdas <- oem(x, y, intercept = TRUE, standardize = FALSE,
               penalty = "elastic.net")$lambda[[1]]

microbenchmark(
    "glmnet[lasso]" = {res1 <- glmnet(x, y, thresh = 1e-10,
                                      standardize = FALSE,
                                      intercept = TRUE,
                                      lambda = lambdas)},
    "oem[lasso]"    = {res2 <- oem(x, y, penalty = "elastic.net",
                                   intercept = TRUE,
                                   standardize = FALSE,
                                   lambda = lambdas,
                                   tol = 1e-10)},
    times = 5
)
## Unit: seconds
## expr min lq mean median uq max neval
## glmnet[lasso] 7.610364 7.622585 7.879448 7.667767 7.945518 8.551005 5
## oem[lasso] 1.969916 2.027118 2.133341 2.089135 2.126875 2.453660 5
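As a quick sanity check that the two packages agree on the solutions, the largest absolute difference between the fitted coefficient paths can be computed along the lines of the comparisons further below; this sketch assumes both objects store the intercept in the first row, as the later examples suggest:

# largest absolute discrepancy between the glmnet and oem lasso paths
max(abs(as.matrix(coef(res1)) - res2$beta[[1]]))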
library(sparsenet)
library(ncvreg)
library(plus)

# compute the full solution path, n > p
set.seed(123)
n <- 5000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

mcp.lam <- oem(x, y, penalty = "mcp", gamma = 2,
               intercept = TRUE, standardize = TRUE,
               nlambda = 200, tol = 1e-10)$lambda[[1]]

scad.lam <- oem(x, y, penalty = "scad", gamma = 4,
                intercept = TRUE, standardize = TRUE,
                nlambda = 200, tol = 1e-10)$lambda[[1]]

microbenchmark(
    "sparsenet[mcp]" = {res1 <- sparsenet(x, y, thresh = 1e-10,
                                          gamma = c(2, 3), # sparsenet throws an error
                                                           # if you only fit 1 value of gamma
                                          nlambda = 200)},
    "oem[mcp]"     = {res2 <- oem(x, y, penalty = "mcp", gamma = 2,
                                  intercept = TRUE, standardize = TRUE,
                                  nlambda = 200, tol = 1e-10)},
    "ncvreg[mcp]"  = {res3 <- ncvreg(x, y, penalty = "MCP", gamma = 2,
                                     lambda = mcp.lam, eps = 1e-7)},
    "plus[mcp]"    = {res4 <- plus(x, y, method = "mc+", gamma = 2,
                                   lam = mcp.lam)},
    "oem[scad]"    = {res5 <- oem(x, y, penalty = "scad", gamma = 4,
                                  intercept = TRUE, standardize = TRUE,
                                  nlambda = 200, tol = 1e-10)},
    "ncvreg[scad]" = {res6 <- ncvreg(x, y, penalty = "SCAD", gamma = 4,
                                     lambda = scad.lam, eps = 1e-7)},
    "plus[scad]"   = {res7 <- plus(x, y, method = "scad", gamma = 4,
                                   lam = scad.lam)},
    times = 5
)
## Unit: milliseconds
##           expr       min        lq      mean    median        uq      max neval
## sparsenet[mcp] 1762.3026 1779.0533 1907.9942 1871.4751 1954.0494 2173.091     5
##       oem[mcp]  159.3148  159.6247  194.6605  160.0044  238.3018  256.057     5
##    ncvreg[mcp] 7566.5792 7636.3529 7900.8602 7681.1292 7907.0934 8713.146     5
##      plus[mcp] 1625.3785 1692.9125 1703.2951 1694.1239 1711.7150 1792.346     5
##      oem[scad]  136.1331  136.3932  138.6294  137.1140  138.2907  145.216     5
##   ncvreg[scad] 7485.8139 8060.6739 8534.4502 8388.1125 8779.2423 9958.408     5
##     plus[scad] 1765.2935 1873.8984 2009.8369 1878.5176 2097.5155 2433.959     5
diffs <- array(NA, dim = c(4, 1))
colnames(diffs) <- "abs diff"
rownames(diffs) <- c("MCP: oem and ncvreg", "SCAD: oem and ncvreg",
                     "MCP: oem and plus", "SCAD: oem and plus")
diffs[,1] <- c(max(abs(res2$beta[[1]] - res3$beta)),
               max(abs(res5$beta[[1]] - res6$beta)),
               max(abs(res2$beta[[1]][-1, 1:nrow(res4$beta)] - t(res4$beta))),
               max(abs(res5$beta[[1]][-1, 1:nrow(res7$beta)] - t(res7$beta))))
diffs
In addition to the group lasso, the oem package offers computation for the group MCP, group SCAD, and sparse group lasso penalties. All of the aforementioned penalties can also be augmented with a ridge penalty.
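For example, a single ridge-augmented group fit might look like the following sketch, which uses the grp.mcp.net penalty name and the alpha mixing parameter that appear in the combined examples further below; the data and group assignments here are placeholders:

# group MCP + ridge ("net") penalty; alpha mixes the two components
fit <- oem(x, y, penalty = "grp.mcp.net",
           groups = rep(1:20, each = 10), # one group label per column of x
           alpha  = 0.25)                 # mixing param for the l2 penalty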
library(gglasso)
library(grpreg)
library(grplasso)

# compute the full solution path, n > p
set.seed(123)
n <- 10000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)
groups <- rep(1:floor(p/10), each = 10)

grp.lam <- oem(x, y, penalty = "grp.lasso",
               groups = groups,
               nlambda = 100, tol = 1e-10)$lambda[[1]]

microbenchmark(
    "gglasso[grp.lasso]"  = {res1 <- gglasso(x, y, group = groups,
                                             lambda = grp.lam,
                                             intercept = FALSE,
                                             eps = 1e-8)},
    "oem[grp.lasso]"      = {res2 <- oem(x, y, groups = groups,
                                         intercept = FALSE,
                                         standardize = FALSE,
                                         penalty = "grp.lasso",
                                         lambda = grp.lam,
                                         tol = 1e-10)},
    "grplasso[grp.lasso]" = {res3 <- grplasso(x = x, y = y, index = groups,
                                              standardize = FALSE,
                                              center = FALSE,
                                              model = LinReg(),
                                              lambda = grp.lam * n * 2,
                                              control = grpl.control(trace = 0,
                                                                     tol = 1e-10))},
    "grpreg[grp.lasso]"   = {res4 <- grpreg(x, y, group = groups,
                                            eps = 1e-10, lambda = grp.lam)},
    times = 5
)
## Unit: milliseconds
##                expr        min       lq      mean    median        uq       max neval
##  gglasso[grp.lasso] 3483.62353 3529.320 3601.1492 3600.3122 3675.7521 3716.7380     5
##      oem[grp.lasso]   99.62382  100.823  107.9303  106.6158  114.8208  117.7679     5
## grplasso[grp.lasso] 7105.62106 7409.959 7835.5972 7836.2535 7977.5347 8848.6178     5
##   grpreg[grp.lasso] 1972.84562 2013.477 2132.7026 2015.0525 2149.0820 2513.0563     5
diffs <- array(NA, dim = c(2, 1))
colnames(diffs) <- "abs diff"
rownames(diffs) <- c("oem and gglasso", "oem and grplasso")
diffs[,1] <- c(max(abs(res2$beta[[1]][-1,] - res1$beta)),
               max(abs(res2$beta[[1]][-1,] - res3$coefficients)))
diffs
set.seed(123)
n <- 500000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)
groups <- rep(1:floor(p/10), each = 10)

# fit all group penalties at once
grp.penalties <- c("grp.lasso", "grp.mcp", "grp.scad",
                   "grp.mcp.net", "grp.scad.net",
                   "sparse.group.lasso")
system.time(res <- oem(x, y,
                       penalty = grp.penalties,
                       groups  = groups,
                       alpha   = 0.25,  # mixing param for l2 penalties
                       tau     = 0.5))  # mixing param for sparse grp lasso
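Each penalty gets its own solution path in the returned object; here is a brief sketch of pulling the paths out by position, assuming the beta list follows the order in which the penalties were specified (consistent with the beta[[1]] indexing used throughout these examples):

# one coefficient matrix per penalty, in the order given above
grp.lasso.path <- res$beta[[1]]  # group lasso path
grp.mcp.path   <- res$beta[[2]]  # group MCP path
dim(grp.lasso.path)              # (p + 1) x nlambda, intercept in row 1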
The oem algorithm is quite efficient at fitting multiple penalties simultaneously when p is not too large, since the computationally expensive orthogonalization step depends only on the data and is shared across all of the penalties.
set.seed(123)
n <- 100000
p <- 100
m <- 15
b <- matrix(c(runif(m, -0.25, 0.25), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

microbenchmark(
    "oem[lasso]" = {res1 <- oem(x, y,
                                penalty = "elastic.net",
                                intercept = TRUE,
                                standardize = TRUE,
                                tol = 1e-10)},
    "oem[lasso/mcp/scad/ols]" = {res2 <- oem(x, y,
                                             penalty = c("elastic.net", "mcp",
                                                         "scad", "grp.lasso",
                                                         "grp.mcp", "sparse.grp.lasso",
                                                         "grp.mcp.net", "mcp.net"),
                                             gamma = 4,
                                             tau = 0.5,
                                             alpha = 0.25,
                                             groups = rep(1:10, each = 10),
                                             intercept = TRUE,
                                             standardize = TRUE,
                                             tol = 1e-10)},
    times = 5
)
## Unit: milliseconds
##                     expr      min       lq     mean   median       uq      max neval
##               oem[lasso] 214.3171 218.2459 225.8759 219.6271 226.8682 250.3211     5
##  oem[lasso/mcp/scad/ols] 253.8738 255.8601 279.3674 272.4458 276.6489 338.0085     5
# layout(matrix(1:4, ncol = 2, byrow = TRUE))
plot(res2, which.model = 1, lwd = 2, xvar = "lambda")
plot(res2, which.model = 2, lwd = 2, xvar = "lambda")
plot(res2, which.model = 4, lwd = 2, xvar = "lambda")
plot(res2, which.model = 7, lwd = 2, xvar = "lambda")
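Beyond plotting, the which.model argument also selects among the simultaneously fitted models when predicting. A hedged sketch, assuming the predict method accepts new data through a glmnet-style newx argument:

# predictions from the MCP fit (model 2, per the order the penalties
# were specified in the call above)
preds <- predict(res2, newx = x[1:5, ], which.model = 2)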