The oem package provides estimation routines for a variety of penalized linear models using the Orthogonalizing EM (OEM) algorithm. Documentation for the package can be found at the oem site.
Install using the devtools package (RcppEigen must be installed first):

install.packages("RcppEigen")
devtools::install_github("jaredhuling/oem")
or by cloning and building using R CMD INSTALL
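Once installed, a minimal sketch of a lasso fit with oem() looks like the following; the simulated data and dimensions here are purely illustrative:

library(oem)

# simulated data for illustration only
set.seed(1)
x <- matrix(rnorm(100 * 10), 100, 10)
y <- drop(x %*% c(1, -1, rep(0, 8))) + rnorm(100)

# the elastic net penalty with the default mixing parameter
# reduces to the lasso
fit <- oem(x, y, penalty = "elastic.net")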
To cite oem, please use:
Xiong, S., Dai, B., Huling, J., and Qian, P. Z. G. (2016). Orthogonalizing EM: A design-based least squares algorithm. Technometrics, Volume 58, Pages 285-293. URL: http://dx.doi.org/10.1080/00401706.2015.1054436
Huling, J. D. and Chien, P. (2018). Fast Penalized Regression and Cross Validation for Tall Data with the OEM Package. Journal of Statistical Software, to appear. URL: https://arxiv.org/abs/1801.09661
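The citation entries can also be printed from within R once the package is installed:

citation("oem")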
library(microbenchmark)
library(glmnet)
library(oem)

# compute the full solution path, n > p
set.seed(123)
n <- 1000000
p <- 100
m <- 25
b <- matrix(c(runif(m), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

lambdas <- oem(x, y, intercept = TRUE, standardize = FALSE,
               penalty = "elastic.net")$lambda[[1]]

microbenchmark(
    "glmnet[lasso]" = {res1 <- glmnet(x, y, thresh = 1e-10,
                                      standardize = FALSE,
                                      intercept = TRUE,
                                      lambda = lambdas)},
    "oem[lasso]"    = {res2 <- oem(x, y, penalty = "elastic.net",
                                   intercept = TRUE,
                                   standardize = FALSE,
                                   lambda = lambdas,
                                   tol = 1e-10)},
    times = 5
)
## Unit: seconds
## expr min lq mean median uq max neval
## glmnet[lasso] 7.610364 7.622585 7.879448 7.667767 7.945518 8.551005 5
## oem[lasso] 1.969916 2.027118 2.133341 2.089135 2.126875 2.453660 5
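As a quick sanity check that the two packages agree on the solutions, the largest absolute difference between the fitted coefficient paths can be computed along the lines of the comparisons further below; this sketch assumes both objects store the intercept in the first row, as the later examples suggest:

# largest absolute discrepancy between the glmnet and oem lasso paths
max(abs(as.matrix(coef(res1)) - res2$beta[[1]]))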
library(sparsenet)
library(ncvreg)
library(plus)

# compute the full solution path, n > p
set.seed(123)
n <- 5000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

mcp.lam <- oem(x, y, penalty = "mcp", gamma = 2,
               intercept = TRUE, standardize = TRUE,
               nlambda = 200, tol = 1e-10)$lambda[[1]]

scad.lam <- oem(x, y, penalty = "scad", gamma = 4,
                intercept = TRUE, standardize = TRUE,
                nlambda = 200, tol = 1e-10)$lambda[[1]]

microbenchmark(
    "sparsenet[mcp]" = {res1 <- sparsenet(x, y, thresh = 1e-10,
                                          gamma = c(2, 3), # sparsenet throws an error
                                                           # if you only fit 1 value of gamma
                                          nlambda = 200)},
    "oem[mcp]"     = {res2 <- oem(x, y, penalty = "mcp", gamma = 2,
                                  intercept = TRUE, standardize = TRUE,
                                  nlambda = 200, tol = 1e-10)},
    "ncvreg[mcp]"  = {res3 <- ncvreg(x, y, penalty = "MCP", gamma = 2,
                                     lambda = mcp.lam, eps = 1e-7)},
    "plus[mcp]"    = {res4 <- plus(x, y, method = "mc+", gamma = 2,
                                   lam = mcp.lam)},
    "oem[scad]"    = {res5 <- oem(x, y, penalty = "scad", gamma = 4,
                                  intercept = TRUE, standardize = TRUE,
                                  nlambda = 200, tol = 1e-10)},
    "ncvreg[scad]" = {res6 <- ncvreg(x, y, penalty = "SCAD", gamma = 4,
                                     lambda = scad.lam, eps = 1e-7)},
    "plus[scad]"   = {res7 <- plus(x, y, method = "scad", gamma = 4,
                                   lam = scad.lam)},
    times = 5
)
## Unit: milliseconds
##           expr       min        lq      mean    median        uq      max neval
## sparsenet[mcp] 1762.3026 1779.0533 1907.9942 1871.4751 1954.0494 2173.091     5
##       oem[mcp]  159.3148  159.6247  194.6605  160.0044  238.3018  256.057     5
##    ncvreg[mcp] 7566.5792 7636.3529 7900.8602 7681.1292 7907.0934 8713.146     5
##      plus[mcp] 1625.3785 1692.9125 1703.2951 1694.1239 1711.7150 1792.346     5
##      oem[scad]  136.1331  136.3932  138.6294  137.1140  138.2907  145.216     5
##   ncvreg[scad] 7485.8139 8060.6739 8534.4502 8388.1125 8779.2423 9958.408     5
##     plus[scad] 1765.2935 1873.8984 2009.8369 1878.5176 2097.5155 2433.959     5
diffs <- array(NA, dim = c(4, 1))
colnames(diffs) <- "abs diff"
rownames(diffs) <- c("MCP: oem and ncvreg", "SCAD: oem and ncvreg",
                     "MCP: oem and plus", "SCAD: oem and plus")
diffs[,1] <- c(max(abs(res2$beta[[1]] - res3$beta)),
               max(abs(res5$beta[[1]] - res6$beta)),
               max(abs(res2$beta[[1]][-1, 1:nrow(res4$beta)] - t(res4$beta))),
               max(abs(res5$beta[[1]][-1, 1:nrow(res7$beta)] - t(res7$beta))))
diffs
In addition to the group lasso, the oem package offers computation for the group MCP, group SCAD, and sparse group lasso penalties. All of the aforementioned penalties can also be augmented with a ridge penalty.
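For example, a single ridge-augmented group fit might look like the following sketch, which uses the grp.mcp.net penalty name and the alpha mixing parameter that appear in the combined examples further below; the data and group assignments here are placeholders:

# group MCP + ridge ("net") penalty; alpha mixes the two components
fit <- oem(x, y, penalty = "grp.mcp.net",
           groups = rep(1:20, each = 10), # one group label per column of x
           alpha  = 0.25)                 # mixing param for the l2 penalty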
library(gglasso)
library(grpreg)
library(grplasso)

# compute the full solution path, n > p
set.seed(123)
n <- 10000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)
groups <- rep(1:floor(p/10), each = 10)

grp.lam <- oem(x, y, penalty = "grp.lasso",
               groups = groups,
               nlambda = 100, tol = 1e-10)$lambda[[1]]

microbenchmark(
    "gglasso[grp.lasso]"  = {res1 <- gglasso(x, y, group = groups,
                                             lambda = grp.lam,
                                             intercept = FALSE,
                                             eps = 1e-8)},
    "oem[grp.lasso]"      = {res2 <- oem(x, y, groups = groups,
                                         intercept = FALSE,
                                         standardize = FALSE,
                                         penalty = "grp.lasso",
                                         lambda = grp.lam,
                                         tol = 1e-10)},
    "grplasso[grp.lasso]" = {res3 <- grplasso(x = x, y = y, index = groups,
                                              standardize = FALSE,
                                              center = FALSE,
                                              model = LinReg(),
                                              lambda = grp.lam * n * 2,
                                              control = grpl.control(trace = 0,
                                                                     tol = 1e-10))},
    "grpreg[grp.lasso]"   = {res4 <- grpreg(x, y, group = groups,
                                            eps = 1e-10, lambda = grp.lam)},
    times = 5
)
## Unit: milliseconds
##                expr        min       lq      mean    median        uq       max neval
##  gglasso[grp.lasso] 3483.62353 3529.320 3601.1492 3600.3122 3675.7521 3716.7380     5
##      oem[grp.lasso]   99.62382  100.823  107.9303  106.6158  114.8208  117.7679     5
## grplasso[grp.lasso] 7105.62106 7409.959 7835.5972 7836.2535 7977.5347 8848.6178     5
##   grpreg[grp.lasso] 1972.84562 2013.477 2132.7026 2015.0525 2149.0820 2513.0563     5
diffs <- array(NA, dim = c(2, 1))
colnames(diffs) <- "abs diff"
rownames(diffs) <- c("oem and gglasso", "oem and grplasso")
diffs[,1] <- c(max(abs(res2$beta[[1]][-1,] - res1$beta)),
               max(abs(res2$beta[[1]][-1,] - res3$coefficients)))
diffs
set.seed(123)
n <- 500000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)
groups <- rep(1:floor(p/10), each = 10)

# fit all group penalties at once
grp.penalties <- c("grp.lasso", "grp.mcp", "grp.scad",
                   "grp.mcp.net", "grp.scad.net",
                   "sparse.group.lasso")
system.time(res <- oem(x, y,
                       penalty = grp.penalties,
                       groups  = groups,
                       alpha   = 0.25,  # mixing param for l2 penalties
                       tau     = 0.5))  # mixing param for sparse grp lasso
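Each penalty gets its own solution path in the returned object; here is a brief sketch of pulling the paths out by position, assuming the beta list follows the order in which the penalties were specified (consistent with the beta[[1]] indexing used throughout these examples):

# one coefficient matrix per penalty, in the order given above
grp.lasso.path <- res$beta[[1]]  # group lasso path
grp.mcp.path   <- res$beta[[2]]  # group MCP path
dim(grp.lasso.path)              # (p + 1) x nlambda, intercept in row 1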
The oem algorithm is quite efficient at fitting multiple penalties simultaneously when p is not too large, since the computationally expensive orthogonalization step depends only on the data and is shared across all of the penalties.
set.seed(123)
n <- 100000
p <- 100
m <- 15
b <- matrix(c(runif(m, -0.25, 0.25), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

microbenchmark(
    "oem[lasso]" = {res1 <- oem(x, y,
                                penalty = "elastic.net",
                                intercept = TRUE,
                                standardize = TRUE,
                                tol = 1e-10)},
    "oem[lasso/mcp/scad/ols]" = {res2 <- oem(x, y,
                                             penalty = c("elastic.net", "mcp",
                                                         "scad", "grp.lasso",
                                                         "grp.mcp", "sparse.grp.lasso",
                                                         "grp.mcp.net", "mcp.net"),
                                             gamma = 4,
                                             tau = 0.5,
                                             alpha = 0.25,
                                             groups = rep(1:10, each = 10),
                                             intercept = TRUE,
                                             standardize = TRUE,
                                             tol = 1e-10)},
    times = 5
)
## Unit: milliseconds
##                     expr      min       lq     mean   median       uq      max neval
##               oem[lasso] 214.3171 218.2459 225.8759 219.6271 226.8682 250.3211     5
##  oem[lasso/mcp/scad/ols] 253.8738 255.8601 279.3674 272.4458 276.6489 338.0085     5
# layout(matrix(1:4, ncol = 2, byrow = TRUE))
plot(res2, which.model = 1, lwd = 2, xvar = "lambda")
plot(res2, which.model = 2, lwd = 2, xvar = "lambda")
plot(res2, which.model = 4, lwd = 2, xvar = "lambda")
plot(res2, which.model = 7, lwd = 2, xvar = "lambda")
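Beyond plotting, the which.model argument also selects among the simultaneously fitted models when predicting. A hedged sketch, assuming the predict method accepts new data through a glmnet-style newx argument:

# predictions from the MCP fit (model 2, per the order the penalties
# were specified in the call above)
preds <- predict(res2, newx = x[1:5, ], which.model = 2)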