## Introduction

The oem package provides estimaton for various penalized linear models using the Orthogonalizing EM algorithm. Documentation for the package can be found here: oem site.

Install using the devtools package (RcppEigen must be installed first as well):

devtools::install_github("jaredhuling/oem")

or by cloning and building using R CMD INSTALL

## Citation

To cite oem please use:

Xiong, S., Dai, B., Huling, J., Qian, P. Z. G. (2016) Orthogonalizing EM: A design-based least squares algorithm, Technometrics, Volume 58, Pages 285-293,
http://dx.doi.org/10.1080/00401706.2015.1054436.

Huling, J.D. and Chien, P. (2018), Fast Penalized Regression and Cross Validation for Tall Data with the OEM Package, Journal of Statistical Software, to appear, URL: https://arxiv.org/abs/1801.09661.

## Penalties

### Lasso

library(microbenchmark)
library(glmnet)
library(oem)
# compute the full solution path, n > p
set.seed(123)
n <- 1000000
p <- 100
m <- 25
b <- matrix(c(runif(m), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

lambdas = oem(x, y, intercept = TRUE, standardize = FALSE, penalty = "elastic.net")$lambda[] microbenchmark( "glmnet[lasso]" = {res1 <- glmnet(x, y, thresh = 1e-10, standardize = FALSE, intercept = TRUE, lambda = lambdas)}, "oem[lasso]" = {res2 <- oem(x, y, penalty = "elastic.net", intercept = TRUE, standardize = FALSE, lambda = lambdas, tol = 1e-10)}, times = 5 ) ## Unit: seconds ## expr min lq mean median uq max ## glmnet[lasso] 9.348672 9.38828 11.638270 12.080791 13.265859 14.10775 ## oem[lasso] 2.652288 3.10888 3.872852 3.451426 4.429786 5.72188 ## neval ## 5 ## 5 # difference of results max(abs(coef(res1) - res2$beta[]))
##  1.048072e-07
res1 <- glmnet(x, y, thresh = 1e-12,
standardize = FALSE,
intercept = TRUE,
lambda = lambdas)

# answers are now more close once we require more precise glmnet solutions
max(abs(coef(res1) - res2$beta[])) ##  3.763397e-09 ### Nonconvex Penalties library(sparsenet) library(ncvreg) library(plus) # compute the full solution path, n > p set.seed(123) n <- 5000 p <- 200 m <- 25 b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m))) x <- matrix(rnorm(n * p, sd = 3), n, p) y <- drop(x %*% b) + rnorm(n) mcp.lam <- oem(x, y, penalty = "mcp", gamma = 2, intercept = TRUE, standardize = TRUE, nlambda = 200, tol = 1e-10)$lambda[]

scad.lam <- oem(x, y, penalty = "scad",
gamma = 4, intercept = TRUE,
standardize = TRUE,
nlambda = 200, tol = 1e-10)$lambda[] microbenchmark( "sparsenet[mcp]" = {res1 <- sparsenet(x, y, thresh = 1e-10, gamma = c(2,3), #sparsenet throws an error #if you only fit 1 value of gamma nlambda = 200)}, "oem[mcp]" = {res2 <- oem(x, y, penalty = "mcp", gamma = 2, intercept = TRUE, standardize = TRUE, nlambda = 200, tol = 1e-10)}, "ncvreg[mcp]" = {res3 <- ncvreg(x, y, penalty = "MCP", gamma = 2, lambda = mcp.lam, eps = 1e-7)}, "plus[mcp]" = {res4 <- plus(x, y, method = "mc+", gamma = 2, lam = mcp.lam)}, "oem[scad]" = {res5 <- oem(x, y, penalty = "scad", gamma = 4, intercept = TRUE, standardize = TRUE, nlambda = 200, tol = 1e-10)}, "ncvreg[scad]" = {res6 <- ncvreg(x, y, penalty = "SCAD", gamma = 4, lambda = scad.lam, eps = 1e-7)}, "plus[scad]" = {res7 <- plus(x, y, method = "scad", gamma = 4, lam = scad.lam)}, times = 5 ) ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Warning message: some lam not reached by the plus path and dropped ## Unit: milliseconds ## expr min lq mean median uq ## sparsenet[mcp] 2190.2603 2229.8939 2352.8026 2257.4280 2464.9714 ## oem[mcp] 189.4601 200.9284 208.1295 207.6121 212.3561 ## ncvreg[mcp] 8678.2871 9032.7411 9155.7129 9057.6076 9260.1797 ## plus[mcp] 2147.4707 2149.8849 2183.5159 2169.1367 2187.5362 ## oem[scad] 159.3547 167.8190 181.3854 171.0642 182.4713 ## ncvreg[scad] 9079.8976 9269.4556 9311.4232 9332.3771 9405.5469 ## plus[scad] 2611.3499 2790.3159 2934.2532 2967.3169 3097.7322 ## max neval ## 2621.4595 5 ## 230.2906 5 ## 9749.7491 5 ## 2263.5511 5 ## 226.2177 5 ## 9469.8389 5 ## 3204.5509 5 diffs <- array(NA, dim = c(4, 1)) colnames(diffs) <- "abs diff" rownames(diffs) <- c("MCP: oem and ncvreg", "SCAD: oem and ncvreg", "MCP: oem and plus", "SCAD: oem and plus") diffs[,1] <- c(max(abs(res2$beta[] - res3$beta)), max(abs(res5$beta[] - res6$beta)), max(abs(res2$beta[][-1,1:nrow(res4$beta)] - t(res4$beta))),
max(abs(res5$beta[][-1,1:nrow(res7$beta)] - t(res7$beta)))) diffs ## abs diff ## MCP: oem and ncvreg 1.725859e-07 ## SCAD: oem and ncvreg 5.094648e-08 ## MCP: oem and plus 2.684136e-11 ## SCAD: oem and plus 1.732409e-11 ### Group Penalties In addition to the group lasso, the oem package offers computation for the group MCP, group SCAD, and group sparse lasso penalties. All aforementioned penalties can also be augmented with a ridge penalty. library(gglasso) library(grpreg) library(grplasso) # compute the full solution path, n > p set.seed(123) n <- 10000 p <- 200 m <- 25 b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m))) x <- matrix(rnorm(n * p, sd = 3), n, p) y <- drop(x %*% b) + rnorm(n) groups <- rep(1:floor(p/10), each = 10) grp.lam <- oem(x, y, penalty = "grp.lasso", groups = groups, nlambda = 100, tol = 1e-10)$lambda[]

microbenchmark(
"gglasso[grp.lasso]" = {res1 <- gglasso(x, y, group = groups,
lambda = grp.lam,
intercept = FALSE,
eps = 1e-8)},
"oem[grp.lasso]"    = {res2 <- oem(x, y,
groups = groups,
intercept = FALSE,
standardize = FALSE,
penalty = "grp.lasso",
lambda = grp.lam,
tol = 1e-10)},
"grplasso[grp.lasso]"    = {res3 <- grplasso(x=x, y=y,
index = groups,
standardize = FALSE,
center = FALSE, model = LinReg(),
lambda = grp.lam * n * 2,
control = grpl.control(trace = 0, tol = 1e-10))},
"grpreg[grp.lasso]"    = {res4 <- grpreg(x, y, group = groups,
eps = 1e-10, lambda = grp.lam)},
times = 5
)
## Unit: milliseconds
##                 expr       min        lq       mean    median         uq
##   gglasso[grp.lasso] 4213.0698 4235.5627  4464.0790 4427.5890  4546.2656
##       oem[grp.lasso]  122.5265  151.8338   155.6852  164.9955   165.7759
##  grplasso[grp.lasso] 9010.8600 9224.9403 10376.0763 9640.6276 10956.3353
##    grpreg[grp.lasso] 2436.6968 2542.1362  2693.5194 2700.2787  2851.1327
##         max neval
##   4897.9079     5
##    173.2944     5
##  13047.6184     5
##   2937.3528     5
diffs <- array(NA, dim = c(2, 1))
colnames(diffs) <- "abs diff"
rownames(diffs) <- c("oem and gglasso", "oem and grplasso")
diffs[,1] <- c(  max(abs(res2$beta[][-1,] - res1$beta)), max(abs(res2$beta[][-1,] - res3$coefficients))  )
diffs
##                      abs diff
## oem and gglasso  1.303906e-06
## oem and grplasso 1.645871e-08

#### Bigger Group Lasso Example

set.seed(123)
n <- 500000
p <- 200
m <- 25
b <- matrix(c(runif(m, -0.5, 0.5), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)
groups <- rep(1:floor(p/10), each = 10)

# fit all group penalties at once
grp.penalties <- c("grp.lasso", "grp.mcp", "grp.scad",
"sparse.group.lasso")
system.time(res <- oem(x, y,
penalty = grp.penalties,
groups  = groups,
alpha   = 0.25, # mixing param for l2 penalties
tau     = 0.5)) # mixing param for sparse grp lasso 
##    user  system elapsed
##    4.03    0.23    4.57

### Fitting Multiple Penalties

The oem algorithm is quite efficient at fitting multiple penalties simultaneously when p is not too big.

set.seed(123)
n <- 100000
p <- 100
m <- 15
b <- matrix(c(runif(m, -0.25, 0.25), rep(0, p - m)))
x <- matrix(rnorm(n * p, sd = 3), n, p)
y <- drop(x %*% b) + rnorm(n)

microbenchmark(
"oem[lasso]"    = {res1 <- oem(x, y,
penalty = "elastic.net",
intercept = TRUE,
standardize = TRUE,
tol = 1e-10)},
"oem[lasso/mcp/scad/ols]"    = {res2 <- oem(x, y,
penalty = c("elastic.net", "mcp",
"grp.mcp", "sparse.grp.lasso",
"grp.mcp.net", "mcp.net"),
gamma = 4,
tau = 0.5,
alpha = 0.25,
groups = rep(1:10, each = 10),
intercept = TRUE,
standardize = TRUE,
tol = 1e-10)},
times = 5
)
## Unit: milliseconds
##                     expr      min       lq     mean   median       uq
##               oem[lasso] 263.5128 266.2448 270.2931 266.6826 268.7175
##  oem[lasso/mcp/scad/ols] 319.3591 323.0294 326.5360 326.9471 330.2104
##       max neval
##  286.3077     5
##  333.1339     5
#png("../mcp_path.png", width = 3000, height = 3000, res = 400);par(mar=c(5.1,5.1,4.1,2.1));plot(res2, which.model = 2, main = "mcp",lwd = 3,cex.axis=2.0, cex.lab=2.0, cex.main=2.0, cex.sub=2.0);dev.off()
#

layout(matrix(1:4, ncol=2, byrow = TRUE))
plot(res2, which.model = 1, lwd = 2,
xvar = "lambda")
plot(res2, which.model = 2, lwd = 2,
xvar = "lambda")
plot(res2, which.model = 4, lwd = 2,
xvar = "lambda")
plot(res2, which.model = 7, lwd = 2,
xvar = "lambda") 