Cross Validation for the vennLasso

cv.vennLasso(
  x,
  y,
  groups,
  lambda = NULL,
  compute.se = FALSE,
  conf.int = NULL,
  type.measure = c("mse", "deviance", "class", "auc", "mae", "brier"),
  nfolds = 10,
  foldid,
  grouped = TRUE,
  keep = FALSE,
  parallel = FALSE,
  ...
)

Arguments

x	input matrix or SparseMatrix of dimension nobs x nvars. Each row is an observation, each column corresponds to a covariate
y	numeric response vector of length nobs
groups	A list of length equal to the number of groups containing vectors of integers indicating the variable IDs for each group. For example, groups=list(c(1,2), c(2,3), c(3,4,5)) specifies that Group 1 contains variables 1 and 2, Group 2 contains variables 2 and 3, and Group 3 contains variables 3, 4, and 5. Can also be a matrix of 0s and 1s with the number of columns equal to the number of groups and the number of rows equal to the number of variables. A value of 1 in row i and column j indicates that variable i is in group j and 0 indicates that variable i is not in group j.
lambda	A user-specified sequence of lambda values. Left unspecified, the a sequence of lambda values is automatically computed, ranging uniformly on the log scale over the relevant range of lambda values.
compute.se	logical flag. If `TRUE`, standard errors will be computed, otherwise if `FALSE` they will not
conf.int	value between 0 and 1 indicating the level of the confidence intervals to be computed. For example if `conf.int = 0.95`, 95 percent confidence intervals will be computed.
type.measure	One of `c("mse","deviance","class","auc","mae","brier")` indicating measure to evaluate for cross-validation. The default is `type.measure = "deviance"`, which uses squared-error for gaussian models (a.k.a `type.measure = "mse"` there), deviance for logistic regression. `type.measure = "class"` applies to binomial only. `type.measure = "auc"` is for two-class logistic regression only. `type.measure = "mse"` or `type.measure = "mae"` (mean absolute error) can be used by all models; they measure the deviation from the fitted mean to the response. `type.measure = "brier"` is for models with `family = "coxph"` and will compute the Brier score.
nfolds	number of folds for cross-validation. default is 10. 3 is smallest value allowed.
foldid	an optional vector of values between 1 and nfold specifying which fold each observation belongs to.
grouped	Like in glmnet, this is an experimental argument, with default `TRUE`, and can be ignored by most users. For all models, this refers to computing nfolds separate statistics, and then using their mean and estimated standard error to describe the CV curve. If `grouped = FALSE`, an error matrix is built up at the observation level from the predictions from the `nfold` fits, and then summarized (does not apply to `type.measure = "auc"`).
keep	If `keep = TRUE`, a prevalidated list of arrasy is returned containing fitted values for each observation and each value of lambda for each model. This means these fits are computed with this observation and the rest of its fold omitted. The folid vector is also returned. Default is `keep = FALSE`
parallel	If TRUE, use parallel foreach to fit each fold. Must register parallel before hand, such as doMC.
...	parameters to be passed to vennLasso

Value

An object with S3 class "cv.vennLasso"

Examples


library(Matrix)

set.seed(123)
n.obs <- 150
n.vars <- 25

true.beta.mat <- array(NA, dim = c(3, n.vars))
true.beta.mat[1,] <- c(-0.5, -1, 0, 0, 2, rep(0, n.vars - 5))
true.beta.mat[2,] <- c(0.5, 0.5, -0.5, -0.5, 1, -1, rep(0, n.vars - 6))
true.beta.mat[3,] <- c(0, 0, 1, 1, -1, rep(0, n.vars - 5))
rownames(true.beta.mat) <- c("1,0", "1,1", "0,1")
true.beta <- as.vector(t(true.beta.mat))

x.sub1 <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
x.sub2 <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
x.sub3 <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)

x <- as.matrix(rbind(x.sub1, x.sub2, x.sub3))

conditions <- as.matrix(cbind(c(rep(1, 2 * n.obs), rep(0, n.obs)),
                              c(rep(0, n.obs), rep(1, 2 * n.obs))))

y <- rnorm(n.obs * 3, sd = 3) + drop(as.matrix(bdiag(x.sub1, x.sub2, x.sub3)) %*% true.beta)

fit <- cv.vennLasso(x = x, y = y, groups = conditions, nfolds = 3)

fitted.coef <- predict(fit$vennLasso.fit, type = "coefficients", s = fit$lambda.min)
(true.coef <- true.beta.mat[match(dimnames(fit$vennLasso.fit$beta)[[1]],
                                  rownames(true.beta.mat)),])
#>     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> 0,1  0.0  0.0  1.0  1.0   -1    0    0    0    0     0     0     0     0     0
#> 1,0 -0.5 -1.0  0.0  0.0    2    0    0    0    0     0     0     0     0     0
#> 1,1  0.5  0.5 -0.5 -0.5    1   -1    0    0    0     0     0     0     0     0
#>     [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
#> 0,1     0     0     0     0     0     0     0     0     0     0     0
#> 1,0     0     0     0     0     0     0     0     0     0     0     0
#> 1,1     0     0     0     0     0     0     0     0     0     0     0
round(fitted.coef, 2)
#> , , 1
#> 
#>     (Intercept)    V1    V2    V3    V4    V5    V6 V7 V8 V9   V10   V11 V12
#> 0,1       -0.26  0.00  0.00  0.54  0.54 -0.46  0.00  0  0  0 -0.13  0.03   0
#> 1,0        0.12 -0.29 -0.74  0.00  0.00  1.53  0.09  0  0  0  0.07  0.00   0
#> 1,1        0.07  0.41  0.67 -0.28 -0.42  1.04 -0.92  0  0  0  0.02 -0.01   0
#>     V13   V14  V15  V16 V17 V18   V19 V20 V21   V22 V23 V24   V25
#> 0,1   0  0.02 0.00 0.00   0   0  0.00   0   0 -0.04   0   0  0.09
#> 1,0   0 -0.07 0.00 0.00   0   0  0.07   0   0 -0.04   0   0 -0.33
#> 1,1   0  0.38 0.01 0.13   0   0 -0.01   0   0 -0.35   0   0  0.04
#> 

## effects smaller for logistic regression
if (FALSE) {
true.beta.mat <- true.beta.mat / 2
true.beta <- true.beta / 2
# logistic regression example#'
y <- rbinom(n.obs * 3, 1,
       prob = 1 / (1 + exp(-drop(as.matrix(bdiag(x.sub1, x.sub2, x.sub3)) %*% true.beta))))

bfit <- cv.vennLasso(x = x, y = y, groups = conditions, family = "binomial",
                     nfolds = 3)

fitted.coef <- predict(bfit$vennLasso.fit, type = "coefficients", s = bfit$lambda.min)
(true.coef <- true.beta.mat[match(dimnames(bfit$vennLasso.fit$beta)[[1]],
                                  rownames(true.beta.mat)),])
round(fitted.coef, 2)
}