Cross Validation for the vennLasso

cv.vennLasso(x, y, groups, lambda = NULL, compute.se = FALSE,
conf.int = NULL, type.measure = c("mse", "deviance", "class", "auc",
"mae", "brier"), nfolds = 10, foldid, grouped = TRUE, keep = FALSE,
parallel = FALSE, ...)

## Arguments

x input matrix or SparseMatrix of dimension nobs x nvars. Each row is an observation, each column corresponds to a covariate numeric response vector of length nobs A list of length equal to the number of groups containing vectors of integers indicating the variable IDs for each group. For example, groups=list(c(1,2), c(2,3), c(3,4,5)) specifies that Group 1 contains variables 1 and 2, Group 2 contains variables 2 and 3, and Group 3 contains variables 3, 4, and 5. Can also be a matrix of 0s and 1s with the number of columns equal to the number of groups and the number of rows equal to the number of variables. A value of 1 in row i and column j indicates that variable i is in group j and 0 indicates that variable i is not in group j. A user-specified sequence of lambda values. Left unspecified, the a sequence of lambda values is automatically computed, ranging uniformly on the log scale over the relevant range of lambda values. logical flag. If TRUE, standard errors will be computed, otherwise if FALSE they will not value between 0 and 1 indicating the level of the confidence intervals to be computed. For example if conf.int = 0.95, 95 percent confidence intervals will be computed. One of c("mse","deviance","class","auc","mae","brier") indicating measure to evaluate for cross-validation. The default is type.measure = "deviance", which uses squared-error for gaussian models (a.k.a type.measure = "mse" there), deviance for logistic regression. type.measure = "class" applies to binomial only. type.measure = "auc" is for two-class logistic regression only. type.measure = "mse" or type.measure = "mae" (mean absolute error) can be used by all models; they measure the deviation from the fitted mean to the response. type.measure = "brier" is for models with family = "coxph" and will compute the Brier score. number of folds for cross-validation. default is 10. 3 is smallest value allowed. an optional vector of values between 1 and nfold specifying which fold each observation belongs to. Like in glmnet, this is an experimental argument, with default TRUE, and can be ignored by most users. For all models, this refers to computing nfolds separate statistics, and then using their mean and estimated standard error to describe the CV curve. If grouped = FALSE, an error matrix is built up at the observation level from the predictions from the nfold fits, and then summarized (does not apply to type.measure = "auc"). If keep = TRUE, a prevalidated list of arrasy is returned containing fitted values for each observation and each value of lambda for each model. This means these fits are computed with this observation and the rest of its fold omitted. The folid vector is also returned. Default is keep = FALSE If TRUE, use parallel foreach to fit each fold. Must register parallel before hand, such as doMC. parameters to be passed to vennLasso

## Value

An object with S3 class "cv.vennLasso"

## Examples


library(Matrix)

set.seed(123)
n.obs <- 150
n.vars <- 25

true.beta.mat <- array(NA, dim = c(3, n.vars))
true.beta.mat[1,] <- c(-0.5, -1, 0, 0, 2, rep(0, n.vars - 5))
true.beta.mat[2,] <- c(0.5, 0.5, -0.5, -0.5, 1, -1, rep(0, n.vars - 6))
true.beta.mat[3,] <- c(0, 0, 1, 1, -1, rep(0, n.vars - 5))
rownames(true.beta.mat) <- c("1,0", "1,1", "0,1")
true.beta <- as.vector(t(true.beta.mat))

x.sub1 <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
x.sub2 <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
x.sub3 <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)

x <- as.matrix(rbind(x.sub1, x.sub2, x.sub3))

conditions <- as.matrix(cbind(c(rep(1, 2 * n.obs), rep(0, n.obs)),
c(rep(0, n.obs), rep(1, 2 * n.obs))))

y <- rnorm(n.obs * 3, sd = 3) + drop(as.matrix(bdiag(x.sub1, x.sub2, x.sub3)) %*% true.beta)

fit <- cv.vennLasso(x = x, y = y, groups = conditions, nfolds = 3)

fitted.coef <- predict(fit$vennLasso.fit, type = "coefficients", s = fit$lambda.min)
(true.coef <- true.beta.mat[match(dimnames(fit$vennLasso.fit$beta)[[1]],
rownames(true.beta.mat)),])#>     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> 0,1  0.0  0.0  1.0  1.0   -1    0    0    0    0     0     0     0     0     0
#> 1,0 -0.5 -1.0  0.0  0.0    2    0    0    0    0     0     0     0     0     0
#> 1,1  0.5  0.5 -0.5 -0.5    1   -1    0    0    0     0     0     0     0     0
#>     [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
#> 0,1     0     0     0     0     0     0     0     0     0     0     0
#> 1,0     0     0     0     0     0     0     0     0     0     0     0
#> 1,1     0     0     0     0     0     0     0     0     0     0     0round(fitted.coef, 2)#> , , 1
#>
#>     (Intercept)    V1    V2    V3    V4    V5    V6 V7 V8 V9   V10   V11 V12
#> 0,1       -0.26  0.00  0.00  0.54  0.54 -0.46  0.00  0  0  0 -0.13  0.03   0
#> 1,0        0.12 -0.29 -0.74  0.00  0.00  1.53  0.09  0  0  0  0.07  0.00   0
#> 1,1        0.07  0.41  0.67 -0.28 -0.42  1.04 -0.92  0  0  0  0.02 -0.01   0
#>     V13   V14  V15  V16 V17 V18   V19 V20 V21   V22 V23 V24   V25
#> 0,1   0  0.02 0.00 0.00   0   0  0.00   0   0 -0.04   0   0  0.09
#> 1,0   0 -0.07 0.00 0.00   0   0  0.07   0   0 -0.04   0   0 -0.33
#> 1,1   0  0.38 0.01 0.13   0   0 -0.01   0   0 -0.35   0   0  0.04
#>
## effects smaller for logistic regression
true.beta.mat <- true.beta.mat / 2
true.beta <- true.beta / 2
# logistic regression example#'
y <- rbinom(n.obs * 3, 1,
prob = 1 / (1 + exp(-drop(as.matrix(bdiag(x.sub1, x.sub2, x.sub3)) %*% true.beta))))

bfit <- cv.vennLasso(x = x, y = y, groups = conditions, family = "binomial",
nfolds = 3)

fitted.coef <- predict(bfit$vennLasso.fit, type = "coefficients", s = bfit$lambda.min)
(true.coef <- true.beta.mat[match(dimnames(bfit$vennLasso.fit$beta)[[1]],
rownames(true.beta.mat)),])#>      [,1]  [,2]  [,3]  [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
#> 0,1  0.00  0.00  0.50  0.50 -0.5  0.0    0    0    0     0     0     0     0
#> 1,0 -0.25 -0.50  0.00  0.00  1.0  0.0    0    0    0     0     0     0     0
#> 1,1  0.25  0.25 -0.25 -0.25  0.5 -0.5    0    0    0     0     0     0     0
#>     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
#> 0,1     0     0     0     0     0     0     0     0     0     0     0     0
#> 1,0     0     0     0     0     0     0     0     0     0     0     0     0
#> 1,1     0     0     0     0     0     0     0     0     0     0     0     0round(fitted.coef, 2)#> , , 1
#>
#>     (Intercept)    V1 V2    V3    V4    V5    V6    V7    V8    V9   V10   V11
#> 0,1       -0.14  0.00  0  0.31  0.41 -0.24 -0.10  0.00 -0.01  0.01 -0.01  0.03
#> 1,0       -0.15  0.00  0 -0.08  0.21  0.56  0.06  0.00  0.00  0.06  0.00 -0.03
#> 1,1        0.05 -0.05  0 -0.23 -0.26  0.74 -0.53 -0.14 -0.01 -0.09  0.00  0.00
#>     V12 V13 V14   V15 V16 V17 V18 V19  V20 V21   V22  V23   V24   V25
#> 0,1   0   0   0  0.00   0   0   0   0 0.00   0  0.00 0.10 -0.10  0.01
#> 1,0   0   0   0 -0.02   0   0   0   0 0.00   0 -0.03 0.00  0.00 -0.11
#> 1,1   0   0   0 -0.10   0   0   0   0 0.04   0  0.01 0.14  0.33  0.14
#>