Fitting vennLasso models
vennLasso( x, y, groups, family = c("gaussian", "binomial"), nlambda = 100L, lambda = NULL, lambda.min.ratio = NULL, lambda.fused = NULL, penalty.factor = NULL, group.weights = NULL, adaptive.lasso = FALSE, adaptive.fused = FALSE, gamma = 1, standardize = FALSE, intercept = TRUE, one.intercept = FALSE, compute.se = FALSE, conf.int = NULL, rho = NULL, dynamic.rho = TRUE, maxit = 500L, abs.tol = 1e-05, rel.tol = 1e-05, irls.tol = 1e-05, irls.maxit = 100L, model.matrix = FALSE, ... )
x | input matrix of dimension nobs by nvars. Each row is an observation, each column corresponds to a covariate |
---|---|
y | numeric response vector of length nobs |
groups | A list of length equal to the number of groups containing vectors of integers
indicating the variable IDs for each group. For example, |
family |
|
nlambda | The number of lambda values. Default is 100. |
lambda | A user-specified sequence of lambda values. Left unspecified, the a sequence of lambda values is automatically computed, ranging uniformly on the log scale over the relevant range of lambda values. |
lambda.min.ratio | Smallest value for lambda, as a fraction of |
lambda.fused | tuning parameter for fused lasso penalty |
penalty.factor | vector of weights to be multiplied to the tuning parameter for the group lasso penalty. A vector of length equal to the number of groups |
group.weights | A vector of values representing multiplicative factors by which each group's penalty is to be multiplied. Often, this is a function (such as the square root) of the number of predictors in each group. The default is to use the square root of group size for the group selection methods. |
adaptive.lasso | Flag indicating whether or not to use adaptive lasso weights. If set to |
adaptive.fused | Flag indicating whether or not to use adaptive fused lasso weights. |
gamma | power to raise the MLE estimated weights by for the adaptive lasso. defaults to 1 |
standardize | Should the data be standardized? Defaults to |
intercept | Should an intercept be fit? Defaults to |
one.intercept | Should a single intercept be fit for all subpopulations instead of one
for each subpopulation? Defaults to |
compute.se | Should standard errors be computed? If |
conf.int | level for confidence intervals. Defaults to |
rho | ADMM parameter. must be a strictly positive value. By default, an appropriate value is automatically chosen |
dynamic.rho |
|
maxit | integer. Maximum number of ADMM iterations. Default is 500. |
abs.tol | absolute convergence tolerance for ADMM iterations for the relative dual and primal residuals.
Default is |
rel.tol | relative convergence tolerance for ADMM iterations for the relative dual and primal residuals.
Default is |
irls.tol | convergence tolerance for IRLS iterations. Only used if |
irls.maxit | integer. Maximum number of IRLS iterations. Only used if |
model.matrix | logical flag. Should the design matrix used be returned? |
... | not used |
An object with S3 class "vennLasso"
library(Matrix) # first simulate heterogeneous data using # genHierSparseData set.seed(123) dat.sim <- genHierSparseData(ncats = 2, nvars = 25, nobs = 200, hier.sparsity.param = 0.5, prop.zero.vars = 0.5, family = "gaussian") x <- dat.sim$x conditions <- dat.sim$group.ind y <- dat.sim$y true.beta.mat <- dat.sim$beta.mat fit <- vennLasso(x = x, y = y, groups = conditions) (true.coef <- true.beta.mat[match(dimnames(fit$beta)[[1]], rownames(true.beta.mat)),])#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] #> 0,0 0 0 0 -0.3132412 0.0000000 -0.4185941 0.2619159 0.4252133 0 #> 0,1 0 0 0 0.0000000 -0.2818829 0.4383270 0.0000000 0.0000000 0 #> 1,0 0 0 0 0.0000000 -0.3544117 0.4470490 0.0000000 0.0000000 0 #> 1,1 0 0 0 0.0000000 -0.3802839 -0.4149596 0.0000000 0.4465704 0 #> [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] #> 0,0 0.352236 0.0000000 0.4797143 0 0 0 0 0 0 0 #> 0,1 0.000000 -0.3459924 -0.3185959 0 0 0 0 0 0 0 #> 1,0 0.000000 0.0000000 -0.2937632 0 0 0 0 0 0 0 #> 1,1 0.000000 -0.3279256 -0.3523687 0 0 0 0 0 0 0 #> [,20] [,21] [,22] [,23] [,24] [,25] #> 0,0 0 0 0 0 0 0 #> 0,1 0 0 0 0 0 0 #> 1,0 0 0 0 0 0 0 #> 1,1 0 0 0 0 0 0#> (Intercept) V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 #> 0,0 -0.03 0 0 0 -0.19 0.00 -0.34 0.1 0.34 0 0.28 0.03 0.45 0 0 #> 0,1 -0.08 0 0 0 0.00 -0.15 0.18 0.0 0.00 0 0.00 -0.20 -0.16 0 0 #> 1,0 0.02 0 0 0 0.00 -0.19 0.28 0.0 0.00 0 0.00 0.00 -0.22 0 0 #> 1,1 0.08 0 0 0 0.00 -0.27 -0.34 0.0 0.31 0 0.00 -0.21 -0.20 0 0 #> V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 #> 0,0 0 0 0 -0.02 0 0 0 0 0.03 0 0 #> 0,1 0 0 0 0.00 0 0 0 0 0.00 0 0 #> 1,0 0 0 0 0.00 0 0 0 0 0.00 0 0 #> 1,1 0 0 0 0.00 0 0 0 0 0.00 0 0## fit adaptive version and compute confidence intervals afit <- vennLasso(x = x, y = y, groups = conditions, conf.int = 0.95, adaptive.lasso = TRUE) (true.coef <- true.beta.mat[match(dimnames(fit$beta)[[1]], rownames(true.beta.mat)),])[,1:10]#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] #> 0,0 0 0 0 -0.3132412 0.0000000 -0.4185941 0.2619159 0.4252133 0 #> 0,1 0 0 0 0.0000000 -0.2818829 0.4383270 0.0000000 0.0000000 0 #> 1,0 0 0 0 0.0000000 -0.3544117 0.4470490 0.0000000 0.0000000 0 #> 1,1 0 0 0 0.0000000 -0.3802839 -0.4149596 0.0000000 0.4465704 0 #> [,10] #> 0,0 0.352236 #> 0,1 0.000000 #> 1,0 0.000000 #> 1,1 0.000000#> (Intercept) V1 V2 V3 V4 V5 V6 V7 V8 V9 #> 0,0 -0.03 0 0 0 -0.12 0.00 -0.33 0 0.32 0 #> 0,1 -0.07 0 0 0 0.00 -0.13 0.13 0 0.00 0 #> 1,0 0.02 0 0 0 0.00 -0.16 0.29 0 0.00 0 #> 1,1 0.08 0 0 0 0.00 -0.27 -0.36 0 0.29 0#> (Intercept) V1 V2 V3 V4 V5 V6 V7 V8 V9 #> 0,0 -0.17 0 0 0 -0.25 0.00 -0.47 0 0.17 0 #> 0,1 -0.21 0 0 0 0.00 -0.26 -0.01 0 0.00 0 #> 1,0 -0.12 0 0 0 0.00 -0.30 0.16 0 0.00 0 #> 1,1 -0.06 0 0 0 0.00 -0.41 -0.49 0 0.15 0#> (Intercept) V1 V2 V3 V4 V5 V6 V7 V8 V9 #> 0,0 0.10 0 0 0 0.01 0.00 -0.19 0 0.46 0 #> 0,1 0.06 0 0 0 0.00 0.01 0.27 0 0.00 0 #> 1,0 0.16 0 0 0 0.00 -0.02 0.43 0 0.00 0 #> 1,1 0.22 0 0 0 0.00 -0.14 -0.24 0 0.43 0aic.idx <- which.min(afit$aic) bic.idx <- which.min(afit$bic) # actual coverage # actual coverage mean(true.coef[afit$beta[,-1,aic.idx] != 0] >= afit$lower.ci[,-1,aic.idx][afit$beta[,-1,aic.idx] != 0] & true.coef[afit$beta[,-1,aic.idx] != 0] <= afit$upper.ci[,-1,aic.idx][afit$beta[,-1,aic.idx] != 0])#> [1] 0.8888889(covered <- true.coef >= afit$lower.ci[,-1,aic.idx] & true.coef <= afit$upper.ci[,-1,aic.idx])#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] #> 0,0 TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE #> 0,1 TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE #> 1,0 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE #> 1,1 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE #> [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] #> 0,0 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE #> 0,1 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE #> 1,0 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE #> 1,1 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUEmean(covered)#> [1] 0.98# logistic regression example if (FALSE) { set.seed(123) dat.sim <- genHierSparseData(ncats = 2, nvars = 25, nobs = 200, hier.sparsity.param = 0.5, prop.zero.vars = 0.5, family = "binomial", effect.size.max = 0.5) # don't make any # coefficients too big x <- dat.sim$x conditions <- dat.sim$group.ind y <- dat.sim$y true.beta.b <- dat.sim$beta.mat bfit <- vennLasso(x = x, y = y, groups = conditions, family = "binomial") (true.coef.b <- -true.beta.b[match(dimnames(fit$beta)[[1]], rownames(true.beta.b)),]) round(bfit$beta[,,20], 2) }