## Show the code

```
suppressPackageStartupMessages({
library(foreach)
library(doParallel)
library(glmnet)
library(ggplot2)
library(dplyr)
})
theme_set(theme_minimal())
Logistic <- function(z) {
return(exp(z) / (1 + exp(z)))
}
SignSummary <- function(x) {
return(paste(ifelse(x == 0, '0', ifelse(x > 0, '+', '-')), collapse = ''))
}
HessianLogistic <- function(x, theta) {
z <- x %*% theta
psi.2nd <- exp(z) / (1 + exp(z))^2
return(-t(x) %*% (x * as.vector(psi.2nd)) / nrow(x))
}
HessianGaussian <- function(x) {
return(-t(x) %*% x / nrow(x))
}
GLMSim <- function(theta, lambda, n, nRep = 1000, family = 'gaussian') {
# GLMSim produces a plot of the true selection events by simulation, with
# approximation of the selection event based on Taylor expansion of the
# log-likelihood at the true theta (dahsed) and the MLE (dotted)
#
# Args:
# theta: true parameter
# lambda: penalty parameter
# n: sample size
# nRep: number of points to be included
# family: 'binomial' for logistic regression, 'gaussian' for linear
# regression
#
# Returns:
# A plot of the simulation
registerDoParallel(cores = 4)
# generates nRep possible sets of n observations
x.cov.rt <- rbind(c(1, -0.1), c(-0.1, 1))
x <- matrix(rnorm(n * 2), n, 2) %*% x.cov.rt
if (family == 'binomial') {
pr <- Logistic(x %*% theta)
y <- foreach(i = 1:nRep, .combine = cbind) %dopar% {
set.seed(i)
rbinom(n, size = 1, prob = pr)
}
} else if (family == 'gaussian') {
mu <- x %*% theta
y <- foreach(i = 1:nRep, .combine = cbind) %dopar% {
set.seed(i)
rnorm(n, mean = mu)
}
} else {
stop('family is not binomial or gaussian')
}
# runs glmnet to compute the selected variables at s = lambda
# computes mle as test statistic at s = 0
mle <- foreach(i = 1:nRep, .combine = rbind, .packages = "glmnet") %dopar% {
fit <- glmnet(x, y[, i], family = family, intercept = FALSE)
coeff <- as.matrix(
coef(
fit, s = c(0, lambda), exact = TRUE,
x = x, y = y[, i], family = family,
intercept = FALSE
)
)
selection <- SignSummary(coeff[-1, 2])
data.frame(
theta1 = coeff[2, 1], theta2 = coeff[3, 1],
selection = selection
)
}
mle$selection <- as.factor(mle$selection)
# computes the selection event based on the MLE at the first row
if (family == 'binomial') {
hessian <- HessianLogistic(x, as.numeric(mle[1, 1:2]))
} else if (family == 'gaussian') {
hessian <- HessianGaussian(x)
}
# computes the segments of the polyhedron for event '00'
zero.seg <- -t(solve(hessian, rbind(c(1, -1, -1, 1), c(1, 1, -1, -1)))) *
lambda
zero.seg <- cbind(zero.seg, zero.seg[c(2, 3, 4, 1), ])
zero.seg <- data.frame(zero.seg)
# adds segment representing the other events
zero.seg <- rbind(
zero.seg,
zero.seg %>% dplyr::mutate(X3 = X1, X4 = sign(X2) * 10),
zero.seg %>% dplyr::mutate(X4 = X2, X3 = sign(X1) * 10)
)
# computes the selection event based on true theta
if (family == 'binomial') {
true.hessian <- HessianLogistic(x, theta)
} else if (family == 'gaussian') {
true.hessian <- hessian
}
# computes the segments of the polyhedron for event '00'
true.zero.seg <- -t(solve(true.hessian, rbind(c(1, -1, -1, 1), c(1, 1, -1, -1)))) *
lambda
true.zero.seg <- cbind(true.zero.seg, true.zero.seg[c(2, 3, 4, 1), ])
true.zero.seg <- data.frame(true.zero.seg)
# adds segment representing the other events
true.zero.seg <- rbind(
true.zero.seg,
true.zero.seg %>%
dplyr::mutate(X3 = X1, X4 = sign(X2) * 10),
true.zero.seg %>%
dplyr::mutate(X4 = X2, X3 = sign(X1) * 10)
)
ggplot(mle) +
geom_point(aes(x = theta1, y = theta2, color = selection)) +
geom_point(x = mle[1, 1], y = mle[1, 2], shape = 4) +
geom_segment(
aes(x = X1, y = X2, xend = X3, yend = X4),
data = zero.seg, color = 'black'
) +
geom_segment(
aes(x = X1, y = X2, xend = X3, yend = X4),
data = true.zero.seg, color = 'black',
linetype = 'dashed'
) +
coord_cartesian(
xlim = c(min(mle$theta1), max(mle$theta1)),
ylim = c(min(mle$theta2), max(mle$theta2))
)
}
GLMSim(
theta = c(0.05, 0), lambda = 0.04, n = 100, nRep = 1000,
family = 'binomial'
)
```