Use the below codes to solve the questions: I am expecting a R file and a word document
if (!require(mlba)) {
library(devtools)
install_github(“gedeck/mlba/mlba”, force=TRUE)
}
options(scipen=999)
# Logistic Regression
## The Logistic Regression Model
library(ggplot2)
library(gridExtra)
p <- seq(0.005, 0.995, 0.01)
df <- data.frame(
p = p,
odds = p / (1 - p),
logit = log(p / (1 - p))
)
g1 <- ggplot(df, aes(x=p, y=odds)) +
geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(0, 100)) +
labs(x='Probability of success', y='Odds', title='(a)') +
geom_hline(yintercept = 0) +
theme_bw() +
theme(axis.line = element_line(colour = "black"),
axis.line.x = element_blank(),
panel.border = element_blank())
g2 <- ggplot(df, aes(x=p, y=logit)) +
geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(-4, 4)) +
labs(x='Probability of success', y='Logit', title='(b)' ) +
geom_hline(yintercept = 0) +
theme_bw() +
theme(axis.line = element_line(colour = "black"),
axis.line.x = element_blank(),
panel.border = element_blank())
grid.arrange(g1, g2, ncol=2)
## Example: Acceptance of Personal Loan
### Model with a Single Predictor
bank.df <- mlba::UniversalBank
g <- ggplot(bank.df, aes(x=Income, y=Personal.Loan)) +
geom_jitter(width=0, height=0.01, alpha=0.1) +
geom_function(fun=function(x){ return (1 / (1 + exp(6.04892 - 0.036*x)))}) +
xlim(0, 250) +
labs(x='Income (in $000s)') +
theme_bw()
g
# Z Obtain coefficients for Personal.Loan ~ Income
glm.model.income <- glm(Personal.Loan ~ Income, data = bank.df, family = binomial)
glm.model.income
###############################################
### Estimating the Logistic Model from Data: Computing Parameter Estimates
#### Estimated Model
library(caret)
library(tidyverse)
# load and preprocess data
bank.df <- mlba::UniversalBank %>%
select(-c(ID, ZIP.Code)) %>% # Drop ID and zip code columns.
mutate(
Education = factor(Education, levels=c(1:3),
labels=c(“Undergrad”, “Graduate”, “Advanced/Professional”)),
Personal.Loan = factor(Personal.Loan, levels=c(0, 1),
labels=c(“No”, “Yes”))
)
# partition data
set.seed(2)
idx <- caret::createDataPartition(bank.df$Personal.Loan, p=0.6, list=FALSE)
train.df <- bank.df[idx, ]
holdout.df <- bank.df[-idx, ]
# build model
trControl <- caret::trainControl(method="cv", number=5, allowParallel=TRUE)
logit.reg <- caret::train(Personal.Loan ~ ., data=train.df, trControl=trControl,
# fit logistic regression with a generalized linear model
method="glm", family="binomial")
logit.reg
summary(logit.reg$finalModel)
## Evaluating Classification Performance
### Interpreting Results in Terms of Odds (for a Profiling Goal)
# use predict() with type = "response" to compute predicted probabilities.
logit.reg.pred <- predict(logit.reg, holdout.df[, -8], type = "prob")
str(holdout.df)
# display four different cases
interestingCases = c(1, 12, 32, 1333)
results=data.frame(
actual = holdout.df$Personal.Loan[interestingCases],
p0 = logit.reg.pred[interestingCases, 1],
p1 = logit.reg.pred[interestingCases, 2],
predicted = ifelse(logit.reg.pred[interestingCases, 2] > 0.5, 1, 0)
)
# Z evalulate performance ##################################################################
# predict training set
logit.reg.pred.train <- predict(logit.reg, train.df[, -8], type = "prob")
predicted.train <- factor(ifelse(logit.reg.pred.train[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c(“No”, “Yes”))
# accuracy train
confusionMatrix(train.df$Personal.Loan ,predicted.train)
# predict holdout
predicted.holdout <- factor(ifelse(logit.reg.pred[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c(“No”, “Yes”))
# accuracy holdout
confusionMatrix(holdout.df$Personal.Loan ,predicted.holdout)
#########################################################################################
library(gains)
actual <- ifelse(holdout.df$Personal.Loan == "Yes", 1, 0)
# Z comments#########################################################################################
# when you specify the groups argument below, it typically represents the number of distinct or unique values
#in your predicted probabilities that you want to use for creating the cumulative gains chart.
n_distinct(logit.reg.pred[,2])
#######################################################################################################
gain <- gains(actual, logit.reg.pred[,2], groups=length(actual)-2)
# plot gains chart
nactual <-sum(actual)
g1 <- ggplot() +
geom_line(aes(x=gain$cume.obs, y=gain$cume.pct.of.total * nactual)) +
geom_line(aes(x=c(0, max(gain$cume.obs)), y=c(0, nactual)), color="darkgrey") +
labs(x="# Cases", y="Cumulative")
# plot decile-wise lift chart
gain10 <- gains(actual, logit.reg.pred[,2], groups=10)
g2 <- ggplot(mapping=aes(x=gain10$depth, y=gain10$lift / 100)) +
geom_col(fill="steelblue") +
geom_text(aes(label=round(gain10$lift / 100, 1)), vjust=-0.2, size=3) +
ylim(0, 8) + labs(x="Percentile", y="Lift")
grid.arrange(g1, g2, ncol=2)
### Z residual plot
# Fit a logistic regression model
model <- glm(Personal.Loan ~., data = bank.df, family = binomial(link = "logit"))
summary(model)
residuals = residuals(model, type = "response")
# Create a residual plot
plot(model$fitted.values, residuals, ylab = "Residuals", xlab = "Fitted Values", main = "Logistic Regression Residuals vs. Fitted")
abline(h = 0, col = "red", lty = 2) # Add a horizontal line at y = 0 for reference
# Calculate predicted probabilities same as model fitted value
predicted_probs <- predict(model, type = "response")
predicted_probs[1:5]
model$fitted.values[1:5]
# Define bin intervals (adjust bins and breaks as needed)
bins <- cut(predicted_probs, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1.0), labels = FALSE)
# Calculate binned residuals
binned_residuals <- data.frame(
PredictedProb = predicted_probs,
ActualOutcome = ifelse(bank.df$Personal.Loan == "Yes", 1, 0), # Replace with the actual outcome variable
Bin = bins
)
# Example: Calculate mean residuals for each bin
library(dplyr)
binned_residuals_summary <- binned_residuals %>%
group_by(Bin) %>%
summarize(MeanResidual = mean(ActualOutcome – PredictedProb))
# Visualize mean residuals
barplot(binned_residuals_summary$MeanResidual, names.arg = binned_residuals_summary$Bin,
xlab = “Bin”, ylab = “Mean Residual”, main = “Mean Residuals by Bin”)
# Load the ggplot2 package if not already loaded
library(ggplot2)
# Assuming you have calculated binned residuals as described earlier
# Create a scatter plot of binned residuals
binned_residuals_summary %>% ggplot(aes(x = Bin, y = MeanResidual)) +
geom_col() +
labs(x = “Predicted Probabilities”, y = “Binned Residuals”) +
ggtitle(“Scatter Plot of Binned Residuals”) +
theme_minimal()
library(arm)
# confidence bands
binnedplot(predicted_probs,residuals)
STIMULATION:
library(ggplot2)
library(reshape)
library(glmnet)
rm(list=ls())
set.seed(1)
x = rnorm(1000, sd=3) # A random variable
hist(x)
summary(x)
p = 1/(1+exp(-(1+10*x)))
hist(p)
plot(x,p)
y = rbinom(1000,1,p) # bernoulli response variable
plot(x,y)
# two approaches to set outcome categories
# 1 cutoff
#y = ifelse(p>=0.5,1,0)
# bernoulli response
#set.seed(1)
#prob <- c(0.3, 0.5, 0.7, 0.4, 0.6)
#result <- rbinom(5, 1, prob)
#prob
#result
data.plot = data.frame(y,p,x)
# plot probability, category
data.plot %>% ggplot(aes(x)) +
geom_line(aes(y=p)) +
geom_point(aes(y=y,color=factor(y)))+
theme_bw()
# plot category
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
theme_bw()
# plot linear regression
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
geom_smooth(method=’lm’,se=FALSE) +
theme_bw()
# plot glm
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
geom_smooth(method=’glm’,
method.args = list(family = “binomial”),
se=FALSE) +
theme_bw()
# get coefficients through linear regression
logit = log(p/(1-p))
plot(x,logit)
data = data.frame(cbind(logit,x))
summary(data)
# remove infinite in all columns:
data = data %>%
filter_all(all_vars(!is.infinite(.)))
summary(data)
logit.model = lm(logit~x,data)
logit.model
# get coefficients through logistic regression via glm
data.glm = t(rbind(as.numeric(y),as.numeric(x)))
data.glm = data.frame(data.glm)
names(data.glm) = c(“y”,”x”)
#data.glm = data.glm %>% rename(y=X1,x=X2)
data.glm$y = as.factor(data.glm$y)
summary(data.glm)
plot(data.glm$x,data.glm$y)
set.seed(1)
glm.model <- glm(y ~ x, data = data.glm, family = binomial)
glm.model
Place this order or similar order and get an amazing discount. USE Discount code “GET20” for 20% discount