Assingments % - Use the below codes to solve the questions: I am expecting a R file and a word d

Use the below codes to solve the questions: I am expecting a R file and a word document
if (!require(mlba)) {
library(devtools)
install_github(“gedeck/mlba/mlba”, force=TRUE)
}
options(scipen=999)
# Logistic Regression
## The Logistic Regression Model
library(ggplot2)
library(gridExtra)
p <- seq(0.005, 0.995, 0.01) df <- data.frame( p = p, odds = p / (1 - p), logit = log(p / (1 - p)) ) g1 <- ggplot(df, aes(x=p, y=odds)) + geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(0, 100)) + labs(x='Probability of success', y='Odds', title='(a)') + geom_hline(yintercept = 0) + theme_bw() + theme(axis.line = element_line(colour = "black"), axis.line.x = element_blank(), panel.border = element_blank()) g2 <- ggplot(df, aes(x=p, y=logit)) + geom_line() + coord_cartesian(xlim=c(0, 1), ylim=c(-4, 4)) + labs(x='Probability of success', y='Logit', title='(b)' ) + geom_hline(yintercept = 0) + theme_bw() + theme(axis.line = element_line(colour = "black"), axis.line.x = element_blank(), panel.border = element_blank()) grid.arrange(g1, g2, ncol=2) ## Example: Acceptance of Personal Loan ### Model with a Single Predictor bank.df <- mlba::UniversalBank g <- ggplot(bank.df, aes(x=Income, y=Personal.Loan)) + geom_jitter(width=0, height=0.01, alpha=0.1) + geom_function(fun=function(x){ return (1 / (1 + exp(6.04892 - 0.036*x)))}) + xlim(0, 250) + labs(x='Income (in $000s)') + theme_bw() g # Z Obtain coefficients for Personal.Loan ~ Income glm.model.income <- glm(Personal.Loan ~ Income, data = bank.df, family = binomial) glm.model.income ############################################### ### Estimating the Logistic Model from Data: Computing Parameter Estimates #### Estimated Model library(caret) library(tidyverse) # load and preprocess data bank.df <- mlba::UniversalBank %>%
select(-c(ID, ZIP.Code)) %>% # Drop ID and zip code columns.
mutate(
Education = factor(Education, levels=c(1:3),
labels=c(“Undergrad”, “Graduate”, “Advanced/Professional”)),
Personal.Loan = factor(Personal.Loan, levels=c(0, 1),
labels=c(“No”, “Yes”))
)
# partition data
set.seed(2)
idx <- caret::createDataPartition(bank.df$Personal.Loan, p=0.6, list=FALSE) train.df <- bank.df[idx, ] holdout.df <- bank.df[-idx, ] # build model trControl <- caret::trainControl(method="cv", number=5, allowParallel=TRUE) logit.reg <- caret::train(Personal.Loan ~ ., data=train.df, trControl=trControl, # fit logistic regression with a generalized linear model method="glm", family="binomial") logit.reg summary(logit.reg$finalModel) ## Evaluating Classification Performance ### Interpreting Results in Terms of Odds (for a Profiling Goal) # use predict() with type = "response" to compute predicted probabilities. logit.reg.pred <- predict(logit.reg, holdout.df[, -8], type = "prob") str(holdout.df) # display four different cases interestingCases = c(1, 12, 32, 1333) results=data.frame( actual = holdout.df$Personal.Loan[interestingCases], p0 = logit.reg.pred[interestingCases, 1], p1 = logit.reg.pred[interestingCases, 2], predicted = ifelse(logit.reg.pred[interestingCases, 2] > 0.5, 1, 0)
)
# Z evalulate performance ##################################################################
# predict training set
logit.reg.pred.train <- predict(logit.reg, train.df[, -8], type = "prob") predicted.train <- factor(ifelse(logit.reg.pred.train[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c(“No”, “Yes”))
# accuracy train
confusionMatrix(train.df$Personal.Loan ,predicted.train)
# predict holdout
predicted.holdout <- factor(ifelse(logit.reg.pred[, 2] > 0.5, 1, 0), levels = c(0, 1), labels = c(“No”, “Yes”))
# accuracy holdout
confusionMatrix(holdout.df$Personal.Loan ,predicted.holdout)
#########################################################################################
library(gains)
actual <- ifelse(holdout.df$Personal.Loan == "Yes", 1, 0) # Z comments######################################################################################### # when you specify the groups argument below, it typically represents the number of distinct or unique values #in your predicted probabilities that you want to use for creating the cumulative gains chart. n_distinct(logit.reg.pred[,2]) ####################################################################################################### gain <- gains(actual, logit.reg.pred[,2], groups=length(actual)-2) # plot gains chart nactual <-sum(actual) g1 <- ggplot() + geom_line(aes(x=gain$cume.obs, y=gain$cume.pct.of.total * nactual)) + geom_line(aes(x=c(0, max(gain$cume.obs)), y=c(0, nactual)), color="darkgrey") + labs(x="# Cases", y="Cumulative") # plot decile-wise lift chart gain10 <- gains(actual, logit.reg.pred[,2], groups=10) g2 <- ggplot(mapping=aes(x=gain10$depth, y=gain10$lift / 100)) + geom_col(fill="steelblue") + geom_text(aes(label=round(gain10$lift / 100, 1)), vjust=-0.2, size=3) + ylim(0, 8) + labs(x="Percentile", y="Lift") grid.arrange(g1, g2, ncol=2) ### Z residual plot # Fit a logistic regression model model <- glm(Personal.Loan ~., data = bank.df, family = binomial(link = "logit")) summary(model) residuals = residuals(model, type = "response") # Create a residual plot plot(model$fitted.values, residuals, ylab = "Residuals", xlab = "Fitted Values", main = "Logistic Regression Residuals vs. Fitted") abline(h = 0, col = "red", lty = 2) # Add a horizontal line at y = 0 for reference # Calculate predicted probabilities same as model fitted value predicted_probs <- predict(model, type = "response") predicted_probs[1:5] model$fitted.values[1:5] # Define bin intervals (adjust bins and breaks as needed) bins <- cut(predicted_probs, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1.0), labels = FALSE) # Calculate binned residuals binned_residuals <- data.frame( PredictedProb = predicted_probs, ActualOutcome = ifelse(bank.df$Personal.Loan == "Yes", 1, 0), # Replace with the actual outcome variable Bin = bins ) # Example: Calculate mean residuals for each bin library(dplyr) binned_residuals_summary <- binned_residuals %>%
group_by(Bin) %>%
summarize(MeanResidual = mean(ActualOutcome – PredictedProb))
# Visualize mean residuals
barplot(binned_residuals_summary$MeanResidual, names.arg = binned_residuals_summary$Bin,
xlab = “Bin”, ylab = “Mean Residual”, main = “Mean Residuals by Bin”)
# Load the ggplot2 package if not already loaded
library(ggplot2)
# Assuming you have calculated binned residuals as described earlier
# Create a scatter plot of binned residuals
binned_residuals_summary %>% ggplot(aes(x = Bin, y = MeanResidual)) +
geom_col() +
labs(x = “Predicted Probabilities”, y = “Binned Residuals”) +
ggtitle(“Scatter Plot of Binned Residuals”) +
theme_minimal()
library(arm)
# confidence bands
binnedplot(predicted_probs,residuals)
STIMULATION:
library(ggplot2)
library(reshape)
library(glmnet)
rm(list=ls())
set.seed(1)
x = rnorm(1000, sd=3) # A random variable
hist(x)
summary(x)
p = 1/(1+exp(-(1+10*x)))
hist(p)
plot(x,p)
y = rbinom(1000,1,p) # bernoulli response variable
plot(x,y)
# two approaches to set outcome categories
# 1 cutoff
#y = ifelse(p>=0.5,1,0)
# bernoulli response
#set.seed(1)
#prob <- c(0.3, 0.5, 0.7, 0.4, 0.6) #result <- rbinom(5, 1, prob) #prob #result data.plot = data.frame(y,p,x) # plot probability, category data.plot %>% ggplot(aes(x)) +
geom_line(aes(y=p)) +
geom_point(aes(y=y,color=factor(y)))+
theme_bw()
# plot category
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
theme_bw()
# plot linear regression
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
geom_smooth(method=’lm’,se=FALSE) +
theme_bw()
# plot glm
data.plot %>% ggplot(aes(x,y,color=y)) +
geom_point() +
geom_smooth(method=’glm’,
method.args = list(family = “binomial”),
se=FALSE) +
theme_bw()
# get coefficients through linear regression
logit = log(p/(1-p))
plot(x,logit)
data = data.frame(cbind(logit,x))
summary(data)
# remove infinite in all columns:
data = data %>%
filter_all(all_vars(!is.infinite(.)))
summary(data)
logit.model = lm(logit~x,data)
logit.model
# get coefficients through logistic regression via glm
data.glm = t(rbind(as.numeric(y),as.numeric(x)))
data.glm = data.frame(data.glm)
names(data.glm) = c(“y”,”x”)
#data.glm = data.glm %>% rename(y=X1,x=X2)
data.glm$y = as.factor(data.glm$y)
summary(data.glm)
plot(data.glm$x,data.glm$y)
set.seed(1)
glm.model <- glm(y ~ x, data = data.glm, family = binomial) glm.model

Place this order or similar order and get an amazing discount. USE Discount code “GET20” for 20% discount