# Simulated spam-detection example ----
#
# Builds a synthetic data set of emails, fits a logistic regression that
# predicts `is_spam` from three features, and prints the fitted model together
# with the exponentiated coefficients (odds ratios).

# Fix the RNG seed so the simulated data are reproducible.
set.seed(123)

# Class sizes. Every draw below is sized from these values, so changing
# `n_emails` or `n_spam` here is enough to resize the whole data set.
n_emails <- 1000
n_spam <- 100
n_ham <- n_emails - n_spam

# Spam rows come first, followed by the non-spam rows. The order of the random
# draws (spam then non-spam, column by column) determines the simulated values.
spam_data <- data.frame(
  # Spam has more exclamation marks (Poisson mean 5 vs. 0.5).
  exclamation_marks = c(rpois(n_spam, 5), rpois(n_ham, 0.5)),
  # Spam mentions "free" more often (probability 0.8 vs. 0.1).
  contains_free = c(rbinom(n_spam, 1, 0.8), rbinom(n_ham, 1, 0.1)),
  # Spam is shorter (mean 200, sd 50 vs. mean 500, sd 100).
  length = c(rnorm(n_spam, 200, 50), rnorm(n_ham, 500, 100)),
  # Response: 1 for spam, 0 otherwise.
  is_spam = rep(c(1, 0), times = c(n_spam, n_ham))
)

# Fit logistic regression.
# NOTE(review): the simulated classes are far apart, so glm() may warn that
# fitted probabilities of 0 or 1 occurred -- confirm before relying on the
# reported standard errors.
spam_model <- glm(
  is_spam ~ exclamation_marks + contains_free + length,
  data = spam_data,
  family = "binomial" # This specifies logistic regression
)

# View results. print() is explicit so the summary is also shown when this
# file is run through source(), which does not auto-print top-level values.
print(summary(spam_model))

# Coefficients are on the log-odds scale; exponentiating them gives the
# multiplicative change in the odds of spam per one-unit increase in each
# predictor (the intercept term gives the baseline odds).
coefs <- coef(spam_model)
odds_ratios <- exp(coefs)
print(odds_ratios)