# Fit a simple linear regression (spend ~ time_spent) by hand using the
# closed-form least-squares formulas, print the fitted model, and predict
# spend for a new visit duration. Expected console output is shown in the
# `#>` comments that follow each cat() call.

# --- Step 1: Enter the data as vectors ---
time_spent <- c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
spend <- c(25, 40, 58, 70, 85, 95, 112, 130, 140, 160)

# Element-wise arithmetic below would silently recycle a shorter vector,
# so insist that both vectors have the same length.
stopifnot(length(time_spent) == length(spend))

# --- Step 2: Calculate the means (x_bar, y_bar) ---
x_bar <- mean(time_spent)
y_bar <- mean(spend)
cat("Mean time spent:", x_bar, "\n")
#> Mean time spent: 27.5
cat("Mean spend:", y_bar, "\n\n")
#> Mean spend: 91.5

# --- Step 3: Calculate the components for beta_1 (the slope) ---
# beta_1 = sum( (x_i - x_bar)*(y_i - y_bar) ) / sum( (x_i - x_bar)^2 )

# Deviations from the mean for x and y
x_dev <- time_spent - x_bar
y_dev <- spend - y_bar

# Numerator: sum of cross-products of deviations
# (the sample covariance of x and y times (n - 1))
numerator <- sum(x_dev * y_dev)

# Denominator: sum of squared deviations for x
# (the sample variance of x times (n - 1))
denominator <- sum(x_dev^2)

# The slope is undefined when every x value is identical.
stopifnot(denominator > 0)
beta_1 <- numerator / denominator

# --- Step 4: Calculate beta_0 (the intercept) ---
# beta_0 = y_bar - beta_1 * x_bar
beta_0 <- y_bar - beta_1 * x_bar
cat("Our manually calculated model:\n")
#> Our manually calculated model:
cat("spend =", beta_0, "+", beta_1, "* time_spent\n\n")
#> spend = 11.13333 + 2.922424 * time_spent

# --- Step 5: Make a prediction for a new value ---
# What would we predict for a user who spends 22 minutes on the site?
new_time <- 22
predicted_spend <- beta_0 + beta_1 * new_time
cat("Predicted spend for", new_time, "minutes: $", predicted_spend, "\n")
#> Predicted spend for 22 minutes: $ 75.42667
# --- Step 6: Visualize our model ---
# Scatter plot of the observed (time_spent, spend) pairs.
plot(
  x = time_spent,
  y = spend,
  pch = 16,
  col = "blue",
  xlab = "Time Spent (minutes)",
  ylab = "Spend (dollars)",
  main = "Spend vs. Time Spent: Our First Model"
)

# Overlay the fitted line: intercept beta_0, slope beta_1.
abline(a = beta_0, b = beta_1, lwd = 2, col = "red")

# Mark the predicted spend for the new visit duration.
points(x = new_time, y = predicted_spend, pch = 18, cex = 2, col = "darkgreen")

# Legend: NA in pch/lty leaves that entry without a symbol/line, so the
# data and prediction show as points and the model shows as a line.
legend(
  "topleft",
  legend = c("Data", "Our Model", "Prediction (22 min)"),
  col = c("blue", "red", "darkgreen"),
  pch = c(16, NA, 18),
  lty = c(NA, 1, NA),
  lwd = 2,
  cex = 0.6
)



