# Logistic regression of graduate admission (`admit`) on GRE score, GPA and
# undergraduate-institution rank, using the UCLA IDRE `binary.csv` dataset.
# Recorded cell outputs are kept below each step as comments.

# Step 1: Import Libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

# Step 2: Import Dataset from Web Storage (FIXED URL)
url = "https://stats.idre.ucla.edu/stat/data/binary.csv"
data = pd.read_csv(url)
print(data.head())
# Output:
#    admit  gre   gpa  rank
# 0      0  380  3.61     3
# 1      1  660  3.67     2
# 2      1  800  4.00     1
# 3      1  640  3.19     4
# 4      0  520  2.93     4

# Step 3: Define Variables and handle 'rank' as categorical
y = data['admit']  # Dependent variable

# Create dummy variables for 'rank' and drop the first one to avoid
# multicollinearity (rank 1 becomes the reference category).
rank_dummies = pd.get_dummies(data['rank'], prefix='rank', drop_first=True)

# Combine original predictors (gre, gpa) with the new rank dummy variables
X = data[['gre', 'gpa']]
X = pd.concat([X, rank_dummies], axis=1)

# Add constant (intercept) to the independent variables.
# add_constant returns a new frame, so the result must be assigned back.
X = sm.add_constant(X)

# Convert boolean dummy variables to integers (0 and 1) for statsmodels
X[['rank_2', 'rank_3', 'rank_4']] = X[['rank_2', 'rank_3', 'rank_4']].astype(int)

# Step 4: Apply Logistic Regression (Changed from Multiple Linear Regression)
model = sm.Logit(y, X).fit()
# Output:
# Optimization terminated successfully.
#          Current function value: 0.573147
#          Iterations 6

# Step 5: Print Model Summary
print(model.summary())
# Output:
#                            Logit Regression Results
# ==============================================================================
# Dep. Variable:                  admit   No. Observations:                  400
# Model:                          Logit   Df Residuals:                      394
# Method:                           MLE   Df Model:                            5
# Date:                Wed, 11 Mar 2026   Pseudo R-squ.:                 0.08292
# Time:                        14:05:06   Log-Likelihood:                -229.26
# converged:                       True   LL-Null:                       -249.99
# Covariance Type:            nonrobust   LLR p-value:                 7.578e-08
# ==============================================================================
#                  coef    std err          z      P>|z|      [0.025      0.975]
# ------------------------------------------------------------------------------
# const         -3.9900      1.140     -3.500      0.000      -6.224      -1.756
# gre            0.0023      0.001      2.070      0.038       0.000       0.004
# gpa            0.8040      0.332      2.423      0.015       0.154       1.454
# rank_2        -0.6754      0.316     -2.134      0.033      -1.296      -0.055
# rank_3        -1.3402      0.345     -3.881      0.000      -2.017      -0.663
# rank_4        -1.5515      0.418     -3.713      0.000      -2.370      -0.733
# ==============================================================================

# Step 6: Predict probabilities
# For a Logit model, predict() returns P(admit = 1) for each row of X.
predictions = model.predict(X)

# Step 7: Plot Actual Admission vs Predicted Probabilities
plt.figure(figsize=(10, 6))
plt.scatter(y, predictions, alpha=0.5)
plt.xlabel("Actual Admission (0 or 1)")
plt.ylabel("Predicted Probability of Admission")
plt.title("Actual Admission vs Predicted Probabilities from Logistic Regression")
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()