Wine Quality Prediction project code

Project code for a Jupyter notebook:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# Location of the semicolon-delimited wine-quality CSV; replace the
# placeholder with a real path before running.
raw_data = r"Set your csv file path"

# Parse the file into a DataFrame and show the first rows as a sanity check.
data = pd.read_csv(raw_data, sep=';')
print("Dataset loaded successfully!")
preview = data.head()
print(preview)



# Report how many null entries each column contains.
missing_per_column = data.isna().sum()
print("Missing values:\n", missing_per_column)

# Drop exact duplicate rows (the first occurrence of each is kept).
data = data.drop_duplicates()
deduplicated_shape = data.shape
print("Data shape after removing duplicates:", deduplicated_shape)

# Separate features and target
X = data.drop('quality', axis=1)
y = data['quality']

# Split BEFORE scaling so the scaler never sees the held-out rows; fitting
# it on the full dataset would leak test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: learn mean/std from the training rows only, then apply
# that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix (using the training statistics),
# kept under its original name because the export step at the end of the
# script reads X_scaled.
X_scaled = scaler.transform(X)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


# Support vector regressor with an RBF kernel: fit on the training split,
# then predict the held-out split.
svr = SVR(kernel='rbf').fit(X_train, y_train)
y_pred_svr = svr.predict(X_test)

# Linear regression trained on the same training split and used to predict
# the same test rows, for comparison with the SVR.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)


# Scatter of actual vs. SVR-predicted quality for the test split.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred_svr, alpha=0.5)
plt.xlabel("Actual Quality")
plt.ylabel("Predicted Quality (SVR)")
plt.title("Actual vs Predicted Wine Quality (SVR)")

# Reference line y = x (perfect prediction). Its endpoints come from the
# plotted values rather than a fixed [3, 8], so the diagonal always spans
# the data regardless of which quality range the loaded dataset covers.
line_lo = min(np.min(y_test), np.min(y_pred_svr))
line_hi = max(np.max(y_test), np.max(y_pred_svr))
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'r--')
plt.show()



# Annotated heatmap of the pairwise correlations between all columns of the
# de-duplicated dataset (target included).
plt.figure(figsize=(12, 8))
correlation_matrix = data.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()



def evaluate_model(y_true, y_pred, model_name):
    """Print MSE, RMSE and R² for one model's predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, in the same order as ``y_true``.
        model_name: Label used in the header line of the report.

    Returns:
        None. The report is written to standard output.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    report_lines = [
        f"{model_name} Evaluation:",
        f"MSE: {squared_error:.2f}",
        f"RMSE: {np.sqrt(squared_error):.2f}",
        # The trailing newline leaves a blank line after each report.
        f"R²: {r2_score(y_true, y_pred):.2f}\n",
    ]
    print("\n".join(report_lines))

# Score both regressors against the same held-out targets, SVR first.
for model_label, predictions in (
    ("SVR", y_pred_svr),
    ("Linear Regression", y_pred_lr),
):
    evaluate_model(y_test, predictions, model_label)




# Rebuild a DataFrame from the scaled feature matrix and write it out with
# the target column attached.
#
# X's index is reused on purpose: drop_duplicates() leaves gaps in the row
# labels, and column assignment aligns on labels. With a fresh 0..n-1 index
# the 'quality' column would be filled with NaNs and shifted values instead
# of each row's own target.
processed_data = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)
processed_data['quality'] = y
processed_data.to_csv('wine_quality.csv', index=False)
print("Processed data saved to 'wine_quality.csv'")

Comments

Popular posts from this blog

Wine Quality Prediction Project (Documentation) Jupyter Notebook