Wine Quality prediction project code

Project code for the Jupyter notebook:

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVR

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

# Location of the semicolon-separated CSV file; replace the placeholder text.
raw_data = r"Set your csv file path"

data = pd.read_csv(raw_data, sep=';')
print("Dataset loaded successfully!")
print(data.head())

# Check for missing values (count of null cells per column)
missing_per_column = data.isna().sum()
print("Missing values:\n", missing_per_column)

# Remove duplicates (exact repeated rows) and report the remaining size
data = data.drop_duplicates()
print("Data shape after removing duplicates:", data.shape)

# Separate features and target
X = data.drop('quality', axis=1)
y = data['quality']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# rows (data leakage), which makes the test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: learn the scaling parameters from the training rows only,
# then apply that same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Every row scaled with the training-set parameters. Kept under this name
# because the export step further down the script builds its frame from it.
X_scaled = scaler.transform(X)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

# Fit both regressors on the same training split; fit() returns the estimator
# itself, so construction and fitting can be chained.
svr = SVR(kernel='rbf').fit(X_train, y_train)
lr = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split with each fitted model.
y_pred_svr = svr.predict(X_test)
y_pred_lr = lr.predict(X_test)

# Scatter of the SVR predictions against the actual test-set quality values.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred_svr, alpha=0.5)
ax.set_xlabel("Actual Quality")
ax.set_ylabel("Predicted Quality (SVR)")
ax.set_title("Actual vs Predicted Wine Quality (SVR)")
ax.plot([3, 8], [3, 8], 'r--')  # Reference line: y = x between 3 and 8
plt.show()

# Heatmap of the pairwise correlations between all columns of `data`.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()

def evaluate_model(y_true, y_pred, model_name):
    """Print MSE, RMSE and R² for one set of regression predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, compared element-wise with ``y_true`` by
            the scikit-learn metric functions.
        model_name: Label shown in the header line of the printed report.

    Returns:
        None. The metrics are only printed, each rounded to two decimals.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
    r2 = r2_score(y_true, y_pred)

    print(f"{model_name} Evaluation:")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"R²: {r2:.2f}\n")

# Print the same metric report for each model's test-set predictions.
for model_label, predictions in (
    ("SVR", y_pred_svr),
    ("Linear Regression", y_pred_lr),
):
    evaluate_model(y_test, predictions, model_label)

# Rebuild a labelled frame from the scaled feature array.
processed_data = pd.DataFrame(X_scaled, columns=X.columns)

# Assign the target positionally. `y` still carries the row labels left over
# after drop_duplicates(), while `processed_data` has a fresh 0..n-1 index;
# assigning the Series directly would align on those labels and put targets
# on the wrong rows or fill them with NaN.
processed_data['quality'] = y.to_numpy()

processed_data.to_csv('wine_quality.csv', index=False)
print("Processed data saved to 'wine_quality.csv'")

Search This Blog

TECH VS ME

Wine Quality prediction project code

Comments

Post a Comment

Popular posts from this blog

Wine Quality Prediction Project (Documentation) Jupyter Notebook