Setup Environment and Dependencies
Step 1: Create Virtual Environment
# Install virtualenv if not already installed
pip install virtualenv
# Create a virtual environment named 'heart_disease_env'
virtualenv heart_disease_env
# Activate the virtual environment
# On Windows
.\heart_disease_env\Scripts\activate
# On MacOS/Linux
source heart_disease_env/bin/activate
Step 2: Install Required Libraries
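Create a requirements.txt file with the necessary dependencies (if you plan to follow the Flask deployment and integration-test sections below, also add flask and requests to this file):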
scikit-learn==1.0.2
pandas==1.3.5
numpy==1.21.4
matplotlib==3.5.1
Install the dependencies using pip:
pip install -r requirements.txt
Step 3: Verify the Setup
Create a Python script verify_setup.py to ensure that all libraries are correctly installed:
# verify_setup.py: import each dependency and report the version that is installed.
import sklearn
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt  # imported only to confirm that pyplot itself loads

print("scikit-learn version:", sklearn.__version__)
print("pandas version:", pd.__version__)
print("numpy version:", np.__version__)
# The version string lives on the top-level matplotlib package; the pyplot
# submodule has no __version__ attribute, so plt.__version__ raises AttributeError.
print("matplotlib version:", matplotlib.__version__)
Run the script:
python verify_setup.py
If the setup is correct, the script will print the versions of the installed libraries.
Step 4: Deactivate the Virtual Environment
# Deactivate the virtual environment
deactivate
Now the environment and dependencies for your project are set up and verified.
# Input Validation and Error Handling
def validate_input(patient_data):
    """
    Validate the input data received for a heart disease prediction.

    Parameters:
    patient_data (dict): Dictionary containing patient data with 13 medical features.

    Returns:
    bool, str: (True, "Validation passed.") if the data is valid, otherwise
    (False, <error message>) describing the first problem found.
    """
    # Local import keeps this snippet self-contained.
    import numbers

    REQUIRED_FEATURES = [
        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
        'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'
    ]

    # A non-dict cannot be checked key by key; report it instead of letting
    # the membership test below raise (as it would for None).
    if not isinstance(patient_data, dict):
        return False, f"Invalid input: Expected dict, got {type(patient_data).__name__}"

    # Check for missing features
    for feature in REQUIRED_FEATURES:
        if feature not in patient_data:
            return False, f"Missing feature: {feature}"

    # Validate feature types. isinstance with numbers.Real accepts int, float,
    # their subclasses and numpy numeric scalars, all of which an exact
    # type() comparison would reject.
    for feature, value in patient_data.items():
        # bool is a subclass of int, so it has to be excluded explicitly to
        # keep rejecting True/False as feature values.
        if isinstance(value, bool) or not isinstance(value, numbers.Real):
            return False, f"Invalid type for {feature}: Expected int or float, got {type(value).__name__}"

    return True, "Validation passed."
def predict_heart_disease(model, patient_data):
    """
    Run the pre-trained model on one patient's record and describe the outcome.

    Parameters:
    model (sklearn.base.BaseEstimator): Pre-trained scikit-learn model.
    patient_data (dict): Dictionary with the patient's medical features; it is
        checked with validate_input before being used.

    Returns:
    str: "Heart Disease Likely" or "Heart Disease Unlikely" on success,
    "Error: ..." when validation fails, or "Prediction error: ..." when the
    model call raises.
    """
    is_valid, reason = validate_input(patient_data)
    if not is_valid:
        return f"Error: {reason}"

    # Order in which the feature values are handed to the model.
    feature_order = (
        'age', 'sex', 'cp', 'trestbps',
        'chol', 'fbs', 'restecg', 'thalach',
        'exang', 'oldpeak', 'slope', 'ca',
        'thal',
    )
    row = [patient_data[name] for name in feature_order]

    try:
        # The model receives a list holding this single row.
        outcome = model.predict([row])
        if outcome[0] == 1:
            return "Heart Disease Likely"
        return "Heart Disease Unlikely"
    except Exception as e:
        return f"Prediction error: {str(e)}"
validate_input ensures all required features are present and of the correct type. predict_heart_disease uses this utility to return the heart disease prediction or an error message.
# Import necessary libraries
import joblib
from sklearn.preprocessing import StandardScaler
# Load the pre-trained model (the path is a placeholder; point it at your own file)
# NOTE(review): joblib.load is pickle-based as far as I know, so only load
# files that come from a source you trust.
model = joblib.load('path/to/your/pretrained_model.pkl')
# Load the pre-trained scaler (placeholder path as well)
scaler = joblib.load('path/to/your/scaler.pkl')
# Function to make predictions using the pre-trained model and scaler
def predict_heart_disease(features):
    """
    Predict the likelihood of heart disease given medical features.

    Relies on the module-level ``scaler`` and ``model`` objects.

    :param features: List or numpy array of length 13 with patient medical features
    :return: Whatever ``model.predict`` returns for the single scaled sample
    """
    # Local import so the snippet does not depend on a file-level numpy import.
    import numpy as np

    # np.asarray accepts the plain list the docstring promises (a list has no
    # .reshape method) and passes an existing ndarray through unchanged.
    # reshape(1, -1) then produces the single-row 2-D layout given to the scaler.
    sample = np.asarray(features).reshape(1, -1)
    # Scale the input features
    scaled_features = scaler.transform(sample)
    # Make a prediction
    return model.predict(scaled_features)
# Example usage
# Assuming features is a numpy array or list containing 13 medical features
# features = np.array([...])
# result = predict_heart_disease(features)
# print("Prediction:", result)
This code assumes you have the appropriate model and scaler objects saved as .pkl files. The function predict_heart_disease takes in medical features, scales them, and then uses the pre-trained model to make a prediction. Make sure joblib, sklearn, and any other dependencies are properly installed and configured in your Python environment.
# Import necessary library for feature normalization
from sklearn.preprocessing import StandardScaler
import numpy as np
# Placeholder array for example purposes, replace it with your actual input data.
# Each row is one patient and each of the 13 columns is one feature.
# NOTE(review): presumably the columns follow the order age, sex, cp, trestbps,
# chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal used elsewhere in
# this guide; confirm against the data the scaler was fitted on.
input_features = np.array([
[63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1], # Example patient 1
[37, 0, 2, 130, 250, 0, 1, 187, 0, 3.5, 1, 0, 2], # Example patient 2
])
# Assume `scaler` is your pre-trained scaler loaded from previous steps;
# it is not created in this snippet, so it must already exist at this point.
# Example: scaler = joblib.load('path_to_pretrained_scaler.pkl')
# Apply the scaler to normalize the input features (both rows in one call)
normalized_features = scaler.transform(input_features)
# The normalized features can now be used as input to the model
print(normalized_features)
# Part 5: Make and Output Prediction
def make_prediction(pretrained_model, scaler, input_features):
    """
    Estimate the probability of heart disease for a single patient.

    - pretrained_model: loaded model; its predict_proba method is called
    - scaler: loaded scaler; its transform method normalizes the features
    - input_features: a list or array of 13 medical features
    Returns the entry at row 0, column 1 of predict_proba's output, used here
    as the probability of heart disease.
    """
    import numpy as np

    # One patient becomes one row of a 2-D array.
    single_row = np.array(input_features).reshape(1, -1)
    # Scale first, then ask the model for the class probabilities.
    class_probabilities = pretrained_model.predict_proba(scaler.transform(single_row))
    return class_probabilities[0][1]
# Example usage (assuming model and scaler are already loaded and input features are provided):
# `model` and `scaler` are not created here; the two commented lines below are
# placeholders showing where they would be loaded.
# model = load_your_pretrained_model()
# scaler = load_your_scaler()
input_features = [57, 1, 2, 130, 236, 0, 0, 174, 0, 0.0, 1, 1, 2] # Example patient data
# Scale the single patient record and obtain the predicted probability
probability = make_prediction(model, scaler, input_features)
# Report the probability formatted to two decimal places
print(f"Predicted probability of heart disease: {probability:.2f}")
In the implementation provided, you’d need to replace load_your_pretrained_model() and load_your_scaler() with actual functions or methods that load your model and scaler. The input_features should be the patient data you wish to evaluate, consisting of 13 medical features. The output will be the predicted probability of heart disease.
Testing and Deployment
Unit Tests
import unittest
import joblib
import numpy as np
# Assuming `predict_heart_disease` is the primary function to test
# and it's located in a module named 'predict'
from predict import predict_heart_disease
class TestHeartDiseasePrediction(unittest.TestCase):
    """Unit tests for predict_heart_disease using the saved model and scaler."""

    @classmethod
    def setUpClass(cls):
        # Load the model and scaler only once: setUpClass runs a single time
        # for the whole class, whereas setUp would reload both files before
        # every test method.
        cls.model = joblib.load('heart_disease_model.pkl')
        cls.scaler = joblib.load('scaler.pkl')

    def test_prediction_output(self):
        """A well-formed 13-feature sample yields a binary class label."""
        # Example input: 13 feature values
        test_input = np.array([[63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]])
        prediction = predict_heart_disease(test_input, self.model, self.scaler)
        self.assertIn(prediction, [0, 1], "Prediction should be 0 or 1 for binary classification.")

    def test_invalid_input(self):
        """A sample with the wrong number of features raises ValueError."""
        # Example of invalid input: incorrect number of features
        test_input = np.array([[63, 1, 3, 145, 233, 1, 150, 0, 2.3, 0, 0, 1]])  # 12 features instead of 13
        # Only the call under test sits inside assertRaises, so an error while
        # building the fixture cannot make the test pass by accident.
        with self.assertRaises(ValueError):
            predict_heart_disease(test_input, self.model, self.scaler)
# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Integration Tests
import requests
import unittest
class TestHeartDiseasePredictionAPI(unittest.TestCase):
    """Integration test for the /predict endpoint.

    The request goes to http://127.0.0.1:5000, so the service has to be
    running there before this test is started.
    """

    def test_predict_endpoint(self):
        """POSTing 13 features returns HTTP 200 and a binary prediction."""
        url = 'http://127.0.0.1:5000/predict'
        payload = {
            "features": [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]
        }
        # requests applies no timeout by default; without one a server that
        # accepts the connection but never answers would hang the test run.
        response = requests.post(url, json=payload, timeout=10)
        self.assertEqual(response.status_code, 200)
        self.assertIn(response.json()['prediction'], [0, 1], "Prediction should be 0 or 1 for binary classification.")
# Run the integration tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Deployment
Dockerfile
# Use an official Python runtime as a parent image
FROM python:3.8-slim
# Set the working directory
WORKDIR /app
# Copy only the dependency list first, so the install layer below is reused
# from the build cache until requirements.txt itself changes
COPY requirements.txt .
# Install the packages specified in requirements.txt; --no-cache-dir keeps
# pip's download cache out of the image
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt
# Copy the rest of the current directory contents into the container at /app
COPY . /app
# Make port 5000 available to the world outside this container
EXPOSE 5000
# Define environment variable
ENV FLASK_APP=app.py
# Run app.py when the container launches
# NOTE: flask must be listed in requirements.txt, otherwise the `flask`
# command is not available in the image
CMD ["flask", "run", "--host=0.0.0.0"]
Flask Application
from flask import Flask, request, jsonify
import joblib
import numpy as np
# Create the Flask application object that the route below is registered on
app = Flask(__name__)
# Load model and scaler once at module import, so request handlers reuse the
# same objects instead of reading the files on every request
model = joblib.load('heart_disease_model.pkl')
scaler = joblib.load('scaler.pkl')
@app.route('/predict', methods=['POST'])
def predict():
    """
    Handle POST /predict.

    Expects a JSON object with a "features" list, scales it with the
    module-level scaler and classifies it with the module-level model.

    Returns:
        200 with {"prediction": <int>} on success,
        400 with {"error": <message>} when the request or its features are invalid,
        500 with {"error": <message>} when prediction fails for any other reason.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so the problem can be reported explicitly below.
    content = request.get_json(silent=True)
    if not isinstance(content, dict) or 'features' not in content:
        return jsonify({"error": "Request body must be a JSON object with a 'features' list."}), 400

    try:
        # dtype=float rejects non-numeric values here rather than deeper in the pipeline.
        features = np.array(content['features'], dtype=float).reshape(1, -1)
    except (TypeError, ValueError) as e:
        return jsonify({"error": f"Invalid features: {e}"}), 400

    try:
        scaled_features = scaler.transform(features)
        prediction = model.predict(scaled_features)
    except ValueError as e:
        # Treated as a client error: presumably scikit-learn signals a wrong
        # feature count or NaN input this way.
        return jsonify({"error": str(e)}), 400
    except Exception:
        # Anything else is a server-side fault: log the details and keep
        # internal exception text out of the response.
        app.logger.exception("Prediction failed")
        return jsonify({"error": "Prediction failed."}), 500

    return jsonify({"prediction": int(prediction[0])})
if __name__ == "__main__":
    # Local import keeps this snippet self-contained.
    import os

    # Debug mode turns on the interactive debugger, which lets anyone who can
    # reach the server run arbitrary code. It is therefore off unless
    # explicitly requested with FLASK_DEBUG=1 during local development.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
Build and Run Docker Container
# Build Docker image
docker build -t heart-disease-predictor .
# Run Docker container
docker run -p 5000:5000 heart-disease-predictor