Machine Learning (ML) can significantly enhance Six Sigma methodologies by providing powerful tools for data analysis, prediction, and optimization. Integrating ML into Six Sigma processes can lead to more accurate insights, better decision-making, and improved process control.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.impute import SimpleImputer
# Load sample manufacturing data (you would replace this with your own data)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00567/qualitative_bankruptcy.csv"
df = pd.read_csv(url)

# One-hot encode every categorical column, dropping the first level of each
# to avoid redundant (perfectly collinear) dummy columns.
df = pd.get_dummies(df, drop_first=True)

# Separate the binary target from the feature matrix.
y = df['Bankrupt?_Y']
X = df.drop(columns='Bankrupt?_Y')

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features using statistics learned from the training split only,
# so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(df.head())
print(f"\nShape of training data: {X_train.shape}")
print(f"Shape of test data: {X_test.shape}")
def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and print test-set metrics.

    Parameters
    ----------
    model : estimator following the scikit-learn API (``fit``/``predict``).
    X_train, X_test : array-like
        Feature matrices for the training and test splits.
    y_train, y_test : array-like
        Target vectors for the training and test splits.

    Returns
    -------
    float
        Test-set accuracy. The confusion matrix and classification report
        are printed as a side effect.
    """
    # Train the model on the training split.
    model.fit(X_train, y_train)
    # Predict labels for the held-out test split.
    y_pred = model.predict(X_test)
    # Evaluate: overall accuracy, per-class confusion matrix, and
    # precision/recall/F1 summary.
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    print("\nConfusion Matrix:")
    print(conf_matrix)
    print("\nClassification Report:")
    print(class_report)
    # Returned so callers can compare models programmatically;
    # existing call sites ignore the return value, so this is
    # backward-compatible (original body was pasted without indentation
    # and would not even parse).
    return accuracy
# Two baseline classifiers. The linear model gets the standardized
# features; the tree uses the raw ones (trees are scale-invariant).
log_reg = LogisticRegression(random_state=42)
dt = DecisionTreeClassifier(random_state=42)

for title, clf, features_train, features_test in (
    ("Logistic Regression Results:", log_reg, X_train_scaled, X_test_scaled),
    ("\nDecision Tree Results:", dt, X_train, X_test),
):
    print(title)
    train_and_evaluate_model(clf, features_train, features_test, y_train, y_test)
# Visualize which features the decision tree relied on most.
feature_importance = (
    pd.DataFrame({'feature': X.columns, 'importance': dt.feature_importances_})
    .sort_values('importance', ascending=False)
)

plt.figure(figsize=(10, 6))
sns.barplot(x='importance', y='feature', data=feature_importance)
plt.title('Feature Importance in Decision Tree')
plt.tight_layout()
plt.show()
# Random Forest: an ensemble of trees, usually stronger than a single tree.
rf = RandomForestClassifier(random_state=42)
print("\nRandom Forest Results:")
train_and_evaluate_model(rf, X_train, X_test, y_train, y_test)

# Partial dependence: how the model's prediction responds to each of the
# first two features, averaged over the test set.
from sklearn.inspection import PartialDependenceDisplay

# NOTE(review): column indices 0 and 1 are placeholders — adjust to the
# most important features for your data.
PartialDependenceDisplay.from_estimator(rf, X_test, [0, 1])
plt.tight_layout()
plt.show()
# Simulate missing data: blank out the first feature in 100 distinct rows.
# Fixes vs. original: sample positions (not index labels) for .iloc, and
# sample without replacement so exactly 100 cells go missing — the original
# passed index labels to .iloc and could draw duplicates.
X_with_missing = X.copy()
missing_rows = np.random.choice(len(X_with_missing), size=100, replace=False)
X_with_missing.iloc[missing_rows, 0] = np.nan

# Impute missing values with the column mean. fit_transform returns a bare
# ndarray, so restore both the column names and the original row index
# (the original dropped the index, breaking row alignment with X).
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(
    imputer.fit_transform(X_with_missing),
    columns=X.columns,
    index=X.index,
)

print("Original data:")
print(X.head())
print("\nData with missing values:")
print(X_with_missing.head())
print("\nImputed data:")
print(X_imputed.head())
from sklearn.ensemble import IsolationForest

# Simulate process measurements: 1000 samples around 10 ± 1, with 20
# randomly chosen points shifted upward to act as anomalies.
np.random.seed(42)
process_data = np.random.normal(loc=10, scale=1, size=1000)
anomaly_idx = np.random.choice(1000, 20)
process_data[anomaly_idx] += np.random.uniform(5, 10, 20)

# scikit-learn expects a 2-D (n_samples, n_features) array.
process_data = process_data.reshape(-1, 1)

# Isolation Forest flags the ~1% most isolated points as anomalies
# (-1 = anomaly, 1 = normal).
iso_forest = IsolationForest(contamination=0.01, random_state=42)
predictions = iso_forest.fit_predict(process_data)

# Plot the series, colored by the model's verdict.
plt.figure(figsize=(12, 6))
plt.scatter(range(len(process_data)), process_data, c=predictions, cmap='viridis')
plt.title('Anomaly Detection in Process Data')
plt.xlabel('Sample')
plt.ylabel('Measurement')
plt.colorbar(label='Prediction (-1: anomaly, 1: normal)')
plt.tight_layout()
plt.show()
Machine Learning can be integrated into each phase of the DMAIC (Define, Measure, Analyze, Improve, Control) process — for example, clustering historical defect data to scope the problem in Define, automating measurement-system checks in Measure, building predictive models to find root causes in Analyze, using optimization to select process settings in Improve, and deploying real-time anomaly detection in Control.
When integrating Machine Learning into Six Sigma projects, consider factors such as data quality and availability, model interpretability for stakeholders, the statistical skills of the team, and the effort required to deploy and maintain models in production.
Integrating Machine Learning into Six Sigma methodologies can significantly enhance the effectiveness of quality improvement initiatives. By leveraging advanced analytics and predictive modeling, organizations can gain deeper insights into their processes, make more accurate predictions, and optimize their operations more effectively. However, it's crucial to approach this integration thoughtfully, ensuring that the fundamentals of Six Sigma are maintained while harnessing the power of AI and Machine Learning.