Introduction to Scikit-learn
Chapter 7: Evaluation
Example training and evaluation
Here is a complete example of how we might use scikit-learn to train models for classification, and to evaluate them on a test set.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
# Load the iris dataset and pull out its data (x) and target (y) arrays
dataset = load_iris()
x, y = dataset.data, dataset.target
# Hold out 20% of the samples for testing and keep the other 80% for training;
# the fixed random_state makes the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)
# First model: a decision tree using the entropy criterion, fitted on the
# training split and then used to predict labels for the test split
classifier1 = DecisionTreeClassifier(
    criterion="entropy",
    random_state=10,
).fit(x_train, y_train)
predictions1 = classifier1.predict(x_test)
# Second model: a decision tree with different hyperparameters (gini criterion,
# max_depth of 2), fitted and used for prediction in the same way
classifier2 = DecisionTreeClassifier(
    criterion="gini",
    max_depth=2,
    random_state=10,
).fit(x_train, y_train)
predictions2 = classifier2.predict(x_test)
# Score the first decision tree's predictions against the true test labels
accuracy1 = accuracy_score(y_test, predictions1)
confusion1 = confusion_matrix(y_test, predictions1)
# Report the results: header line, accuracy, then the confusion matrix
print("Evaluating first decision tree...")
print(f"Accuracy: {accuracy1}")
print("Confusion matrix:")
print(confusion1)
## Evaluating first decision tree...
## Accuracy: 1.0
## Confusion matrix:
## [[10 0 0]
## [ 0 9 0]
## [ 0 0 11]]
# Score the second decision tree's predictions against the true test labels
accuracy2 = accuracy_score(y_test, predictions2)
confusion2 = confusion_matrix(y_test, predictions2)
# Report the results: header line, accuracy, then the confusion matrix
print("Evaluating second decision tree...")
print(f"Accuracy: {accuracy2}")
print("Confusion matrix:")
print(confusion2)
## Evaluating second decision tree...
## Accuracy: 0.9666666666666667
## Confusion matrix:
## [[10 0 0]
## [ 0 8 1]
## [ 0 0 11]]