This is an archived version of the course. Please find the latest version of the course on the main webpage.

Chapter 7: Evaluation

Example training and evaluation

Josiah Wang

Here is a complete example of how we might use scikit-learn to train classification models and evaluate them on a test set.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

def _evaluate(label, y_true, y_pred):
    """Print and return the accuracy and confusion matrix for one model.

    Args:
        label: Human-readable model name inserted into the header line.
        y_true: Gold-standard class labels of the evaluation set.
        y_pred: Class labels predicted by the model for the same samples.

    Returns:
        A tuple ``(accuracy, confusion)`` holding the results of
        ``accuracy_score`` and ``confusion_matrix`` respectively.
    """
    print(f"Evaluating {label}...")
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy}")
    print("Confusion matrix:")
    confusion = confusion_matrix(y_true, y_pred)
    print(confusion)
    return accuracy, confusion


# Load dataset
dataset = load_iris()
x = dataset.data
y = dataset.target

# Split the dataset into 80% train and 20% test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

# Fit a decision tree classifier, and predict on test data
classifier1 = DecisionTreeClassifier(criterion="entropy", random_state=10)
classifier1.fit(x_train, y_train)
predictions1 = classifier1.predict(x_test)

# Fit another decision tree classifier with different hyperparameters, and predict on test data
classifier2 = DecisionTreeClassifier(criterion="gini", max_depth=2, random_state=10)
classifier2.fit(x_train, y_train)
predictions2 = classifier2.predict(x_test)

# Evaluate first decision tree
accuracy1, confusion1 = _evaluate("first decision tree", y_test, predictions1)
## Evaluating first decision tree...
## Accuracy: 1.0
## Confusion matrix:
## [[10  0  0]
##  [ 0  9  0]
##  [ 0  0 11]]

# Evaluate second decision tree
accuracy2, confusion2 = _evaluate("second decision tree", y_test, predictions2)
## Evaluating second decision tree...
## Accuracy: 0.9666666666666667
## Confusion matrix:
## [[10  0  0]
##  [ 0  8  1]
##  [ 0  0 11]]