This is an archived version of the course and is no longer updated. Please find the latest version of the course on the main webpage.

Example training and evaluation

Here is a complete example of how we might use scikit-learn to train classification models and evaluate them on a test set.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the iris dataset and pull out the feature matrix and the class labels
dataset = load_iris()
x, y = dataset.data, dataset.target

# Hold out 20% of the samples for testing and train on the remaining 80%;
# random_state is fixed so that the same split is produced on every run
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

# Train a decision tree using the entropy criterion, then predict on the test data
# (fit() returns the fitted estimator itself, so construction and training can be chained)
classifier1 = DecisionTreeClassifier(
    criterion="entropy",
    random_state=10,
).fit(x_train, y_train)
predictions1 = classifier1.predict(x_test)

# Train a second decision tree with different hyperparameters
# (gini criterion, depth limited to 2), then predict on the same test data
classifier2 = DecisionTreeClassifier(
    criterion="gini",
    max_depth=2,
    random_state=10,
).fit(x_train, y_train)
predictions2 = classifier2.predict(x_test)

# Evaluate the first decision tree against the true test labels
print("Evaluating first decision tree...")

# Compute both metrics up front, then report them
accuracy1 = accuracy_score(y_test, predictions1)
confusion1 = confusion_matrix(y_test, predictions1)

print(f"Accuracy: {accuracy1}")
print("Confusion matrix:")
print(confusion1)
## Evaluating first decision tree...
## Accuracy: 1.0
## Confusion matrix:
## [[10  0  0]
##  [ 0  9  0]
##  [ 0  0 11]]

# Evaluate the second decision tree against the true test labels
print("Evaluating second decision tree...")

# Compute both metrics up front, then report them
accuracy2 = accuracy_score(y_test, predictions2)
confusion2 = confusion_matrix(y_test, predictions2)

print(f"Accuracy: {accuracy2}")
print("Confusion matrix:")
print(confusion2)
## Evaluating second decision tree...
## Accuracy: 0.9666666666666667
## Confusion matrix:
## [[10  0  0]
##  [ 0  8  1]
##  [ 0  0 11]]