-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_model.py
67 lines (46 loc) · 1.63 KB
/
test_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pytest
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from starter.starter.ml.data import process_data
from starter.starter.ml.model import train_model, inference
from starter.starter.ml.model import compute_model_metrics
@pytest.fixture(scope='session')
def input_data():
    """Build the train/test data shared by every test in the session.

    Reads ``./starter/data/cleaned_data.csv`` (relative to the current
    working directory), holds out 20% of the rows for testing and runs both
    partitions through ``process_data`` with ``salary`` as the label. The
    ``encoder`` and ``lb`` objects returned by the training pass are handed
    to the test pass (``training=False``).

    The split is seeded: without a fixed ``random_state`` every pytest
    session would see a different partition, so a failing run could not be
    reproduced.

    Returns:
        list: ``[X_train, y_train, X_test, y_test]``.
    """
    data = pd.read_csv('./starter/data/cleaned_data.csv')
    train, test = train_test_split(data, test_size=0.20, random_state=42)

    cat_features = [
        "workclass",
        "education",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "sex",
        "native-country",
    ]

    X_train, y_train, encoder, lb = process_data(
        train, categorical_features=cat_features, label="salary",
        training=True
    )
    # The encoder/lb returned by this second call are not used afterwards,
    # so they are discarded instead of rebinding the training-pass objects.
    X_test, y_test, _, _ = process_data(
        test, categorical_features=cat_features, label="salary",
        training=False, encoder=encoder, lb=lb
    )

    return [X_train, y_train, X_test, y_test]
def test_train_model(input_data):
    """Train a model on the fixture data and check it yields predictions.

    The test passes when ``predict`` on the held-out features returns a
    non-empty result.
    """
    X_train, y_train, X_test, _ = input_data
    fitted = train_model(X_train, y_train)
    preds = fitted.predict(X_test)
    assert len(preds) != 0
def test_compute_model_metrics(input_data):
    """Check that every metric for a freshly trained model is non-zero.

    A model is trained on the fixture's training partition, its predictions
    on the test features are scored against the test labels, and each of
    the three values returned by ``compute_model_metrics`` must differ
    from zero.
    """
    X_train, y_train, X_test, y_test = input_data
    predicted = train_model(X_train, y_train).predict(X_test)

    scores = compute_model_metrics(y_test, predicted)
    precision, recall, fbeta = scores

    assert precision != 0
    assert recall != 0
    assert fbeta != 0
def test_inference(input_data):
    """Run ``inference`` with the model saved on disk and check the output.

    The model is loaded from ``./starter/model/RF_Classifier.pkl`` (relative
    to the current working directory) and applied to the fixture's test
    features; the result must be non-empty.
    """
    saved_model = joblib.load('./starter/model/RF_Classifier.pkl')
    X_test = input_data[2]
    outputs = inference(saved_model, X_test)
    assert len(outputs) != 0