Skip to content

Commit ee38ff3

Browse files
authored
Update titanicprocessing
1 parent 8e17629 commit ee38ff3

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

titanicprocessing

+4-1
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ while degerson < 0.85:
220220
X_train, X_test, y_train, y_test = train_test_split(train.drop(['Survived'], axis = 1),
221221
train['Survived'], test_size=0.3)
222222

223-
model = RandomForestClassifier(n_estimators = 5000, min_samples_split = 15, min_samples_leaf = 1, max_features = 'log2',
223+
model = RandomForestClassifier(n_estimators = 500, min_samples_split = 15, min_samples_leaf = 1, max_features = 'log2',
224224
max_depth = 40) # These values were determined through Randomized Search CV above
225225
model.fit(X_train, y_train)
226226
deger = cross_validate(model, X_test, y_test, cv=30) # I like to use cross validation to determine my score
@@ -239,6 +239,9 @@ deger['test_score'].mean()
239239

240240
test = pd.read_csv('/kaggle/input/titanic/test.csv')
241241

242+
test.isnull().sum()
243+
# Check how many NaN values there are in each column of the test dataset
244+
242245
test['Embarked'] = test['Embarked'].apply(embarkcorr)
243246
test['Age'] = test[['Age', 'Pclass', 'Embarked']].apply(agedet, axis = 1)
244247
test['Age'] = test['Age'].apply(agecat)

0 commit comments

Comments
 (0)