# One-hot encode the location column directly on the frame (get_dummies with
# `columns=` drops the source column and appends the dummies at the end, just
# like the concat-then-drop pattern), then separate features from the target.
df_ml = pd.get_dummies(df_ml, columns=['LOCATION_ID'], prefix='location')
X = df_ml.drop(columns='Risk')
y = df_ml['Risk']
# Build the train/test split: 70% for training, stratified on the target so
# both splits keep the original class balance; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    stratify=y,
    shuffle=True,
    random_state=42,
)
# Import the models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import metrics
# Train the models and compare their metrics on the test split.
# Candidate classifiers, keyed by the display name used in the report.
models = {
    "Logistic Regression": LogisticRegression(solver='liblinear', random_state=42),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=0),
    "GB": GradientBoostingClassifier(),
}
# Per-model metric accumulators, filled in by the evaluation loop below.
(model_list, accuracy_list, recall_list,
 precision_list, specificity_list, f1_score_list) = ([] for _ in range(6))
# Fit each candidate model and report its test-set metrics.
# NOTE(review): the 4-way (tn, fp, fn, tp) unpack assumes a binary target —
# confusion_matrix().ravel() yields exactly 4 values only for 2 classes.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_test_pred = model.predict(X_test)
    # Accuracy via the estimator's default scorer.
    score = round(model.score(X_test, y_test), 4)
    tn, fp, fn, tp = metrics.confusion_matrix(y_test, y_test_pred).ravel()
    # Compute metrics from the raw counts first and round only for reporting,
    # so F1 is not distorted by already-rounded precision/recall values.
    recall_raw = tp / (tp + fn)
    precision_raw = tp / (tp + fp)
    recall = round(recall_raw, 3)
    precision = round(precision_raw, 3)
    specificity = round(tn / (tn + fp), 3)
    f1_score = round(2 * precision_raw * recall_raw / (precision_raw + recall_raw), 3)

    print(name)
    model_list.append(name)
    print('Model performance for Test set')
    print("- Accuracy: {}".format(score))
    print("- Recall: {}".format(recall))
    print("- Precision: {}".format(precision))
    print("- Specificity: {}".format(specificity))
    print("- f1_score: {}".format(f1_score))
    accuracy_list.append(score)
    recall_list.append(recall)
    precision_list.append(precision)
    specificity_list.append(specificity)
    f1_score_list.append(f1_score)
    print('='*35)
    print('\n')