everfortuneai_interview/Q3/code.py

53 lines
1.3 KiB
Python

# code.py
# Please use env.yaml to build the corresponding conda env before running this script
#
# author : deng
# date : 20230919
# platform: MacBook Pro 14 2021
import pandas
from sklearn import metrics
import matplotlib.pyplot as plt
def show_age_histogram(ages, title):
    """Display a 20-bin histogram of *ages* in its own figure window.

    Args:
        ages: pandas Series of ages to plot.
        title: Title shown above the plot.
    """
    plt.title(title)
    plt.xlabel('age')
    plt.ylabel('count')
    ages.hist(bins=20)
    plt.show()


def encode_ground_truth(labels):
    """Convert ground-truth labels into binary targets.

    Args:
        labels: Iterable of label values (e.g. the 'Ground truth' column).

    Returns:
        A list with 1 for every label equal to 'Sick' and 0 for any
        other value.
    """
    return [1 if label == 'Sick' else 0 for label in labels]


def show_roc_curve(targets, scores):
    """Plot the ROC curve of *scores* against binary *targets*.

    Args:
        targets: Binary ground-truth values, where 1 is the positive class.
        scores: Predicted scores for the positive class.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = metrics.roc_curve(targets, scores, pos_label=1)
    plt.title('ROC curve')
    plt.plot(fpr, tpr)
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


def main(csv_path='psudo_result.csv'):
    """Load the result CSV, print quick summaries and show the plots.

    Args:
        csv_path: Path of the CSV file to analyse. It is read for the
            'Ground truth', 'Gender', 'Age' and 'AI pred' columns.
    """
    df = pandas.read_csv(csv_path)
    # Drop the last row of the file.
    # NOTE(review): presumably a trailing summary/blank row - confirm
    # against the CSV before relying on this.
    df = df[:-1]

    # Quick glance
    print('Ground truth')
    print(df['Ground truth'].value_counts())
    print('Gender')
    print(df['Gender'].value_counts())
    print('Age')
    show_age_histogram(df['Age'], 'All patients')
    show_age_histogram(df[df['Ground truth'] == 'Sick']['Age'],
                       'Sick patients')

    # Confusion Matrix (disabled in the original script; kept for reference)
    # threshold = 0.5
    # gt = df['Ground truth']
    # pred = ['Sick' if val >= threshold else 'No Sick'
    #         for val in df['AI pred']]
    # print(metrics.classification_report(gt, pred))

    # ROC curve
    show_roc_curve(encode_ground_truth(df['Ground truth']), df['AI pred'])


if __name__ == '__main__':
    main()