# everfortuneai_interview/Q3/code.py

# code.py
# Please use env.yaml to build the corresponding conda env if you want to run this script
#
# author  : deng
# date    : 20230919
# platform: MacBook Pro 14 2021

import pandas
from sklearn import metrics
import matplotlib.pyplot as plt


def _load_results(csv_path):
    """Read the result CSV at *csv_path* and return it without its last row.

    The final row is discarded exactly as the original script did.
    NOTE(review): presumably the file ends with a non-data row (summary or
    blank line) -- confirm against the CSV before relying on this.
    """
    # .iloc makes the "drop the last row" slice explicitly positional.
    return pandas.read_csv(csv_path).iloc[:-1]


def _show_age_histogram(ages, title):
    """Show a 20-bin histogram of *ages* in a blocking window titled *title*."""
    # Title and axis labels are set first; Series.hist() is called without an
    # explicit axes, so the labels and the bars end up on the same plot.
    plt.title(title)
    plt.xlabel('age')
    plt.ylabel('count')
    ages.hist(bins=20)
    plt.show()


def _print_and_plot_overview(df):
    """Print label/gender counts and show the two age histograms.

    Reads the 'Ground truth', 'Gender' and 'Age' columns of *df*.
    """
    print('Ground truth')
    print(df['Ground truth'].value_counts())
    print('Gender')
    print(df['Gender'].value_counts())
    print('Age')
    _show_age_histogram(df['Age'], 'All patients')
    # Single .loc lookup (row mask + column) instead of chained indexing.
    sick_ages = df.loc[df['Ground truth'] == 'Sick', 'Age']
    _show_age_histogram(sick_ages, 'Sick patients')


def _plot_roc_curve(df):
    """Plot the ROC curve of the 'AI pred' column against 'Ground truth'.

    Rows whose 'Ground truth' equals 'Sick' are the positive class (1);
    every other value is treated as negative (0).
    """
    # Vectorised equivalent of "1 if val == 'Sick' else 0" per row.
    is_sick = (df['Ground truth'] == 'Sick').astype(int)
    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = metrics.roc_curve(is_sick, df['AI pred'], pos_label=1)
    plt.title('ROC curve')
    plt.plot(fpr, tpr)
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


def main(csv_path='psudo_result.csv'):
    """Run the analysis: quick-glance statistics, then the ROC curve.

    Args:
        csv_path: Path of the result CSV. It must provide the columns
            'Ground truth', 'Gender', 'Age' and 'AI pred'. Defaults to the
            file name the script has always used.
    """
    df = _load_results(csv_path)

    # Quick glance
    _print_and_plot_overview(df)

    # Confusion Matrix
    # Disabled in the original script and kept here for reference only.
    # Despite the heading, it prints a classification report at a fixed
    # threshold of 0.5 rather than a confusion matrix.
    # threshold = 0.5
    # gt = df['Ground truth']
    # pred = ['Sick' if val >= threshold else 'No Sick'
    #         for val in df['AI pred']]
    # print(metrics.classification_report(gt, pred))

    # ROC curve
    _plot_roc_curve(df)


if __name__ == '__main__':
    main()