Source code for regdiffusion.evaluator

import numpy as np
import pandas as pd
from sklearn.metrics import average_precision_score, roc_auc_score

class GRNEvaluator:
    """
    A generalized evaluator for GRN inference.

    Args:
        ground_truth (np.ndarray or list): Either a 2D numpy array or a
            list of lists holding the ground truth. Each row is an edge
            and includes the names of the source and target nodes. For
            example, [['A', 'B'], ['B', 'C']].
        gene_names (np.ndarray or list): Either a 1D numpy array or a list
            of gene names. Make sure the order of the gene names is the
            same as the order of the gene names in the adjacency matrix.
        metrics (list): A list of evaluation metrics to report. The
            currently supported options are 'AUROC', 'AUPR', 'AUPRR',
            'EP', and 'EPR'.
    """
    def __init__(self, ground_truth, gene_names,
                 metrics=['AUROC', 'AUPR', 'AUPRR', 'EP', 'EPR']):
        n_gene = len(gene_names)
        gene1 = [x[0] for x in ground_truth]
        gene2 = [x[1] for x in ground_truth]
        # TF is the set of genes appearing as regulators
        TF = set(gene1)
        # All_gene is the combined set of regulator and target genes
        All_gene = set(gene1) | set(gene2)
        tf_mask = np.zeros(n_gene, dtype=bool)
        # Not all provided genes appear in All_gene, so a second mask is
        # needed for the target dimension as well
        gene_mask = np.zeros(n_gene, dtype=bool)
        tf_map = {}
        gene_map = {}
        for i, item in enumerate(gene_names):
            if item in TF:
                tf_mask[i] = True
                tf_map[item] = len(tf_map)
            if item in All_gene:
                gene_mask[i] = True
                gene_map[item] = len(gene_map)
        # Dense 0/1 label matrix (regulators x genes in the ground truth),
        # flattened so it can be passed to sklearn's ranking metrics
        y_true = np.zeros([len(TF), len(All_gene)])
        for link in ground_truth:
            y_true[tf_map[link[0]], gene_map[link[1]]] = 1.0
        y_true = y_true.flatten()

        self.tf_mask = tf_mask
        self.gene_mask = gene_mask
        self.y_true = y_true
        self.y_true_mean = np.mean(y_true)
        self.num_true_edges = int(y_true.sum())
        self.ground_truth = ground_truth
        self.report_auroc = ('AUROC' in metrics)
        self.report_aupr = ('AUPR' in metrics)
        self.report_auprr = ('AUPRR' in metrics)
        self.report_ep = ('EP' in metrics)
        self.report_epr = ('EPR' in metrics)

    def evaluate(self, A):
        """Evaluate a predicted adjacency matrix against the ground truth.

        Args:
            A (np.ndarray): Predicted adjacency matrix. Either a square
                gene-by-gene matrix, which will be subset to the
                regulators and targets found in the ground truth, or a
                matrix that has already been subset in that way.

        Returns:
            dict: The requested evaluation metrics.
        """
        if A.shape[0] == A.shape[1]:
            A = A[self.tf_mask, :]
            A = A[:, self.gene_mask]
        # Edges are ranked by the magnitude of their scores; signs are
        # ignored
        y_pred = np.abs(A.flatten())
        eval_results = {}
        if self.report_auroc:
            eval_results['AUROC'] = roc_auc_score(self.y_true, y_pred)
        if self.report_aupr or self.report_auprr:
            eval_results['AUPR'] = average_precision_score(
                self.y_true, y_pred
            )
            # AUPR ratio: AUPR divided by the AUPR of a random predictor,
            # which equals the density of true edges (mean of y_true)
            eval_results['AUPRR'] = eval_results['AUPR'] / self.y_true_mean
        if self.report_ep or self.report_epr:
            # Early precision: the number of true edges among the k
            # top-scoring predictions (strictly above the cutoff), where
            # k is the number of ground-truth edges
            cutoff = np.partition(
                y_pred, -self.num_true_edges
            )[-self.num_true_edges]
            y_above_cutoff = y_pred > cutoff
            eval_results['EP'] = int(np.sum(self.y_true[y_above_cutoff]))
            # Early precision ratio: EP divided by its expectation under a
            # random ranking, k * (k / number of candidate edges)
            eval_results['EPR'] = 1. * eval_results['EP']
            eval_results['EPR'] /= ((self.num_true_edges ** 2) / len(y_pred))
        return eval_results
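Below is a minimal usage sketch of GRNEvaluator. The gene names, ground-truth edges, and random adjacency matrix are toy values made up for illustration; they are not part of the module.

import numpy as np
from regdiffusion.evaluator import GRNEvaluator

# Toy example: 4 genes, 3 known regulatory edges (source, target)
gene_names = ['A', 'B', 'C', 'D']
ground_truth = [['A', 'B'], ['A', 'C'], ['B', 'C']]
evaluator = GRNEvaluator(ground_truth, gene_names)

# A full gene-by-gene score matrix; because it is square, evaluate()
# subsets it to the regulators and targets found in the ground truth
rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4))

print(evaluator.evaluate(A))
# e.g. {'AUROC': ..., 'AUPR': ..., 'AUPRR': ..., 'EP': ..., 'EPR': ...}

Passing a square matrix is convenient when the inference method scores every gene pair. A pre-subset regulator-by-gene matrix is also accepted, in which case its rows and columns must follow the gene order implied by tf_mask and gene_mask.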