Source code for econml.validate.results

import numpy as np
import pandas as pd

from typing import List, Dict, Any


class CalibrationEvaluationResults:
    """
    Results class for calibration test.

    Parameters
    ----------
    cal_r_squared: list or numpy array of floats
        Sequence of calibration R^2 values
    plot_data_dict: dict
        Dictionary mapping treatment levels to dataframes containing necessary
        data for plotting calibration test GATE results
    treatments: list or numpy array of floats
        Sequence of treatment labels. The first entry is skipped everywhere in
        this class (presumably the control/baseline level -- confirm against
        the caller), so ``cal_r_squared[i]`` is paired with ``treatments[i + 1]``.
    """

    def __init__(
        self,
        cal_r_squared: np.array,
        plot_data_dict: Dict[Any, pd.DataFrame],
        treatments: np.array
    ):
        self.cal_r_squared = cal_r_squared
        self.plot_data_dict = plot_data_dict
        self.treatments = treatments

    def summary(self) -> pd.DataFrame:
        """
        Constructs dataframe summarizing the results of the calibration test.

        Parameters
        ----------
        None

        Returns
        -------
        pandas dataframe with one row per entry of ``treatments[1:]`` and columns
        ``treatment`` and ``cal_r_squared``, rounded to 3 decimals
        """
        res = pd.DataFrame({
            'treatment': self.treatments[1:],
            'cal_r_squared': self.cal_r_squared,
        }).round(3)
        return res

    def _cal_r_squared_for(self, tmt: Any):
        """
        Returns the calibration R^2 value paired with treatment label ``tmt``.

        The position is searched within ``treatments[1:]`` so that it lines up
        directly with ``cal_r_squared``. Searching a materialized list (instead
        of comparing ``treatments == tmt`` elementwise) works for plain Python
        lists as well as numpy arrays.

        Raises
        ------
        ValueError
            If ``tmt`` is not one of ``treatments[1:]``.
        """
        candidates = list(self.treatments[1:])
        return self.cal_r_squared[candidates.index(tmt)]

    def plot_cal(self, tmt: Any):
        """
        Plots group average treatment effects (GATEs) and predicted GATEs by
        quantile-based group in validation sample.

        Parameters
        ----------
        tmt: Any
            Name of treatment to plot

        Returns
        -------
        matplotlib plot with predicted GATE on x-axis and GATE (and 95% CI) on y-axis
        (the object returned by ``DataFrame.plot``)

        Raises
        ------
        ValueError
            If ``tmt`` is not one of ``treatments[1:]``.
        """
        if tmt not in self.treatments[1:]:
            raise ValueError(f'Invalid treatment; must be one of {self.treatments[1:]}')

        # Work on a copy so the stored plotting data is not mutated by the
        # helper column added below.
        df = self.plot_data_dict[tmt].copy()
        rsq = round(self._cal_r_squared_for(tmt), 3)
        # Half-width of the error bars: 1.96 standard errors.
        df['95_err'] = 1.96 * df['se_gate']
        fig = df.plot(
            kind='scatter',
            x='g_cate',
            y='gate',
            yerr='95_err',
            xlabel='Group Mean CATE',
            ylabel='GATE',
            title=f"Treatment = {tmt}, Calibration R^2 = {rsq}"
        )

        return fig
class BLPEvaluationResults:
    """
    Container for the outcome of the BLP test.

    Parameters
    ----------
    params: list or numpy array of floats
        Estimated coefficient values, one per entry of ``treatments[1:]``
    errs: list or numpy array of floats
        Standard errors of the estimated coefficients
    pvals: list or numpy array of floats
        P-values of the estimated coefficients
    treatments: list or numpy array of floats
        Treatment labels; the first entry is skipped when summarizing
    """

    def __init__(
        self,
        params: List[float],
        errs: List[float],
        pvals: List[float],
        treatments: np.array
    ):
        self.treatments = treatments
        self.params = params
        self.errs = errs
        self.pvals = pvals

    def summary(self):
        """
        Builds a dataframe summarizing the BLP test.

        Parameters
        ----------
        None

        Returns
        -------
        pandas dataframe with columns ``treatment``, ``blp_est``, ``blp_se`` and
        ``blp_pval``, rounded to 3 decimals
        """
        columns = {
            'treatment': self.treatments[1:],
            'blp_est': self.params,
            'blp_se': self.errs,
            'blp_pval': self.pvals,
        }
        table = pd.DataFrame(columns)
        return table.round(3)
class UpliftEvaluationResults:
    """
    Container for the outcome of the uplift curve-based tests.

    Parameters
    ----------
    params: list or numpy array of floats
        Estimated QINI coefficient values
    errs: list or numpy array of floats
        Standard errors of the estimated QINI coefficients
    pvals: list or numpy array of floats
        P-values of the estimated QINI coefficients
    treatments: list or numpy array of floats
        Treatment labels; the first entry is skipped when summarizing/plotting
    curve_data_dict: dict
        Mapping from treatment level to the dataframe holding the data needed
        to draw that treatment's uplift curve (stored as ``self.curves``)
    """

    def __init__(
        self,
        params: List[float],
        errs: List[float],
        pvals: List[float],
        treatments: np.array,
        curve_data_dict: Dict[Any, pd.DataFrame]
    ):
        self.curves = curve_data_dict
        self.treatments = treatments
        self.params = params
        self.errs = errs
        self.pvals = pvals

    def summary(self):
        """
        Builds a dataframe summarizing the QINI test.

        Parameters
        ----------
        None

        Returns
        -------
        pandas dataframe with columns ``treatment``, ``est``, ``se`` and ``pval``,
        rounded to 3 decimals
        """
        columns = {
            'treatment': self.treatments[1:],
            'est': self.params,
            'se': self.errs,
            'pval': self.pvals,
        }
        return pd.DataFrame(columns).round(3)

    def plot_uplift(self, tmt: Any, err_type: str = None):
        """
        Draws the uplift curve for one treatment.

        Parameters
        ----------
        tmt: any (sortable)
            Name of treatment to plot.
        err_type: str
            Type of error to plot. Accepted values are normal (None), two-sided
            uniform confidence band ('ucb2'), or 1-sided uniform confidence band
            ('ucb1').

        Returns
        -------
        matplotlib plot with percentage treated on x-axis and uplift metric
        (and 95% CI) on y-axis

        Raises
        ------
        ValueError
            If ``tmt`` is not one of ``treatments[1:]`` or ``err_type`` is not
            one of the accepted values.
        """
        if tmt not in self.treatments[1:]:
            raise ValueError(f'Invalid treatment; must be one of {self.treatments[1:]}')

        # Copy so the helper error column does not leak into the stored data.
        curve = self.curves[tmt].copy()

        # Pick the multiplier applied to the per-point error.
        if err_type is None:
            multiplier = 1.96
        elif err_type == 'ucb2':
            multiplier = curve['uniform_critical_value']
        elif err_type == 'ucb1':
            multiplier = curve['uniform_one_side_critical_value']
        else:
            raise ValueError(f"Invalid error type {err_type!r}; must be one of [None, 'ucb2', 'ucb1']")
        curve['95_err'] = multiplier * curve['err']

        # Pull the (already rounded) estimate and standard error for the title.
        table = self.summary()
        is_tmt = table['treatment'] == tmt
        coeff = round(table.loc[is_tmt]['est'].values[0], 3)
        err = round(table.loc[is_tmt]['se'].values[0], 3)

        if err_type == 'ucb1':
            # One-sided band: the error goes in the first (lower) slot of the
            # asymmetric pair and the second slot is zero.
            yerr = [[curve['95_err'], np.zeros(len(curve))]]
        else:
            yerr = '95_err'

        return curve.plot(
            kind='scatter',
            x='Percentage treated',
            y='value',
            yerr=yerr,
            ylabel='Gain over Random',
            title=f"Treatment = {tmt}, Integral = {coeff} +/- {err}"
        )
class EvaluationResults:
    """
    Bundles the results objects of all tests behind one interface.

    Parameters
    ----------
    cal_res: CalibrationEvaluationResults object
        Results object for calibration test
    blp_res: BLPEvaluationResults object
        Results object for BLP test
    qini_res: UpliftEvaluationResults object
        Results object for QINI test
    toc_res: UpliftEvaluationResults object
        Results object for TOC test
    """

    def __init__(
        self,
        cal_res: CalibrationEvaluationResults,
        blp_res: BLPEvaluationResults,
        qini_res: UpliftEvaluationResults,
        toc_res: UpliftEvaluationResults
    ):
        self.cal = cal_res
        self.blp = blp_res
        self.qini = qini_res
        self.toc = toc_res

    def summary(self):
        """
        Builds one dataframe combining the summaries of every test.

        The BLP summary is merged on ``treatment`` with, in order, the QINI
        summary (columns renamed with a ``qini_`` prefix), the TOC summary
        (columns renamed with an ``autoc_`` prefix) and the calibration summary.

        Parameters
        ----------
        None

        Returns
        -------
        pandas dataframe containing summary of all test results
        """
        combined = self.blp.summary()

        qini_tbl = self.qini.summary().rename(
            columns={'est': 'qini_est', 'se': 'qini_se', 'pval': 'qini_pval'}
        )
        combined = combined.merge(qini_tbl, on='treatment')

        toc_tbl = self.toc.summary().rename(
            columns={'est': 'autoc_est', 'se': 'autoc_se', 'pval': 'autoc_pval'}
        )
        combined = combined.merge(toc_tbl, on='treatment')

        combined = combined.merge(self.cal.summary(), on='treatment')
        return combined

    def plot_cal(self, tmt: int):
        """
        Plots group average treatment effects (GATEs) and predicted GATEs by
        quantile-based group in validation sample; delegates to the calibration
        results object.

        Parameters
        ----------
        tmt: integer
            Treatment level to plot

        Returns
        -------
        matplotlib plot with predicted GATE on x-axis and GATE (and 95% CI) on y-axis
        """
        calibration = self.cal
        return calibration.plot_cal(tmt)

    def plot_qini(self, tmt: int, err_type: str = None):
        """
        Plots QINI curves; delegates to the QINI results object.

        Parameters
        ----------
        tmt: integer
            Treatment level to plot
        err_type: str
            Type of error to plot. Accepted values are normal (None), two-sided
            uniform confidence band ('ucb2'), or 1-sided uniform confidence band
            ('ucb1').

        Returns
        -------
        matplotlib plot with percentage treated on x-axis and QINI value (and 95% CI) on y-axis
        """
        qini = self.qini
        return qini.plot_uplift(tmt, err_type)

    def plot_toc(self, tmt: int, err_type: str = None):
        """
        Plots TOC curves; delegates to the TOC results object.

        Parameters
        ----------
        tmt: integer
            Treatment level to plot
        err_type: str
            Type of error to plot. Accepted values are normal (None), two-sided
            uniform confidence band ('ucb2'), or 1-sided uniform confidence band
            ('ucb1').

        Returns
        -------
        matplotlib plot with percentage treated on x-axis and TOC value (and 95% CI) on y-axis
        """
        toc = self.toc
        return toc.plot_uplift(tmt, err_type)