# Source code for econml.policy._drlearner

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from warnings import warn
import numpy as np
from sklearn.base import clone
from ..utilities import check_inputs, filter_none_kwargs, check_input_arrays
from ..dr import DRLearner
from ..dr._drlearner import _ModelFinal
from .._tree_exporter import _SingleTreeExporterMixin
from ._base import PolicyLearner
from . import PolicyTree, PolicyForest


class _PolicyModelFinal(_ModelFinal):
    """
    Final-stage wrapper that fits a policy model on baseline-relative values.

    The first nuisance array is converted into a two-dimensional target whose first column is
    all zeros and whose remaining columns hold each arm's value minus the value of arm 0. That
    target is handed to the wrapped final model.
    """

    def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, freq_weight=None, sample_var=None):
        """
        Fit the wrapped final model on per-arm value differences.

        Parameters
        ----------
        Y, T, W :
            Accepted for interface compatibility; not read by this method.
        X : array-like or None
            Features. Passed through the featurizer's ``fit_transform`` when both X and the
            featurizer are present.
        nuisances : pair
            Only the first element is used; its last axis indexes the treatment arms.
        sample_weight : array-like or None
            Forwarded to the final model's ``fit`` when it is not None.
        freq_weight :
            Accepted but not used.
        sample_var :
            Ignored; a warning is emitted when it is not None.

        Returns
        -------
        self
        """
        if sample_var is not None:
            warn('Parameter `sample_var` is ignored by the final estimator')
            sample_var = None
        arm_values, _ = nuisances
        # Axes between the sample axis and the arm axis belong to Y (must be a singleton).
        self.d_y = arm_values.shape[1:-1]
        if X is not None and self._featurizer is not None:
            X = self._featurizer.fit_transform(X)
        fit_kwargs = filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var)
        # Value of every non-baseline arm relative to the control arm.
        control = arm_values[..., [0]]
        gains = arm_values[..., 1:] - control
        if self.d_y:
            # Drop the singleton Y axis so the final model sees a 2D target.
            gains = gains.squeeze(1)
        zero_column = np.zeros((gains.shape[0], 1))
        targets = np.concatenate([zero_column, gains], axis=1)
        self.model_cate = self._model_final.fit(X, targets, **fit_kwargs)
        return self

    def predict(self, X=None):
        """
        Return the fitted model's predicted values for all columns after the first.

        The singleton Y axis is re-inserted at position 1 when one was present at fit time.
        """
        if X is not None and self._featurizer is not None:
            X = self._featurizer.transform(X)
        values = self.model_cate.predict_value(X)
        effects = values[:, 1:]
        if not self.d_y:
            return effects
        # Reintroduce the singleton Y dimension.
        return effects[:, np.newaxis, :]

    def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None):
        """Return a constant score of 0; no scoring is performed."""
        return 0


class _DRLearnerWrapper(DRLearner):
    """DRLearner variant whose final stage is a ``_PolicyModelFinal``."""

    def _gen_ortho_learner_model_final(self):
        """Build the final-stage wrapper from the generated final model and featurizer."""
        final_model = self._gen_model_final()
        featurizer = self._gen_featurizer()
        return _PolicyModelFinal(final_model, featurizer, self.multitask_model_final)


class _BaseDRPolicyLearner(PolicyLearner):
    """
    Base class for policy learners that delegate estimation to an underlying learner.

    Subclasses provide the learner through ``_gen_drpolicy_learner``. ``fit`` stores the fitted
    learner on ``drlearner_``, and the remaining methods read from it.
    """

    def _gen_drpolicy_learner(self):
        """
        Create the (unfitted) learner that ``fit`` trains.

        Must be overridden by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement `_gen_drpolicy_learner`.")

    def fit(self, Y, T, *, X=None, W=None, sample_weight=None, groups=None):
        """
        Estimate a policy model from data.

        Parameters
        ----------
        Y: (n,) vector of length n
            Outcomes for each sample
        T: (n,) vector of length n
            Treatments for each sample
        X: optional(n, d_x) matrix or None (Default=None)
            Features for each sample
        W: optional(n, d_w) matrix or None (Default=None)
            Controls for each sample
        sample_weight: optional(n,) vector or None (Default=None)
            Weights for each sample
        groups: (n,) vector, optional
            All rows corresponding to the same group will be kept together during splitting.
            If groups is not None, the `cv` argument passed to this class's initializer
            must support a 'groups' argument to its split method.

        Returns
        -------
        self: object instance
        """
        # A fresh learner is generated on every call, so refitting starts from scratch.
        self.drlearner_ = self._gen_drpolicy_learner()
        self.drlearner_.fit(Y, T, X=X, W=W, sample_weight=sample_weight, groups=groups)
        return self

    def predict_value(self, X):
        """ Get effect values for each non-baseline treatment and for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        values : array-like of shape (n_samples, n_treatments - 1)
            The predicted average value for each sample and for each non-baseline treatment, as compared
            to the baseline treatment value and based on the feature neighborhoods defined by the trees.
        """
        return self.drlearner_.const_marginal_effect(X)

    def predict_proba(self, X):
        """ Predict the probability of recommending each treatment

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        treatment_proba : array-like of shape (n_samples, n_treatments)
            The probability of each treatment recommendation
        """
        X, = check_input_arrays(X)
        featurizer = self.drlearner_.featurizer_
        if featurizer is not None:
            # Only transform here: refitting the featurizer on prediction data would mutate it
            # and could change the feature map the policy model was trained on.
            X = featurizer.transform(X)
        return self.policy_model_.predict_proba(X)

    def predict(self, X):
        """ Get recommended treatment for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        treatment : array-like of shape (n_samples,)
            The index of the recommended treatment in the same order as in categories, or in
            lexicographic order if `categories='auto'`. 0 corresponds to the baseline/control treatment.
            For ensemble policy models, recommended treatments are aggregated from each model in the ensemble
            and the treatment that receives the most votes is returned. Use `predict_proba` to get the fraction
            of models in the ensemble that recommend each treatment for each sample.
        """
        # The recommendation is the treatment with the highest predicted probability.
        return np.argmax(self.predict_proba(X), axis=1)

    def policy_feature_names(self, *, feature_names=None):
        """
        Get the output feature names.

        Parameters
        ----------
        feature_names: list of strings of length X.shape[1] or None
            The names of the input features. If None and X is a dataframe, it defaults to the column names
            from the dataframe.

        Returns
        -------
        out_feature_names: list of strings or None
            The names of the output features on which the policy model is fitted.
        """
        return self.drlearner_.cate_feature_names(feature_names=feature_names)

    def policy_treatment_names(self, *, treatment_names=None):
        """
        Get the names of the treatments.

        Parameters
        ----------
        treatment_names: list of strings of length n_categories
            The names of the treatments (including the baseline). If None then values are auto-generated
            based on input metadata.

        Returns
        -------
        out_treatment_names: list of strings
            The names of the treatments including the baseline/control treatment.

        Raises
        ------
        ValueError
            If `treatment_names` is given and its length is not one more than the number of
            non-baseline treatment names reported by the fitted learner.
        """
        if treatment_names is not None:
            if len(treatment_names) != len(self.drlearner_.cate_treatment_names()) + 1:
                raise ValueError('The variable `treatment_names` should have length equal to '
                                 'n_treatments + 1, containing the value of the control/none/baseline treatment as '
                                 'the first element and the names of all the treatments as subsequent elements.')
            return treatment_names
        # The fitted learner only names the non-baseline arms, so the baseline label is prepended.
        return ['None'] + self.drlearner_.cate_treatment_names()

    def feature_importances(self, max_depth=4, depth_decay_exponent=2.0):
        """
        Get the feature importances reported by the fitted policy model.

        Parameters
        ----------
        max_depth : int, default=4
            Splits of depth larger than `max_depth` are not used in this calculation
        depth_decay_exponent: double, default=2.0
            The contribution of each split to the total score is re-weighted by ``1 / (1 + `depth`)**2.0``.

        Returns
        -------
        feature_importances_ : ndarray of shape (n_features,)
            Normalized total parameter heterogeneity inducing importance of each feature
        """
        return self.policy_model_.feature_importances(max_depth=max_depth,
                                                      depth_decay_exponent=depth_decay_exponent)

    @property
    def feature_importances_(self):
        """ Feature importances computed with the default arguments of `feature_importances`.
        """
        return self.feature_importances()

    @property
    def policy_model_(self):
        """ The trained final stage policy model
        """
        return self.drlearner_.multitask_model_cate


class DRPolicyTree(_BaseDRPolicyLearner):
    """
    Policy learner that uses doubly-robust correction techniques to account for
    covariate shift (selection bias) between the treatment arms.

    In this estimator, the policy is estimated by first constructing doubly robust estimates of the
    counterfactual outcomes

    .. math ::

        Y_{i, t}^{DR} = E[Y | X_i, W_i, T_i=t] + \\frac{Y_i - E[Y | X_i, W_i, T_i=t]}{Pr[T_i=t | X_i, W_i]} \\cdot 1\\{T_i=t\\}

    Then optimizing the objective

    .. math ::

        V(\\pi) = \\sum_i \\sum_t \\pi_t(X_i) * (Y_{i, t}^{DR} - Y_{i, 0}^{DR})

    with the constraint that only one of :math:`\\pi_t(X_i)` is 1 and the rest are 0, for each :math:`X_i`.

    Thus if we estimate the nuisance functions :math:`h(X, W, T) = E[Y | X, W, T]` and
    :math:`p_t(X, W)=Pr[T=t | X, W]` in the first stage, we can estimate the final stage cate for each
    treatment t, by constructing a decision tree that maximizes the objective :math:`V(\\pi)`

    The problem of estimating the nuisance function :math:`p` is a simple multi-class classification
    problem of predicting the label :math:`T` from :math:`X, W`. The :class:`.DRLearner`
    class takes as input the parameter ``model_propensity``, which is an arbitrary scikit-learn
    classifier, that is internally used to solve this classification problem.

    The second nuisance function :math:`h` is a simple regression problem and the :class:`.DRLearner`
    class takes as input the parameter ``model_regression``, which is an arbitrary scikit-learn regressor that
    is internally used to solve this regression problem.

    Parameters
    ----------
    model_regression : scikit-learn regressor or 'auto', optional (default='auto')
        Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
        concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
        `predict` methods. If different models per treatment arm are desired, see the
        :class:`.MultiModelWrapper` helper class.
        If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

    model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
        Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
        Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
        where T is a shape (n, ) array.
        If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

    featurizer : :term:`transformer`, optional, default None
        Must support fit_transform and transform. Used to create composite features in the final CATE regression.
        It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
        If featurizer=None, then CATE is trained on X.

    min_propensity : float, optional, default ``1e-6``
        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.

    categories: 'auto' or list, default 'auto'
        The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
        The first category will be treated as the control treatment.

    cv: int, cross-validation generator or an iterable, optional (default is 2)
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross-validation,
        - integer, to specify the number of folds.
        - :term:`CV splitter`
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if the treatment is discrete
        :class:`~sklearn.model_selection.StratifiedKFold` is used, else,
        :class:`~sklearn.model_selection.KFold` is used
        (with a random shuffle in either case).

        Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all
        W, X are None, then we call `split(ones((T.shape[0], 1)), T)`.

    mc_iters: int, optional (default=None)
        The number of times to rerun the first stage models to reduce the variance of the nuisances.

    mc_agg: {'mean', 'median'}, optional (default='mean')
        How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
        cross-fitting.

    max_depth : integer or None, optional (default=None)
        The maximum depth of the tree. If None, then nodes are expanded until
        all leaves are pure or until all leaves contain less than
        min_samples_split samples.

    min_samples_split : int, float, optional (default=10)
        The minimum number of splitting samples required to split an internal node.

        - If int, then consider `min_samples_split` as the minimum number.
        - If float, then `min_samples_split` is a fraction and
          `ceil(min_samples_split * n_samples)` are the minimum
          number of samples for each split.

    min_samples_leaf : int, float, optional (default=5)
        The minimum number of samples required to be at a leaf node.
        A split point at any depth will only be considered if it leaves at
        least ``min_samples_leaf`` splitting samples in each of the left and
        right branches. This may have the effect of smoothing the model,
        especially in regression. After construction the tree is also pruned
        so that there are at least min_samples_leaf estimation samples on
        each leaf.

        - If int, then consider `min_samples_leaf` as the minimum number.
        - If float, then `min_samples_leaf` is a fraction and
          `ceil(min_samples_leaf * n_samples)` are the minimum
          number of samples for each node.

    min_weight_fraction_leaf : float, optional (default=0.)
        The minimum weighted fraction of the sum total of weights (of all
        splitting samples) required to be at a leaf node. Samples have
        equal weight when sample_weight is not provided. After construction
        the tree is pruned so that the fraction of the sum total weight
        of the estimation samples contained in each leaf node is at
        least min_weight_fraction_leaf

    max_features : int, float, string or None, optional (default="auto")
        The number of features to consider when looking for the best split:

        - If int, then consider `max_features` features at each split.
        - If float, then `max_features` is a fraction and
          `int(max_features * n_features)` features are considered at each
          split.
        - If "auto", then `max_features=n_features`.
        - If "sqrt", then `max_features=sqrt(n_features)`.
        - If "log2", then `max_features=log2(n_features)`.
        - If None, then `max_features=n_features`.

        Note: the search for a split does not stop until at least one
        valid partition of the node samples is found, even if it requires to
        effectively inspect more than ``max_features`` features.

    min_impurity_decrease : float, optional (default=0.)
        A node will be split if this split induces a decrease of the impurity
        greater than or equal to this value.
        The weighted impurity decrease equation is the following::

            N_t / N * (impurity - N_t_R / N_t * right_impurity
                                - N_t_L / N_t * left_impurity)

        where ``N`` is the total number of split samples, ``N_t`` is the number of
        split samples at the current node, ``N_t_L`` is the number of split samples in the
        left child, and ``N_t_R`` is the number of split samples in the right child.
        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
        if ``sample_weight`` is passed.

    min_balancedness_tol: float in [0, .5], default=.45
        How imbalanced a split we can tolerate. This enforces that each split leaves at least
        (.5 - min_balancedness_tol) fraction of samples on each side of the split; or fraction
        of the total weight of samples, when sample_weight is not None. Default value, ensures
        that at least 5% of the parent node weight falls in each side of the split. Set it to 0.0 for no
        balancedness and to .5 for perfectly balanced splits. For the formal inference theory
        to be valid, this has to be any positive constant bounded away from zero.

    honest : boolean, optional (default=True)
        Whether to use honest trees, i.e. half of the samples are used for
        creating the tree structure and the other half for the estimation at
        the leafs. If False, then all samples are used for both parts.

    random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
        by :mod:`np.random<numpy.random>`.
    """

    def __init__(self, *,
                 model_regression="auto",
                 model_propensity="auto",
                 featurizer=None,
                 min_propensity=1e-6,
                 categories='auto',
                 cv=2,
                 mc_iters=None,
                 mc_agg='mean',
                 max_depth=None,
                 min_samples_split=10,
                 min_samples_leaf=5,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 min_impurity_decrease=0.,
                 min_balancedness_tol=.45,
                 honest=True,
                 random_state=None):
        # Model arguments are cloned (non-strictly, so strings such as 'auto' and None pass through)
        # to avoid sharing state with the objects supplied by the caller.
        self.model_regression = clone(model_regression, safe=False)
        self.model_propensity = clone(model_propensity, safe=False)
        self.featurizer = clone(featurizer, safe=False)
        self.min_propensity = min_propensity
        self.categories = categories
        self.cv = cv
        self.mc_iters = mc_iters
        self.mc_agg = mc_agg
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.min_impurity_decrease = min_impurity_decrease
        self.min_balancedness_tol = min_balancedness_tol
        self.honest = honest
        self.random_state = random_state

    def _gen_drpolicy_learner(self):
        """Build the underlying learner with a `PolicyTree` as its multitask final model."""
        policy_tree = PolicyTree(max_depth=self.max_depth,
                                 min_samples_split=self.min_samples_split,
                                 min_samples_leaf=self.min_samples_leaf,
                                 min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                                 max_features=self.max_features,
                                 min_impurity_decrease=self.min_impurity_decrease,
                                 min_balancedness_tol=self.min_balancedness_tol,
                                 honest=self.honest,
                                 random_state=self.random_state)
        return _DRLearnerWrapper(model_regression=self.model_regression,
                                 model_propensity=self.model_propensity,
                                 featurizer=self.featurizer,
                                 min_propensity=self.min_propensity,
                                 categories=self.categories,
                                 cv=self.cv,
                                 mc_iters=self.mc_iters,
                                 mc_agg=self.mc_agg,
                                 model_final=policy_tree,
                                 multitask_model_final=True,
                                 random_state=self.random_state)

    def plot(self, *, feature_names=None, treatment_names=None, ax=None, title=None,
             max_depth=None, filled=True, rounded=True, precision=3, fontsize=None):
        """
        Exports policy trees to matplotlib

        Parameters
        ----------
        feature_names : list of strings, optional, default None
            Names of each of the features.

        treatment_names : list of strings, optional, default None
            Names of each of the treatments including the baseline/control

        ax : :class:`matplotlib.axes.Axes`, optional, default None
            The axes on which to plot

        title : string, optional, default None
            A title for the final figure to be printed at the top of the page.

        max_depth: int or None, optional, default None
            The maximum tree depth to plot

        filled : bool, optional, default True
            When set to ``True``, paint nodes to indicate majority class for
            classification, extremity of values for regression, or purity of node
            for multi-output.

        rounded : bool, optional, default True
            When set to ``True``, draw node boxes with rounded corners and use
            Helvetica fonts instead of Times-Roman.

        precision : int, optional, default 3
            Number of digits of precision for floating point in the values of
            impurity, threshold and value attributes of each node.

        fontsize : int, optional, default None
            Font size for text
        """
        return self.policy_model_.plot(feature_names=self.policy_feature_names(feature_names=feature_names),
                                       treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
                                       ax=ax,
                                       title=title,
                                       max_depth=max_depth,
                                       filled=filled,
                                       rounded=rounded,
                                       precision=precision,
                                       fontsize=fontsize)

    def export_graphviz(self, *, out_file=None, feature_names=None, treatment_names=None,
                        max_depth=None, filled=True, leaves_parallel=True,
                        rotate=False, rounded=True, special_characters=False, precision=3):
        """
        Export a graphviz dot file representing the learned tree model

        Parameters
        ----------
        out_file : file object or string, optional, default None
            Handle or name of the output file. If ``None``, the result is
            returned as a string.

        feature_names : list of strings, optional, default None
            Names of each of the features.

        treatment_names : list of strings, optional, default None
            Names of each of the treatments, including the baseline treatment

        max_depth: int or None, optional, default None
            The maximum tree depth to plot

        filled : bool, optional, default True
            When set to ``True``, paint nodes to indicate majority class for
            classification, extremity of values for regression, or purity of node
            for multi-output.

        leaves_parallel : bool, optional, default True
            When set to ``True``, draw all leaf nodes at the bottom of the tree.

        rotate : bool, optional, default False
            When set to ``True``, orient tree left to right rather than top-down.

        rounded : bool, optional, default True
            When set to ``True``, draw node boxes with rounded corners and use
            Helvetica fonts instead of Times-Roman.

        special_characters : bool, optional, default False
            When set to ``False``, ignore special characters for PostScript
            compatibility.

        precision : int, optional, default 3
            Number of digits of precision for floating point in the values of
            impurity, threshold and value attributes of each node.
        """
        return self.policy_model_.export_graphviz(
            out_file=out_file,
            feature_names=self.policy_feature_names(feature_names=feature_names),
            treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
            max_depth=max_depth,
            filled=filled,
            leaves_parallel=leaves_parallel,
            rotate=rotate,
            rounded=rounded,
            special_characters=special_characters,
            precision=precision)

    def render(self, out_file, *, format='pdf', view=True, feature_names=None,
               treatment_names=None, max_depth=None,
               filled=True, leaves_parallel=True, rotate=False, rounded=True,
               special_characters=False, precision=3):
        """
        Render the tree to a file

        Parameters
        ----------
        out_file : file name to save to

        format : string, optional, default 'pdf'
            The file format to render to; must be supported by graphviz

        view : bool, optional, default True
            Whether to open the rendered result with the default application.

        feature_names : list of strings, optional, default None
            Names of each of the features.

        treatment_names : list of strings, optional, default None
            Names of each of the treatments, including the baseline/control

        max_depth: int or None, optional, default None
            The maximum tree depth to plot

        filled : bool, optional, default True
            When set to ``True``, paint nodes to indicate majority class for
            classification, extremity of values for regression, or purity of node
            for multi-output.

        leaves_parallel : bool, optional, default True
            When set to ``True``, draw all leaf nodes at the bottom of the tree.

        rotate : bool, optional, default False
            When set to ``True``, orient tree left to right rather than top-down.

        rounded : bool, optional, default True
            When set to ``True``, draw node boxes with rounded corners and use
            Helvetica fonts instead of Times-Roman.

        special_characters : bool, optional, default False
            When set to ``False``, ignore special characters for PostScript
            compatibility.

        precision : int, optional, default 3
            Number of digits of precision for floating point in the values of
            impurity, threshold and value attributes of each node.
        """
        return self.policy_model_.render(out_file,
                                         format=format,
                                         view=view,
                                         feature_names=self.policy_feature_names(feature_names=feature_names),
                                         treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
                                         max_depth=max_depth,
                                         filled=filled,
                                         leaves_parallel=leaves_parallel,
                                         rotate=rotate,
                                         rounded=rounded,
                                         special_characters=special_characters,
                                         precision=precision)
[docs]class DRPolicyForest(_BaseDRPolicyLearner): """ Policy learner that uses doubly-robust correction techniques to account for covariate shift (selection bias) between the treatment arms. In this estimator, the policy is estimated by first constructing doubly robust estimates of the counterfactual outcomes .. math :: Y_{i, t}^{DR} = E[Y | X_i, W_i, T_i=t]\ + \\frac{Y_i - E[Y | X_i, W_i, T_i=t]}{Pr[T_i=t | X_i, W_i]} \\cdot 1\\{T_i=t\\} Then optimizing the objective .. math :: V(\\pi) = \\sum_i \\sum_t \\pi_t(X_i) * (Y_{i, t} - Y_{i, 0}) with the constraint that only one of :math:`\\pi_t(X_i)` is 1 and the rest are 0, for each :math:`X_i`. Thus if we estimate the nuisance functions :math:`h(X, W, T) = E[Y | X, W, T]` and :math:`p_t(X, W)=Pr[T=t | X, W]` in the first stage, we can estimate the final stage cate for each treatment t, by running a constructing a decision tree that maximizes the objective :math:`V(\\pi)` The problem of estimating the nuisance function :math:`p` is a simple multi-class classification problem of predicting the label :math:`T` from :math:`X, W`. The :class:`.DRLearner` class takes as input the parameter ``model_propensity``, which is an arbitrary scikit-learn classifier, that is internally used to solve this classification problem. The second nuisance function :math:`h` is a simple regression problem and the :class:`.DRLearner` class takes as input the parameter ``model_regressor``, which is an arbitrary scikit-learn regressor that is internally used to solve this regression problem. Parameters ---------- model_propensity : scikit-learn classifier or 'auto', optional (default='auto') Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T, where T is a shape (n, ) array. If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen. 
model_regression : scikit-learn regressor or 'auto', optional (default='auto') Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and `predict` methods. If different models per treatment arm are desired, see the :class:`.MultiModelWrapper` helper class. If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen. featurizer : :term:`transformer`, optional, default None Must support fit_transform and transform. Used to create composite features in the final CATE regression. It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X). If featurizer=None, then CATE is trained on X. min_propensity : float, optional, default ``1e-6`` The minimum propensity at which to clip propensity estimates to avoid dividing by zero. categories: 'auto' or list, default 'auto' The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). The first category will be treated as the control treatment. cv: int, cross-validation generator or an iterable, optional (default is 2) Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter` - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the treatment is discrete :class:`~sklearn.model_selection.StratifiedKFold` is used, else, :class:`~sklearn.model_selection.KFold` is used (with a random shuffle in either case). Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. mc_iters: int, optional (default=None) The number of times to rerun the first stage models to reduce the variance of the nuisances. 
mc_agg: {'mean', 'median'}, optional (default='mean') How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of cross-fitting. n_estimators : integer, optional (default=100) The total number of trees in the forest. The forest consists of a forest of sqrt(n_estimators) sub-forests, where each sub-forest contains sqrt(n_estimators) trees. max_depth : integer or None, optional (default=None) The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. min_samples_split : int, float, optional (default=10) The minimum number of splitting samples required to split an internal node. - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. min_samples_leaf : int, float, optional (default=5) The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` splitting samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. After construction the tree is also pruned so that there are at least min_samples_leaf estimation samples on each leaf. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all splitting samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. 
After construction the tree is pruned so that the fraction of the sum total weight of the estimation samples contained in each leaf node is at least min_weight_fraction_leaf max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. min_impurity_decrease : float, optional (default=0.) A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of split samples, ``N_t`` is the number of split samples at the current node, ``N_t_L`` is the number of split samples in the left child, and ``N_t_R`` is the number of split samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. max_samples : int or float in (0, 1], default=.5, The number of samples to use for each subsample that is used to train each tree: - If int, then train each tree on `max_samples` samples, sampled without replacement from all the samples - If float, then train each tree on ceil(`max_samples` * `n_samples`), sampled without replacement from all the samples. min_balancedness_tol: float in [0, .5], default=.45 How imbalanced a split we can tolerate. 
This enforces that each split leaves at least (.5 - min_balancedness_tol) fraction of samples on each side of the split; or fraction of the total weight of samples, when sample_weight is not None. Default value, ensures that at least 5% of the parent node weight falls in each side of the split. Set it to 0.0 for no balancedness and to .5 for perfectly balanced splits. For the formal inference theory to be valid, this has to be any positive constant bounded away from zero. honest : boolean, optional (default=True) Whether to use honest trees, i.e. half of the samples are used for creating the tree structure and the other half for the estimation at the leafs. If False, then all samples are used for both parts. n_jobs : int or None, optional (default=-1) The number of jobs to run in parallel for both `fit` and `predict`. ``None`` means 1 unless in a :func:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. verbose : int, optional (default=0) Controls the verbosity when fitting and predicting. random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used by :mod:`np.random<numpy.random>`. """
[docs] def __init__(self, *, model_regression="auto", model_propensity="auto", featurizer=None, min_propensity=1e-6, categories='auto', cv=2, mc_iters=None, mc_agg='mean', n_estimators=100, max_depth=None, min_samples_split=10, min_samples_leaf=5, min_weight_fraction_leaf=0., max_features="auto", min_impurity_decrease=0., max_samples=.5, min_balancedness_tol=.45, honest=True, n_jobs=-1, verbose=0, random_state=None): self.model_regression = clone(model_regression, safe=False) self.model_propensity = clone(model_propensity, safe=False) self.featurizer = clone(featurizer, safe=False) self.min_propensity = min_propensity self.categories = categories self.cv = cv self.mc_iters = mc_iters self.mc_agg = mc_agg self.n_estimators = n_estimators self.max_depth = max_depth self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.min_impurity_decrease = min_impurity_decrease self.max_samples = max_samples self.min_balancedness_tol = min_balancedness_tol self.honest = honest self.n_jobs = n_jobs self.verbose = verbose self.random_state = random_state
def _gen_drpolicy_learner(self):
    """
    Build the doubly robust learner whose final stage is a policy forest.

    Returns
    -------
    _DRLearnerWrapper
        A wrapper configured with the nuisance-model and cross-fitting
        settings stored on this object, using a ``PolicyForest`` built from
        the stored forest hyperparameters as its multitask final model.
    """
    # ``n_estimators`` has to be forwarded explicitly: it was previously
    # omitted, so the value given to ``__init__`` was silently ignored and
    # the forest always used its own default number of trees.
    policy_forest = PolicyForest(
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        min_impurity_decrease=self.min_impurity_decrease,
        max_samples=self.max_samples,
        min_balancedness_tol=self.min_balancedness_tol,
        honest=self.honest,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
    )
    return _DRLearnerWrapper(
        model_regression=self.model_regression,
        model_propensity=self.model_propensity,
        featurizer=self.featurizer,
        min_propensity=self.min_propensity,
        categories=self.categories,
        cv=self.cv,
        mc_iters=self.mc_iters,
        mc_agg=self.mc_agg,
        model_final=policy_forest,
        multitask_model_final=True,
        random_state=self.random_state,
    )
def plot(self, tree_id, *, feature_names=None, treatment_names=None,
         ax=None, title=None, max_depth=None, filled=True, rounded=True,
         precision=3, fontsize=None):
    """
    Draw a single tree of the policy forest with matplotlib.

    Parameters
    ----------
    tree_id : int
        The id of the tree of the forest to plot
    feature_names : list of strings, optional, default None
        Names of each of the features.
    treatment_names : list of strings, optional, default None
        Names of each of the treatments, starting with a name for the
        baseline/control treatment (alphanumerically smallest)
    ax : :class:`matplotlib.axes.Axes`, optional, default None
        The axes on which to plot
    title : string, optional, default None
        A title for the final figure to be printed at the top of the page.
    max_depth: int or None, optional, default None
        The maximum tree depth to plot
    filled : bool, optional, default True
        When set to ``True``, paint nodes to indicate majority class for
        classification, extremity of values for regression, or purity of
        node for multi-output.
    rounded : bool, optional, default True
        When set to ``True``, draw node boxes with rounded corners and use
        Helvetica fonts instead of Times-Roman.
    precision : int, optional, default 3
        Number of digits of precision for floating point in the values of
        impurity, threshold and value attributes of each node.
    fontsize : int, optional, default None
        Font size for text

    Returns
    -------
    The value returned by the selected tree's ``plot`` method.
    """
    # Select the tree first so a bad ``tree_id`` fails before any name lookup.
    draw_tree = self.policy_model_[tree_id].plot
    resolved_features = self.policy_feature_names(feature_names=feature_names)
    resolved_treatments = self.policy_treatment_names(treatment_names=treatment_names)
    return draw_tree(
        feature_names=resolved_features,
        treatment_names=resolved_treatments,
        ax=ax,
        title=title,
        max_depth=max_depth,
        filled=filled,
        rounded=rounded,
        precision=precision,
        fontsize=fontsize,
    )
def export_graphviz(self, tree_id, *, out_file=None, feature_names=None,
                    treatment_names=None, max_depth=None, filled=True,
                    leaves_parallel=True, rotate=False, rounded=True,
                    special_characters=False, precision=3):
    """
    Export a graphviz dot description of one tree of the policy forest.

    Parameters
    ----------
    tree_id : int
        The id of the tree of the forest to export
    out_file : file object or string, optional, default None
        Handle or name of the output file. If ``None``, the result is
        returned as a string.
    feature_names : list of strings, optional, default None
        Names of each of the features.
    treatment_names : list of strings, optional, default None
        Names of each of the treatments, starting with a name for the
        baseline/control/None treatment (alphanumerically smallest in case
        of discrete treatment)
    max_depth: int or None, optional, default None
        The maximum tree depth to plot
    filled : bool, optional, default True
        When set to ``True``, paint nodes to indicate majority class for
        classification, extremity of values for regression, or purity of
        node for multi-output.
    leaves_parallel : bool, optional, default True
        When set to ``True``, draw all leaf nodes at the bottom of the tree.
    rotate : bool, optional, default False
        When set to ``True``, orient tree left to right rather than top-down.
    rounded : bool, optional, default True
        When set to ``True``, draw node boxes with rounded corners and use
        Helvetica fonts instead of Times-Roman.
    special_characters : bool, optional, default False
        When set to ``False``, ignore special characters for PostScript
        compatibility.
    precision : int, optional, default 3
        Number of digits of precision for floating point in the values of
        impurity, threshold and value attributes of each node.

    Returns
    -------
    The value returned by the selected tree's ``export_graphviz`` method.
    """
    # Feature names are resolved before the tree is selected, then the
    # treatment names, mirroring the order the lookups have always run in.
    resolved_features = self.policy_feature_names(feature_names=feature_names)
    export_tree = self.policy_model_[tree_id].export_graphviz
    resolved_treatments = self.policy_treatment_names(treatment_names=treatment_names)
    return export_tree(
        out_file=out_file,
        feature_names=resolved_features,
        treatment_names=resolved_treatments,
        max_depth=max_depth,
        filled=filled,
        leaves_parallel=leaves_parallel,
        rotate=rotate,
        rounded=rounded,
        special_characters=special_characters,
        precision=precision,
    )
def render(self, tree_id, out_file, *, format='pdf', view=True,
           feature_names=None, treatment_names=None, max_depth=None,
           filled=True, leaves_parallel=True, rotate=False, rounded=True,
           special_characters=False, precision=3):
    """
    Render one tree of the policy forest to a file.

    Parameters
    ----------
    tree_id : int
        The id of the tree of the forest to render
    out_file : file name to save to
    format : string, optional, default 'pdf'
        The file format to render to; must be supported by graphviz
    view : bool, optional, default True
        Whether to open the rendered result with the default application.
    feature_names : list of strings, optional, default None
        Names of each of the features.
    treatment_names : list of strings, optional, default None
        Names of each of the treatments, starting with a name for the
        baseline/control treatment (alphanumerically smallest in case of
        discrete treatment)
    max_depth: int or None, optional, default None
        The maximum tree depth to plot
    filled : bool, optional, default True
        When set to ``True``, paint nodes to indicate majority class for
        classification, extremity of values for regression, or purity of
        node for multi-output.
    leaves_parallel : bool, optional, default True
        When set to ``True``, draw all leaf nodes at the bottom of the tree.
    rotate : bool, optional, default False
        When set to ``True``, orient tree left to right rather than top-down.
    rounded : bool, optional, default True
        When set to ``True``, draw node boxes with rounded corners and use
        Helvetica fonts instead of Times-Roman.
    special_characters : bool, optional, default False
        When set to ``False``, ignore special characters for PostScript
        compatibility.
    precision : int, optional, default 3
        Number of digits of precision for floating point in the values of
        impurity, threshold and value attributes of each node.

    Returns
    -------
    The value returned by the selected tree's ``render`` method.
    """
    # Feature names are resolved before the tree is selected, then the
    # treatment names, mirroring the order the lookups have always run in.
    resolved_features = self.policy_feature_names(feature_names=feature_names)
    render_tree = self.policy_model_[tree_id].render
    resolved_treatments = self.policy_treatment_names(treatment_names=treatment_names)
    return render_tree(
        out_file,
        feature_names=resolved_features,
        treatment_names=resolved_treatments,
        format=format,
        view=view,
        max_depth=max_depth,
        filled=filled,
        leaves_parallel=leaves_parallel,
        rotate=rotate,
        rounded=rounded,
        special_characters=special_characters,
        precision=precision,
    )