# Source code for econml.policy._drlearner

```
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
from warnings import warn
import numpy as np
from sklearn.base import clone
from ..utilities import check_inputs, filter_none_kwargs, check_input_arrays
from ..dr import DRLearner
from ..dr._drlearner import _ModelFinal
from .._tree_exporter import _SingleTreeExporterMixin
from ._base import PolicyLearner
from . import PolicyTree, PolicyForest
class _PolicyModelFinal(_ModelFinal):
    """
    Final stage of the doubly robust policy learner.

    Rather than fitting a CATE regression, this final stage fits the wrapped policy
    model on the per-arm outcome predictions expressed relative to the control arm.
    """

    def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, freq_weight=None, sample_var=None):
        """
        Fit the policy model on the first-stage outcome predictions.

        Parameters
        ----------
        Y, T, W :
            Not used by this method.
        X : array-like or None
            Features; passed through the featurizer (when one is set) before fitting.
        nuisances : sequence of two elements
            Only the first element (the per-arm outcome predictions) is used.
        sample_weight : array-like or None
            Forwarded to the policy model when it is not None.
        freq_weight :
            Not used by this method.
        sample_var :
            Ignored; a warning is issued when it is not None.

        Returns
        -------
        self
        """
        if sample_var is not None:
            warn('Parameter `sample_var` is ignored by the final estimator')
            sample_var = None
        arm_preds, _ = nuisances
        # Axes between the sample axis and the treatment axis: non-empty means there is
        # a Y dimension (which must be a singleton).
        self.d_y = arm_preds.shape[1:-1]
        if X is not None and self._featurizer is not None:
            X = self._featurizer.fit_transform(X)
        fit_kwargs = filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var)
        # Value of every non-control arm relative to the control arm (index 0).
        gains = arm_preds[..., 1:] - arm_preds[..., [0]]
        if self.d_y:
            # Drop the singleton Y axis so that the policy model is fit on a 2D array.
            gains = gains.squeeze(1)
        # The control arm has zero gain relative to itself; it becomes the first column.
        control_col = np.zeros((gains.shape[0], 1))
        rewards = np.hstack([control_col, gains])
        self.model_cate = self._model_final.fit(X, rewards, **fit_kwargs)
        return self

    def predict(self, X=None):
        """
        Return the policy model's values for the non-control arms.

        The first column of ``predict_value`` (the control arm) is dropped. When the
        outcome had a singleton Y dimension at fit time, that axis is re-inserted.
        """
        if X is not None and self._featurizer is not None:
            X = self._featurizer.transform(X)
        values = self.model_cate.predict_value(X)[:, 1:]
        return values[:, np.newaxis, :] if self.d_y else values

    def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None):
        """Return a constant score of 0; none of the arguments are used."""
        return 0
class _DRLearnerWrapper(DRLearner):
    """`DRLearner` variant that uses `_PolicyModelFinal` as its final stage."""

    def _gen_ortho_learner_model_final(self):
        """Wrap the generated final model and featurizer in a `_PolicyModelFinal`."""
        final_model = self._gen_model_final()
        featurizer = self._gen_featurizer()
        return _PolicyModelFinal(final_model, featurizer, self.multitask_model_final)
class _BaseDRPolicyLearner(PolicyLearner):
    """
    Shared implementation for the doubly robust policy learners.

    Subclasses override `_gen_drpolicy_learner` to build the underlying DR learner;
    everything else delegates to the fitted learner stored in ``drlearner_``.
    """

    def _gen_drpolicy_learner(self):
        """Return the unfitted DR learner to use in `fit`; meant to be overridden by subclasses."""
        pass

    def fit(self, Y, T, *, X=None, W=None, sample_weight=None, groups=None):
        """
        Estimate a policy model from data.

        Parameters
        ----------
        Y: (n,) vector of length n
            Outcomes for each sample
        T: (n,) vector of length n
            Treatments for each sample
        X: optional(n, d_x) matrix or None (Default=None)
            Features for each sample
        W: optional(n, d_w) matrix or None (Default=None)
            Controls for each sample
        sample_weight: optional(n,) vector or None (Default=None)
            Weights for each samples
        groups: (n,) vector, optional
            All rows corresponding to the same group will be kept together during splitting.
            If groups is not None, the `cv` argument passed to this class's initializer
            must support a 'groups' argument to its split method.

        Returns
        -------
        self: object instance
        """
        # A fresh learner is generated on every call, so refitting discards the previous fit.
        self.drlearner_ = self._gen_drpolicy_learner()
        self.drlearner_.fit(Y, T, X=X, W=W, sample_weight=sample_weight, groups=groups)
        return self

    def predict_value(self, X):
        """
        Get effect values for each non-baseline treatment and for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        values : array-like of shape (n_samples, n_treatments - 1)
            The predicted average value for each sample and for each non-baseline treatment, as compared
            to the baseline treatment value and based on the feature neighborhoods defined by the trees.
        """
        return self.drlearner_.const_marginal_effect(X)

    def predict_proba(self, X):
        """
        Predict the probability of recommending each treatment.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        treatment_proba : array-like of shape (n_samples, n_treatments)
            The probability of each treatment recommendation
        """
        X, = check_input_arrays(X)
        featurizer = self.drlearner_.featurizer_
        if featurizer is not None:
            # Apply the featurizer as it was fitted during training. Calling
            # `fit_transform` here would refit it on the prediction data, which
            # changes the features for any stateful transformer.
            X = featurizer.transform(X)
        return self.policy_model_.predict_proba(X)

    def predict(self, X):
        """
        Get recommended treatment for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        treatment : array-like of shape (n_samples,)
            The index of the recommended treatment in the same order as in categories, or in
            lexicographic order if `categories='auto'`. 0 corresponds to the baseline/control treatment.
            For ensemble policy models, recommended treatments are aggregated from each model in the ensemble
            and the treatment that receives the most votes is returned. Use `predict_proba` to get the fraction
            of models in the ensemble that recommend each treatment for each sample.
        """
        return np.argmax(self.predict_proba(X), axis=1)

    def policy_feature_names(self, *, feature_names=None):
        """
        Get the output feature names.

        Parameters
        ----------
        feature_names: list of strings of length X.shape[1] or None
            The names of the input features. If None and X is a dataframe, it defaults to the column names
            from the dataframe.

        Returns
        -------
        out_feature_names: list of strings or None
            The names of the output features on which the policy model is fitted.
        """
        return self.drlearner_.cate_feature_names(feature_names=feature_names)

    def policy_treatment_names(self, *, treatment_names=None):
        """
        Get the names of the treatments.

        Parameters
        ----------
        treatment_names: list of strings of length n_categories
            The names of the treatments (including the baseline). If None then values are auto-generated
            based on input metadata.

        Returns
        -------
        out_treatment_names: list of strings
            The names of the treatments including the baseline/control treatment.

        Raises
        ------
        ValueError
            If `treatment_names` is given but does not contain exactly one more entry than
            there are non-baseline treatments.
        """
        if treatment_names is not None:
            if len(treatment_names) != len(self.drlearner_.cate_treatment_names()) + 1:
                raise ValueError('The variable `treatment_names` should have length equal to '
                                 'n_treatments + 1, containing the value of the control/none/baseline treatment as '
                                 'the first element and the names of all the treatments as subsequent elements.')
            return treatment_names
        # The learner only names the non-baseline treatments; prepend a label for the baseline.
        return ['None'] + self.drlearner_.cate_treatment_names()

    def feature_importances(self, max_depth=4, depth_decay_exponent=2.0):
        """
        Get the feature importances of the fitted policy model.

        Parameters
        ----------
        max_depth : int, default=4
            Splits of depth larger than `max_depth` are not used in this calculation
        depth_decay_exponent: double, default=2.0
            The contribution of each split to the total score is re-weighted by ``1 / (1 + `depth`)**2.0``.

        Returns
        -------
        feature_importances_ : ndarray of shape (n_features,)
            Normalized total parameter heterogeneity inducing importance of each feature
        """
        return self.policy_model_.feature_importances(max_depth=max_depth,
                                                      depth_decay_exponent=depth_decay_exponent)

    @property
    def feature_importances_(self):
        """The feature importances computed with the default arguments of `feature_importances`."""
        return self.feature_importances()

    @property
    def policy_model_(self):
        """The trained final stage policy model."""
        return self.drlearner_.multitask_model_cate
class DRPolicyTree(_BaseDRPolicyLearner):
    """
    Policy learner that uses doubly-robust correction techniques to account for
    covariate shift (selection bias) between the treatment arms.

    In this estimator, the policy is estimated by first constructing doubly robust estimates of the counterfactual
    outcomes

    .. math ::
        Y_{i, t}^{DR} = E[Y | X_i, W_i, T_i=t]\
            + \\frac{Y_i - E[Y | X_i, W_i, T_i=t]}{Pr[T_i=t | X_i, W_i]} \\cdot 1\\{T_i=t\\}

    Then optimizing the objective

    .. math ::
        V(\\pi) = \\sum_i \\sum_t \\pi_t(X_i) * (Y_{i, t} - Y_{i, 0})

    with the constraint that only one of :math:`\\pi_t(X_i)` is 1 and the rest are 0, for each :math:`X_i`.

    Thus if we estimate the nuisance functions :math:`h(X, W, T) = E[Y | X, W, T]` and
    :math:`p_t(X, W)=Pr[T=t | X, W]` in the first stage, we can estimate the final stage cate for each
    treatment t, by constructing a decision tree that maximizes the objective :math:`V(\\pi)`

    The problem of estimating the nuisance function :math:`p` is a simple multi-class classification
    problem of predicting the label :math:`T` from :math:`X, W`. The :class:`.DRLearner`
    class takes as input the parameter ``model_propensity``, which is an arbitrary scikit-learn
    classifier, that is internally used to solve this classification problem.

    The second nuisance function :math:`h` is a simple regression problem and the :class:`.DRLearner`
    class takes as input the parameter ``model_regression``, which is an arbitrary scikit-learn regressor that
    is internally used to solve this regression problem.

    Parameters
    ----------
    model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
        Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
        Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
        where T is a shape (n, ) array.
        If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

    model_regression : scikit-learn regressor or 'auto', optional (default='auto')
        Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
        concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
        `predict` methods. If different models per treatment arm are desired, see the
        :class:`.MultiModelWrapper` helper class.
        If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

    featurizer : :term:`transformer`, optional, default None
        Must support fit_transform and transform. Used to create composite features in the final CATE regression.
        It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
        If featurizer=None, then CATE is trained on X.

    min_propensity : float, optional, default ``1e-6``
        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.

    categories: 'auto' or list, default 'auto'
        The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
        The first category will be treated as the control treatment.

    cv: int, cross-validation generator or an iterable, optional (default is 2)
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross-validation,
        - integer, to specify the number of folds.
        - :term:`CV splitter`
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if the treatment is discrete
        :class:`~sklearn.model_selection.StratifiedKFold` is used, else,
        :class:`~sklearn.model_selection.KFold` is used
        (with a random shuffle in either case).

        Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all
        W, X are None, then we call `split(ones((T.shape[0], 1)), T)`.

    mc_iters: int, optional (default=None)
        The number of times to rerun the first stage models to reduce the variance of the nuisances.

    mc_agg: {'mean', 'median'}, optional (default='mean')
        How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
        cross-fitting.

    max_depth : integer or None, optional (default=None)
        The maximum depth of the tree. If None, then nodes are expanded until
        all leaves are pure or until all leaves contain less than
        min_samples_split samples.

    min_samples_split : int, float, optional (default=10)
        The minimum number of splitting samples required to split an internal node.

        - If int, then consider `min_samples_split` as the minimum number.
        - If float, then `min_samples_split` is a fraction and
          `ceil(min_samples_split * n_samples)` are the minimum
          number of samples for each split.

    min_samples_leaf : int, float, optional (default=5)
        The minimum number of samples required to be at a leaf node.
        A split point at any depth will only be considered if it leaves at
        least ``min_samples_leaf`` splitting samples in each of the left and
        right branches. This may have the effect of smoothing the model,
        especially in regression. After construction the tree is also pruned
        so that there are at least min_samples_leaf estimation samples on
        each leaf.

        - If int, then consider `min_samples_leaf` as the minimum number.
        - If float, then `min_samples_leaf` is a fraction and
          `ceil(min_samples_leaf * n_samples)` are the minimum
          number of samples for each node.

    min_weight_fraction_leaf : float, optional (default=0.)
        The minimum weighted fraction of the sum total of weights (of all
        splitting samples) required to be at a leaf node. Samples have
        equal weight when sample_weight is not provided. After construction
        the tree is pruned so that the fraction of the sum total weight
        of the estimation samples contained in each leaf node is at
        least min_weight_fraction_leaf

    max_features : int, float, string or None, optional (default="auto")
        The number of features to consider when looking for the best split:

        - If int, then consider `max_features` features at each split.
        - If float, then `max_features` is a fraction and
          `int(max_features * n_features)` features are considered at each
          split.
        - If "auto", then `max_features=n_features`.
        - If "sqrt", then `max_features=sqrt(n_features)`.
        - If "log2", then `max_features=log2(n_features)`.
        - If None, then `max_features=n_features`.

        Note: the search for a split does not stop until at least one
        valid partition of the node samples is found, even if it requires to
        effectively inspect more than ``max_features`` features.

    min_impurity_decrease : float, optional (default=0.)
        A node will be split if this split induces a decrease of the impurity
        greater than or equal to this value.
        The weighted impurity decrease equation is the following::

            N_t / N * (impurity - N_t_R / N_t * right_impurity
                                - N_t_L / N_t * left_impurity)

        where ``N`` is the total number of split samples, ``N_t`` is the number of
        split samples at the current node, ``N_t_L`` is the number of split samples in the
        left child, and ``N_t_R`` is the number of split samples in the right child.
        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
        if ``sample_weight`` is passed.

    min_balancedness_tol: float in [0, .5], default=.45
        How imbalanced a split we can tolerate. This enforces that each split leaves at least
        (.5 - min_balancedness_tol) fraction of samples on each side of the split; or fraction
        of the total weight of samples, when sample_weight is not None. Default value, ensures
        that at least 5% of the parent node weight falls in each side of the split. Set it to 0.0 for no
        balancedness and to .5 for perfectly balanced splits. For the formal inference theory
        to be valid, this has to be any positive constant bounded away from zero.

    honest : boolean, optional (default=True)
        Whether to use honest trees, i.e. half of the samples are used for
        creating the tree structure and the other half for the estimation at
        the leafs. If False, then all samples are used for both parts.

    random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
        by :mod:`np.random<numpy.random>`.
    """

    def __init__(self, *,
                 model_regression="auto",
                 model_propensity="auto",
                 featurizer=None,
                 min_propensity=1e-6,
                 categories='auto',
                 cv=2,
                 mc_iters=None,
                 mc_agg='mean',
                 max_depth=None,
                 min_samples_split=10,
                 min_samples_leaf=5,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 min_impurity_decrease=0.,
                 min_balancedness_tol=.45,
                 honest=True,
                 random_state=None):
        # The nuisance models and featurizer are stored as clones; `safe=False`
        # lets non-estimator values such as 'auto' or None pass through `clone`.
        self.model_regression = clone(model_regression, safe=False)
        self.model_propensity = clone(model_propensity, safe=False)
        self.featurizer = clone(featurizer, safe=False)
        self.min_propensity = min_propensity
        self.categories = categories
        self.cv = cv
        self.mc_iters = mc_iters
        self.mc_agg = mc_agg
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.min_impurity_decrease = min_impurity_decrease
        self.min_balancedness_tol = min_balancedness_tol
        self.honest = honest
        self.random_state = random_state

    def _gen_drpolicy_learner(self):
        """Build the unfitted DR learner whose final stage is a `PolicyTree` configured from this estimator."""
        return _DRLearnerWrapper(model_regression=self.model_regression,
                                 model_propensity=self.model_propensity,
                                 featurizer=self.featurizer,
                                 min_propensity=self.min_propensity,
                                 categories=self.categories,
                                 cv=self.cv,
                                 mc_iters=self.mc_iters,
                                 mc_agg=self.mc_agg,
                                 model_final=PolicyTree(max_depth=self.max_depth,
                                                        min_samples_split=self.min_samples_split,
                                                        min_samples_leaf=self.min_samples_leaf,
                                                        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                                                        max_features=self.max_features,
                                                        min_impurity_decrease=self.min_impurity_decrease,
                                                        min_balancedness_tol=self.min_balancedness_tol,
                                                        honest=self.honest,
                                                        random_state=self.random_state),
                                 multitask_model_final=True,
                                 random_state=self.random_state)

    def plot(self, *, feature_names=None, treatment_names=None, ax=None, title=None,
             max_depth=None, filled=True, rounded=True, precision=3, fontsize=None):
        """
        Exports policy trees to matplotlib

        Parameters
        ----------
        ax : :class:`matplotlib.axes.Axes`, optional, default None
            The axes on which to plot

        title : string, optional, default None
            A title for the final figure to be printed at the top of the page.

        feature_names : list of strings, optional, default None
            Names of each of the features.

        treatment_names : list of strings, optional, default None
            Names of each of the treatments including the baseline/control

        max_depth: int or None, optional, default None
            The maximum tree depth to plot

        filled : bool, optional, default True
            When set to ``True``, paint nodes to indicate majority class for
            classification, extremity of values for regression, or purity of node
            for multi-output.

        rounded : bool, optional, default True
            When set to ``True``, draw node boxes with rounded corners and use
            Helvetica fonts instead of Times-Roman.

        precision : int, optional, default 3
            Number of digits of precision for floating point in the values of
            impurity, threshold and value attributes of each node.

        fontsize : int, optional, default None
            Font size for text
        """
        return self.policy_model_.plot(feature_names=self.policy_feature_names(feature_names=feature_names),
                                       treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
                                       ax=ax,
                                       title=title,
                                       max_depth=max_depth,
                                       filled=filled,
                                       rounded=rounded,
                                       precision=precision,
                                       fontsize=fontsize)

    def export_graphviz(self, *, out_file=None,
                        feature_names=None, treatment_names=None,
                        max_depth=None, filled=True, leaves_parallel=True,
                        rotate=False, rounded=True, special_characters=False, precision=3):
        """
        Export a graphviz dot file representing the learned tree model

        Parameters
        ----------
        out_file : file object or string, optional, default None
            Handle or name of the output file. If ``None``, the result is
            returned as a string.

        feature_names : list of strings, optional, default None
            Names of each of the features.

        treatment_names : list of strings, optional, default None
            Names of each of the treatments, including the baseline treatment

        max_depth: int or None, optional, default None
            The maximum tree depth to plot

        filled : bool, optional, default True
            When set to ``True``, paint nodes to indicate majority class for
            classification, extremity of values for regression, or purity of node
            for multi-output.

        leaves_parallel : bool, optional, default True
            When set to ``True``, draw all leaf nodes at the bottom of the tree.

        rotate : bool, optional, default False
            When set to ``True``, orient tree left to right rather than top-down.

        rounded : bool, optional, default True
            When set to ``True``, draw node boxes with rounded corners and use
            Helvetica fonts instead of Times-Roman.

        special_characters : bool, optional, default False
            When set to ``False``, ignore special characters for PostScript
            compatibility.

        precision : int, optional, default 3
            Number of digits of precision for floating point in the values of
            impurity, threshold and value attributes of each node.
        """
        return self.policy_model_.export_graphviz(out_file=out_file,
                                                  feature_names=self.policy_feature_names(feature_names=feature_names),
                                                  treatment_names=self.policy_treatment_names(
                                                      treatment_names=treatment_names),
                                                  max_depth=max_depth,
                                                  filled=filled,
                                                  leaves_parallel=leaves_parallel,
                                                  rotate=rotate,
                                                  rounded=rounded,
                                                  special_characters=special_characters,
                                                  precision=precision)

    def render(self, out_file, *, format='pdf', view=True, feature_names=None,
               treatment_names=None, max_depth=None,
               filled=True, leaves_parallel=True, rotate=False, rounded=True,
               special_characters=False, precision=3):
        """
        Render the tree to a file

        Parameters
        ----------
        out_file : file name to save to

        format : string, optional, default 'pdf'
            The file format to render to; must be supported by graphviz

        view : bool, optional, default True
            Whether to open the rendered result with the default application.

        feature_names : list of strings, optional, default None
            Names of each of the features.

        treatment_names : list of strings, optional, default None
            Names of each of the treatments, including the baseline/control

        max_depth: int or None, optional, default None
            The maximum tree depth to plot

        filled : bool, optional, default True
            When set to ``True``, paint nodes to indicate majority class for
            classification, extremity of values for regression, or purity of node
            for multi-output.

        leaves_parallel : bool, optional, default True
            When set to ``True``, draw all leaf nodes at the bottom of the tree.

        rotate : bool, optional, default False
            When set to ``True``, orient tree left to right rather than top-down.

        rounded : bool, optional, default True
            When set to ``True``, draw node boxes with rounded corners and use
            Helvetica fonts instead of Times-Roman.

        special_characters : bool, optional, default False
            When set to ``False``, ignore special characters for PostScript
            compatibility.

        precision : int, optional, default 3
            Number of digits of precision for floating point in the values of
            impurity, threshold and value attributes of each node.
        """
        return self.policy_model_.render(out_file,
                                         format=format,
                                         view=view,
                                         feature_names=self.policy_feature_names(feature_names=feature_names),
                                         treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
                                         max_depth=max_depth,
                                         filled=filled,
                                         leaves_parallel=leaves_parallel,
                                         rotate=rotate,
                                         rounded=rounded,
                                         special_characters=special_characters,
                                         precision=precision)
[docs]class DRPolicyForest(_BaseDRPolicyLearner):
"""
Policy learner that uses doubly-robust correction techniques to account for
covariate shift (selection bias) between the treatment arms.
In this estimator, the policy is estimated by first constructing doubly robust estimates of the counterfactual
outcomes
.. math ::
Y_{i, t}^{DR} = E[Y | X_i, W_i, T_i=t]\
+ \\frac{Y_i - E[Y | X_i, W_i, T_i=t]}{Pr[T_i=t | X_i, W_i]} \\cdot 1\\{T_i=t\\}
Then optimizing the objective
.. math ::
V(\\pi) = \\sum_i \\sum_t \\pi_t(X_i) * (Y_{i, t} - Y_{i, 0})
with the constraint that only one of :math:`\\pi_t(X_i)` is 1 and the rest are 0, for each :math:`X_i`.
Thus if we estimate the nuisance functions :math:`h(X, W, T) = E[Y | X, W, T]` and
:math:`p_t(X, W)=Pr[T=t | X, W]` in the first stage, we can estimate the final stage cate for each
treatment t, by constructing a decision tree that maximizes the objective :math:`V(\\pi)`
The problem of estimating the nuisance function :math:`p` is a simple multi-class classification
problem of predicting the label :math:`T` from :math:`X, W`. The :class:`.DRLearner`
class takes as input the parameter ``model_propensity``, which is an arbitrary scikit-learn
classifier, that is internally used to solve this classification problem.
The second nuisance function :math:`h` is a simple regression problem and the :class:`.DRLearner`
class takes as input the parameter ``model_regression``, which is an arbitrary scikit-learn regressor that
is internally used to solve this regression problem.
Parameters
----------
model_propensity : scikit-learn classifier or 'auto', optional (default='auto')
Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
where T is a shape (n, ) array.
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.
model_regression : scikit-learn regressor or 'auto', optional (default='auto')
Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
`predict` methods. If different models per treatment arm are desired, see the
:class:`.MultiModelWrapper` helper class.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
featurizer : :term:`transformer`, optional, default None
Must support fit_transform and transform. Used to create composite features in the final CATE regression.
It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
If featurizer=None, then CATE is trained on X.
min_propensity : float, optional, default ``1e-6``
The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
categories: 'auto' or list, default 'auto'
The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
The first category will be treated as the control treatment.
cv: int, cross-validation generator or an iterable, optional (default is 2)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 3-fold cross-validation,
- integer, to specify the number of folds.
- :term:`CV splitter`
- An iterable yielding (train, test) splits as arrays of indices.
For integer/None inputs, if the treatment is discrete
:class:`~sklearn.model_selection.StratifiedKFold` is used, else,
:class:`~sklearn.model_selection.KFold` is used
(with a random shuffle in either case).
Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all
W, X are None, then we call `split(ones((T.shape[0], 1)), T)`.
mc_iters: int, optional (default=None)
The number of times to rerun the first stage models to reduce the variance of the nuisances.
mc_agg: {'mean', 'median'}, optional (default='mean')
How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
cross-fitting.
n_estimators : integer, optional (default=100)
The total number of trees in the forest. The forest consists of a
forest of sqrt(n_estimators) sub-forests, where each sub-forest
contains sqrt(n_estimators) trees.
max_depth : integer or None, optional (default=None)
The maximum depth of the tree. If None, then nodes are expanded until
all leaves are pure or until all leaves contain less than
min_samples_split samples.
min_samples_split : int, float, optional (default=10)
The minimum number of splitting samples required to split an internal node.
- If int, then consider `min_samples_split` as the minimum number.
- If float, then `min_samples_split` is a fraction and
`ceil(min_samples_split * n_samples)` are the minimum
number of samples for each split.
min_samples_leaf : int, float, optional (default=5)
The minimum number of samples required to be at a leaf node.
A split point at any depth will only be considered if it leaves at
least ``min_samples_leaf`` splitting samples in each of the left and
right branches. This may have the effect of smoothing the model,
especially in regression. After construction the tree is also pruned
so that there are at least min_samples_leaf estimation samples on
each leaf.
- If int, then consider `min_samples_leaf` as the minimum number.
- If float, then `min_samples_leaf` is a fraction and
`ceil(min_samples_leaf * n_samples)` are the minimum
number of samples for each node.
min_weight_fraction_leaf : float, optional (default=0.)
The minimum weighted fraction of the sum total of weights (of all
splitting samples) required to be at a leaf node. Samples have
equal weight when sample_weight is not provided. After construction
the tree is pruned so that the fraction of the sum total weight
of the estimation samples contained in each leaf node is at
least min_weight_fraction_leaf
max_features : int, float, string or None, optional (default="auto")
The number of features to consider when looking for the best split:
- If int, then consider `max_features` features at each split.
- If float, then `max_features` is a fraction and
`int(max_features * n_features)` features are considered at each
split.
- If "auto", then `max_features=n_features`.
- If "sqrt", then `max_features=sqrt(n_features)`.
- If "log2", then `max_features=log2(n_features)`.
- If None, then `max_features=n_features`.
Note: the search for a split does not stop until at least one
valid partition of the node samples is found, even if it requires to
effectively inspect more than ``max_features`` features.
min_impurity_decrease : float, optional (default=0.)
A node will be split if this split induces a decrease of the impurity
greater than or equal to this value.
The weighted impurity decrease equation is the following::
N_t / N * (impurity - N_t_R / N_t * right_impurity
- N_t_L / N_t * left_impurity)
where ``N`` is the total number of split samples, ``N_t`` is the number of
split samples at the current node, ``N_t_L`` is the number of split samples in the
left child, and ``N_t_R`` is the number of split samples in the right child.
``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
if ``sample_weight`` is passed.
max_samples : int or float in (0, 1], default=.5,
The number of samples to use for each subsample that is used to train each tree:
- If int, then train each tree on `max_samples` samples, sampled without replacement from all the samples
- If float, then train each tree on ceil(`max_samples` * `n_samples`), sampled without replacement
from all the samples.
min_balancedness_tol: float in [0, .5], default=.45
How imbalanced a split we can tolerate. This enforces that each split leaves at least
(.5 - min_balancedness_tol) fraction of samples on each side of the split; or fraction
of the total weight of samples, when sample_weight is not None. Default value, ensures
that at least 5% of the parent node weight falls in each side of the split. Set it to 0.0 for no
balancedness and to .5 for perfectly balanced splits. For the formal inference theory
to be valid, this has to be any positive constant bounded away from zero.
honest : boolean, optional (default=True)
Whether to use honest trees, i.e. half of the samples are used for
creating the tree structure and the other half for the estimation at
the leafs. If False, then all samples are used for both parts.
n_jobs : int or None, optional (default=-1)
The number of jobs to run in parallel for both `fit` and `predict`.
``None`` means 1 unless in a :func:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : int, optional (default=0)
Controls the verbosity when fitting and predicting.
random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
by :mod:`np.random<numpy.random>`.
"""
def __init__(self, *,
             model_regression="auto",
             model_propensity="auto",
             featurizer=None,
             min_propensity=1e-6,
             categories='auto',
             cv=2,
             mc_iters=None,
             mc_agg='mean',
             n_estimators=100,
             max_depth=None,
             min_samples_split=10,
             min_samples_leaf=5,
             min_weight_fraction_leaf=0.,
             max_features="auto",
             min_impurity_decrease=0.,
             max_samples=.5,
             min_balancedness_tol=.45,
             honest=True,
             n_jobs=-1,
             verbose=0,
             random_state=None):
    # Store every constructor argument on the instance under its own name.
    # The nuisance models and featurizer are stored as clones; `safe=False`
    # lets non-estimator values such as 'auto' or None pass through `clone`.
    self.model_regression = clone(model_regression, safe=False)
    self.model_propensity = clone(model_propensity, safe=False)
    self.featurizer = clone(featurizer, safe=False)
    self.min_propensity = min_propensity
    self.categories = categories
    self.cv = cv
    self.mc_iters = mc_iters
    self.mc_agg = mc_agg
    self.n_estimators = n_estimators
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.min_weight_fraction_leaf = min_weight_fraction_leaf
    self.max_features = max_features
    self.min_impurity_decrease = min_impurity_decrease
    self.max_samples = max_samples
    self.min_balancedness_tol = min_balancedness_tol
    self.honest = honest
    self.n_jobs = n_jobs
    self.verbose = verbose
    self.random_state = random_state
def _gen_drpolicy_learner(self):
    """
    Build the doubly robust learner whose final model is a policy forest.

    Returns
    -------
    learner : _DRLearnerWrapper
        A new, unfitted wrapper configured from this instance's attributes,
        with a :class:`PolicyForest` as ``model_final`` and
        ``multitask_model_final=True``.
    """
    # `n_estimators` is accepted and stored by the constructor, so it has to
    # be forwarded here; otherwise the user's setting is silently ignored and
    # the forest falls back to PolicyForest's own default size.
    policy_forest = PolicyForest(n_estimators=self.n_estimators,
                                 max_depth=self.max_depth,
                                 min_samples_split=self.min_samples_split,
                                 min_samples_leaf=self.min_samples_leaf,
                                 min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                                 max_features=self.max_features,
                                 min_impurity_decrease=self.min_impurity_decrease,
                                 max_samples=self.max_samples,
                                 min_balancedness_tol=self.min_balancedness_tol,
                                 honest=self.honest,
                                 n_jobs=self.n_jobs,
                                 verbose=self.verbose,
                                 random_state=self.random_state)
    return _DRLearnerWrapper(model_regression=self.model_regression,
                             model_propensity=self.model_propensity,
                             featurizer=self.featurizer,
                             min_propensity=self.min_propensity,
                             categories=self.categories,
                             cv=self.cv,
                             mc_iters=self.mc_iters,
                             mc_agg=self.mc_agg,
                             model_final=policy_forest,
                             multitask_model_final=True,
                             random_state=self.random_state)
def plot(self, tree_id, *, feature_names=None, treatment_names=None,
         ax=None, title=None,
         max_depth=None, filled=True, rounded=True, precision=3, fontsize=None):
    """
    Draw one policy tree of the forest with matplotlib.

    Parameters
    ----------
    tree_id : int
        Index, within the fitted forest, of the tree to plot.
    feature_names : list of strings, optional, default None
        Names of each of the features.
    treatment_names : list of strings, optional, default None
        Names of each of the treatments, starting with a name for the
        baseline/control treatment (alphanumerically smallest).
    ax : :class:`matplotlib.axes.Axes`, optional, default None
        The axes on which to plot.
    title : string, optional, default None
        A title for the final figure, printed at the top of the page.
    max_depth : int or None, optional, default None
        The maximum tree depth to plot.
    filled : bool, optional, default True
        When set to ``True``, paint nodes to indicate majority class for
        classification, extremity of values for regression, or purity of
        node for multi-output.
    rounded : bool, optional, default True
        When set to ``True``, draw node boxes with rounded corners and use
        Helvetica fonts instead of Times-Roman.
    precision : int, optional, default 3
        Number of digits of precision for floating point in the values of
        impurity, threshold and value attributes of each node.
    fontsize : int, optional, default None
        Font size for text.

    Returns
    -------
    Whatever the selected tree's ``plot`` method returns.
    """
    tree = self.policy_model_[tree_id]
    # Purely cosmetic options are forwarded to the tree untouched.
    drawing_opts = dict(ax=ax,
                        title=title,
                        max_depth=max_depth,
                        filled=filled,
                        rounded=rounded,
                        precision=precision,
                        fontsize=fontsize)
    # Feature and treatment names are resolved through the policy-level
    # helpers before being handed to the tree.
    return tree.plot(
        feature_names=self.policy_feature_names(feature_names=feature_names),
        treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
        **drawing_opts)
def export_graphviz(self, tree_id, *, out_file=None, feature_names=None, treatment_names=None,
                    max_depth=None,
                    filled=True, leaves_parallel=True,
                    rotate=False, rounded=True, special_characters=False, precision=3):
    """
    Export one policy tree of the forest in graphviz dot format.

    Parameters
    ----------
    tree_id : int
        Index, within the fitted forest, of the tree to export.
    out_file : file object or string, optional, default None
        Handle or name of the output file. If ``None``, the result is
        returned as a string.
    feature_names : list of strings, optional, default None
        Names of each of the features.
    treatment_names : list of strings, optional, default None
        Names of each of the treatments, starting with a name for the
        baseline/control/None treatment (alphanumerically smallest in case
        of discrete treatment).
    max_depth : int or None, optional, default None
        The maximum tree depth to plot.
    filled : bool, optional, default True
        When set to ``True``, paint nodes to indicate majority class for
        classification, extremity of values for regression, or purity of
        node for multi-output.
    leaves_parallel : bool, optional, default True
        When set to ``True``, draw all leaf nodes at the bottom of the tree.
    rotate : bool, optional, default False
        When set to ``True``, orient tree left to right rather than top-down.
    rounded : bool, optional, default True
        When set to ``True``, draw node boxes with rounded corners and use
        Helvetica fonts instead of Times-Roman.
    special_characters : bool, optional, default False
        When set to ``False``, ignore special characters for PostScript
        compatibility.
    precision : int, optional, default 3
        Number of digits of precision for floating point in the values of
        impurity, threshold and value attributes of each node.

    Returns
    -------
    Whatever the selected tree's ``export_graphviz`` method returns.
    """
    # Feature names are resolved before the tree is looked up.
    resolved_features = self.policy_feature_names(feature_names=feature_names)
    tree = self.policy_model_[tree_id]
    # Layout / appearance switches are passed through unchanged.
    graph_opts = dict(max_depth=max_depth,
                      filled=filled,
                      leaves_parallel=leaves_parallel,
                      rotate=rotate,
                      rounded=rounded,
                      special_characters=special_characters,
                      precision=precision)
    return tree.export_graphviz(
        out_file=out_file,
        feature_names=resolved_features,
        treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
        **graph_opts)
def render(self, tree_id, out_file, *, format='pdf', view=True,
           feature_names=None,
           treatment_names=None,
           max_depth=None,
           filled=True, leaves_parallel=True, rotate=False, rounded=True,
           special_characters=False, precision=3):
    """
    Render one policy tree of the forest to a file.

    Parameters
    ----------
    tree_id : int
        Index, within the fitted forest, of the tree to render.
    out_file : file name to save to
    format : string, optional, default 'pdf'
        The file format to render to; must be supported by graphviz.
    view : bool, optional, default True
        Whether to open the rendered result with the default application.
    feature_names : list of strings, optional, default None
        Names of each of the features.
    treatment_names : list of strings, optional, default None
        Names of each of the treatments, starting with a name for the
        baseline/control treatment (alphanumerically smallest in case of
        discrete treatment).
    max_depth : int or None, optional, default None
        The maximum tree depth to plot.
    filled : bool, optional, default True
        When set to ``True``, paint nodes to indicate majority class for
        classification, extremity of values for regression, or purity of
        node for multi-output.
    leaves_parallel : bool, optional, default True
        When set to ``True``, draw all leaf nodes at the bottom of the tree.
    rotate : bool, optional, default False
        When set to ``True``, orient tree left to right rather than top-down.
    rounded : bool, optional, default True
        When set to ``True``, draw node boxes with rounded corners and use
        Helvetica fonts instead of Times-Roman.
    special_characters : bool, optional, default False
        When set to ``False``, ignore special characters for PostScript
        compatibility.
    precision : int, optional, default 3
        Number of digits of precision for floating point in the values of
        impurity, threshold and value attributes of each node.

    Returns
    -------
    Whatever the selected tree's ``render`` method returns.
    """
    # Feature names are resolved before the tree is looked up.
    resolved_features = self.policy_feature_names(feature_names=feature_names)
    tree = self.policy_model_[tree_id]
    # Output and appearance settings are passed through unchanged.
    render_opts = dict(format=format,
                       view=view,
                       max_depth=max_depth,
                       filled=filled,
                       leaves_parallel=leaves_parallel,
                       rotate=rotate,
                       rounded=rounded,
                       special_characters=special_characters,
                       precision=precision)
    return tree.render(
        out_file,
        feature_names=resolved_features,
        treatment_names=self.policy_treatment_names(treatment_names=treatment_names),
        **render_opts)
```