In [1]:
from sklearn.datasets import load_boston  # note: load_boston was removed in scikit-learn 1.2, so this transcript requires an older release
import sklearn.ensemble
import sklearn.model_selection
import numpy as np
In [2]:
boston = load_boston()
In [3]:
rf = sklearn.ensemble.RandomForestRegressor(n_estimators=1000)
In [4]:
train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(boston.data, boston.target, train_size=0.80)
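Both the split and the forest are randomized, so the exact numbers below will differ from run to run. If you want reproducible output, train_test_split accepts a random_state seed (the value here is arbitrary):

train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(
    boston.data, boston.target, train_size=0.80, random_state=0)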
In [5]:
rf.fit(train, labels_train)
Out[5]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=1000, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
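As a global point of comparison for the local explanations below, you can list the forest's impurity-based feature importances; a quick sketch (top five only):

for name, imp in sorted(zip(boston.feature_names, rf.feature_importances_), key=lambda t: -t[1])[:5]:
    print(name, round(imp, 3))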
In [6]:
print('Random Forest MSError', np.mean((rf.predict(test) - labels_test) ** 2))
Random Forest MSError 17.349331324117653
In [7]:
print('MSError when predicting the mean', np.mean((labels_train.mean() - labels_test) ** 2))
MSError when predicting the mean 79.186326166360075
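The same quantity is available through sklearn's metric helper; an equivalent one-liner, assuming the split above:

from sklearn.metrics import mean_squared_error
print('Random Forest MSE', mean_squared_error(labels_test, rf.predict(test)))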
In [8]:
# treat any feature with at most 10 distinct values as categorical
categorical_features = np.argwhere(np.array([len(set(boston.data[:, x])) for x in range(boston.data.shape[1])]) <= 10).flatten()
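On the Boston data this heuristic should pick out the binary CHAS indicator and the RAD accessibility index (nine distinct values); a quick check:

print(boston.feature_names[categorical_features])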
In [9]:
import lime
import lime.lime_tabular
In [10]:
explainer = lime.lime_tabular.LimeTabularExplainer(
    train,
    feature_names=boston.feature_names,
    class_names=['price'],
    categorical_features=categorical_features,
    verbose=True,
    mode='regression')
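By default the explainer discretizes continuous features into quartiles, which is why the explanation below reports bins such as '6.99 < LSTAT <= 11.43'. A sketch of switching to decile bins, if you want finer-grained ranges:

explainer_deciles = lime.lime_tabular.LimeTabularExplainer(
    train,
    feature_names=boston.feature_names,
    class_names=['price'],
    categorical_features=categorical_features,
    discretizer='decile',  # default is 'quartile'
    mode='regression')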
In [11]:
i = 25  # index of the test instance to explain
exp = explainer.explain_instance(test[i], rf.predict, num_features=5)
Intercept 23.9047475063
Prediction_local [ 22.32579479]
Right: 23.1073
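In the verbose output, Intercept plus the weighted feature contributions give the local linear model's prediction (Prediction_local), while Right is the random forest's own prediction for this instance. The latter can be verified directly:

print(rf.predict(test[i].reshape(1, -1)))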
In [12]:
exp.show_in_notebook(show_table=True)
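Outside a notebook, the same rendered explanation can be written to disk with save_to_file (the path is just an example):

exp.save_to_file('lime_explanation.html')  # hypothetical output path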
In [13]:
exp.as_list()
Out[13]:
[('6.99 < LSTAT <= 11.43', 1.7571320048618118),
 ('6.21 < RM <= 6.62', -1.5638211582388033),
 ('NOX > 0.62', -0.77384372989110417),
 ('19.10 < PTRATIO <= 20.20', -0.60756112694664299),
 ('2.08 < DIS <= 3.17', -0.39085870918058263)]
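For reports, the same weights can also be drawn as a matplotlib bar chart; a minimal sketch:

import matplotlib.pyplot as plt
fig = exp.as_pyplot_figure()
plt.tight_layout()
plt.show()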