12. Imputation

12.1. Simple imputer

[1]:
import numpy as np
from sklearn.impute import SimpleImputer

# Toy 3x3 matrix: the middle row is missing its second value.
X = [[7, 2, 3], [4, np.nan, 6], [10, 5, 9]]

# Mean strategy: every NaN is replaced by the mean of the observed values
# in the same column. fit() hands back the estimator itself, so the call
# can be chained and the fitted imputer displayed as the cell result.
imp = SimpleImputer(missing_values=np.nan, strategy='mean').fit(X)
imp
[1]:
SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='mean', verbose=0)
[2]:
# Apply the fitted mean imputer: the NaN in the second column becomes 3.5,
# the mean of the observed values 2 and 5 in that column.
imp.transform(X)
[2]:
array([[ 7. ,  2. ,  3. ],
       [ 4. ,  3.5,  6. ],
       [10. ,  5. ,  9. ]])

12.2. Iterative imputer

[3]:
# IterativeImputer is experimental: enable_iterative_imputer is imported only
# for its side effect, which makes IterativeImputer importable from
# sklearn.impute. The name itself is never used afterwards.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

# Same toy 3x3 matrix, with one missing value in the middle row.
X = [[7, 2, 3], [4, np.nan, 6], [10, 5, 9]]

# random_state is pinned so the imputed value is reproducible across runs.
# fit() hands back the estimator itself, so the fitted imputer is the cell result.
imp = IterativeImputer(missing_values=np.nan, random_state=37).fit(X)
imp
[3]:
IterativeImputer(add_indicator=False, estimator=None,
                 imputation_order='ascending', initial_strategy='mean',
                 max_iter=10, max_value=None, min_value=None,
                 missing_values=nan, n_nearest_features=None, random_state=37,
                 sample_posterior=False, tol=0.001, verbose=0)
[4]:
# Apply the fitted iterative imputer: only the NaN entry is replaced, with a
# value estimated from the other columns; observed values pass through unchanged.
imp.transform(X)
[4]:
array([[ 7.       ,  2.       ,  3.       ],
       [ 4.       ,  2.6000004,  6.       ],
       [10.       ,  5.       ,  9.       ]])

12.3. Missing indicator

[5]:
from sklearn.impute import MissingIndicator

# Same toy 3x3 matrix, with one missing value in the middle row.
X = [[7, 2, 3], [4, np.nan, 6], [10, 5, 9]]

# Default settings are used. fit() hands back the estimator itself, so the
# fitted indicator is displayed as the cell result.
ind = MissingIndicator().fit(X)
ind
[5]:
MissingIndicator(error_on_new=True, features='missing-only', missing_values=nan,
                 sparse='auto')
[6]:
# Boolean mask marking where values are missing. With the default
# features='missing-only', only columns that contained missing values when
# the indicator was fitted are reported, hence a single output column here.
ind.transform(X)
[6]:
array([[False],
       [ True],
       [False]])