12. Imputation

12.1. Simple imputer

[1]:

import numpy as np
from sklearn.impute import SimpleImputer

# Toy 3x3 dataset with exactly one missing entry: row 1, column 1 (np.nan).
X = [
    [7, 2, 3],
    [4, np.nan, 6],
    [10, 5, 9]
]

# Treat np.nan as the missing-value marker and fill it using the 'mean'
# strategy. In the transform output recorded below, the gap becomes 3.5,
# i.e. the mean of the other two values (2 and 5) in that column.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
# Last expression of the cell: the notebook echoes the fitted estimator.
imp.fit(X)

[1]:

SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='mean', verbose=0)

[2]:

# Apply the fitted imputer to the same data it was fitted on; only the NaN
# entry changes, and the result is displayed as a float array.
imp.transform(X)

[2]:

array([[ 7. ,  2. ,  3. ],
       [ 4. ,  3.5,  6. ],
       [10. ,  5. ,  9. ]])

12.2. Iterative imputer

[3]:

# `enable_iterative_imputer` is never referenced by name below; it is imported
# purely for its side effect and must precede the IterativeImputer import
# (see the scikit-learn documentation for this experimental estimator).
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Same toy dataset as in the SimpleImputer section: one NaN at row 1, column 1.
X = [
    [7, 2, 3],
    [4, np.nan, 6],
    [10, 5, 9]
]

# np.nan marks missing entries; random_state is pinned to a fixed seed (37).
# All other settings are left at the defaults shown in the echoed repr below.
imp = IterativeImputer(missing_values=np.nan, random_state=37)
# Last expression of the cell: the notebook echoes the fitted estimator.
imp.fit(X)

[3]:

IterativeImputer(add_indicator=False, estimator=None,
                 imputation_order='ascending', initial_strategy='mean',
                 max_iter=10, max_value=None, min_value=None,
                 missing_values=nan, n_nearest_features=None, random_state=37,
                 sample_posterior=False, tol=0.001, verbose=0)

[4]:

# Impute the same data with the fitted IterativeImputer. In the recorded
# output the missing entry becomes roughly 2.6 rather than the column mean.
imp.transform(X)

[4]:

array([[ 7.       ,  2.       ,  3.       ],
       [ 4.       ,  2.6000004,  6.       ],
       [10.       ,  5.       ,  9.       ]])

12.3. Missing indicator

[5]:

from sklearn.impute import MissingIndicator

# Same toy dataset again: the only missing entry is at row 1, column 1.
X = [
    [7, 2, 3],
    [4, np.nan, 6],
    [10, 5, 9]
]

# Built with all default arguments; the echoed repr below lists them
# (features='missing-only', missing_values=nan, ...).
ind = MissingIndicator()
# Last expression of the cell: the notebook echoes the fitted indicator.
ind.fit(X)

[5]:

MissingIndicator(error_on_new=True, features='missing-only', missing_values=nan,
                 sparse='auto')

[6]:

# Produce the boolean missingness mask. The recorded output has a single
# column, which is True only for the row that contains the NaN.
ind.transform(X)

[6]:

array([[False],
       [ True],
       [False]])