12. Imputation
12.1. Simple imputer
[1]:
# Mean imputation demo: fill a missing entry with the mean of its column.
import numpy as np
from sklearn.impute import SimpleImputer
# 3x3 toy matrix; the only missing value (np.nan) sits in row 1, column 1.
X = [
[7, 2, 3],
[4, np.nan, 6],
[10, 5, 9]
]
# Treat np.nan as the missing-value marker and impute with the 'mean' strategy.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
# Learn the fill values from X. Kept as a bare last expression so the notebook
# displays the fitted estimator.
imp.fit(X)
[1]:
SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='mean', verbose=0)
[2]:
# Apply the fitted imputer to X: the nan in column 1 is replaced by 3.5,
# the mean of the two observed values (2 and 5) in that column.
imp.transform(X)
[2]:
array([[ 7. ,  2. ,  3. ],
       [ 4. ,  3.5,  6. ],
       [10. ,  5. ,  9. ]])
12.2. Iterative imputer
[3]:
# Model-based imputation demo with scikit-learn's experimental IterativeImputer.
# The enable_iterative_imputer import is required even though the name is never
# used: importing it is what makes IterativeImputer importable from
# sklearn.impute, so it must come first.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
# Same 3x3 toy matrix as before; the only missing value is in row 1, column 1.
X = [
[7, 2, 3],
[4, np.nan, 6],
[10, 5, 9]
]
# NOTE: this rebinds `imp`, replacing any imputer previously stored under that name.
# random_state is fixed, presumably so that reruns reproduce the recorded output.
imp = IterativeImputer(missing_values=np.nan, random_state=37)
# Fit on X. Kept as a bare last expression so the notebook displays the
# fitted estimator.
imp.fit(X)
[3]:
IterativeImputer(add_indicator=False, estimator=None,
                 imputation_order='ascending', initial_strategy='mean',
                 max_iter=10, max_value=None, min_value=None,
                 missing_values=nan, n_nearest_features=None, random_state=37,
                 sample_posterior=False, tol=0.001, verbose=0)
[4]:
# Impute X with the fitted IterativeImputer. The missing entry is predicted
# from the other features instead of being set to the plain column mean
# (recorded result: about 2.6, versus a column mean of 3.5).
imp.transform(X)
[4]:
array([[ 7.       ,  2.       ,  3.       ],
       [ 4.       ,  2.6000004,  6.       ],
       [10.       ,  5.       ,  9.       ]])
12.3. Missing indicator
[5]:
# Missing-value indicator demo: build a boolean mask marking where values are
# missing, rather than filling them in.
from sklearn.impute import MissingIndicator
# Same 3x3 toy matrix; the only missing value (np.nan) is in row 1, column 1.
X = [
[7, 2, 3],
[4, np.nan, 6],
[10, 5, 9]
]
# All-default indicator; the repr echoed after fit shows the settings in effect
# (features='missing-only', missing_values=nan).
ind = MissingIndicator()
# Record which features contain missing values. Kept as a bare last expression
# so the notebook displays the fitted estimator.
ind.fit(X)
[5]:
MissingIndicator(error_on_new=True, features='missing-only', missing_values=nan,
                 sparse='auto')
[6]:
# Produce the boolean missing-value mask for X. Only column 1 contained a nan
# at fit time, so the result has a single column, True only for row 1.
ind.transform(X)
[6]:
array([[False],
       [ True],
       [False]])