欠損値の確認（カラムごとの欠損数）
# Count the missing entries in each column of `data`.
data.isna().sum()
カラムの型（型ごとのカラム数）
# Tally how many columns of `data` share each dtype.
column_dtypes = data.dtypes
column_dtypes.value_counts()
カテゴリカル変数ごとのユニーク値の数
# Number of distinct values in each object-dtype (categorical) column of
# `data`. DataFrame.nunique() already works column by column, so there is
# no need to route pd.Series.nunique through apply().
data.select_dtypes(include='object').nunique()
前処理
Label Encoding
from sklearn.preprocessing import LabelEncoder

# Label-encode every object-dtype column of app_train whose unique() result
# has at most two entries, and apply the same fitted mapping to the
# matching column of app_test.
le = LabelEncoder()
le_count = 0

for col in app_train:
    # Guard clauses: skip anything that is not a low-cardinality object column.
    if app_train[col].dtype != 'object':
        continue
    if len(app_train[col].unique()) > 2:
        continue

    # The encoder is fitted on the training column only; the test column is
    # transformed with that same fit so both frames share one mapping.
    app_train[col] = le.fit_transform(app_train[col])
    app_test[col] = le.transform(app_test[col])
    le_count += 1

# Report how many columns were converted.
print('%d columns were label encoded.' % le_count)
One Hot Encoding
# One-hot encode each frame independently with pandas' get_dummies.
# NOTE(review): because the two frames are encoded separately, their
# resulting column sets may differ -- confirm they are aligned afterwards.
app_train, app_test = (
    pd.get_dummies(frame) for frame in (app_train, app_test)
)