Titanic example revisited – all together
In this section, we are going to put all the bits and pieces of feature engineering and dimensionality reduction together:
import re
import numpy as np
import pandas as pd
import random as rd
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA

# Print options
np.set_printoptions(precision=4, threshold=10000, linewidth=160, edgeitems=999, suppress=True)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 160)
pd.set_option('expand_frame_repr', False)
pd.set_option('display.precision', 4)

# constructing binary features
def process_embarked():
    global df_titanic_data

    # replacing the missing values with the most common value in the variable
    df_titanic_data.loc[df_titanic_data.Embarked.isnull(), 'Embarked'] = \
        df_titanic_data.Embarked.dropna().mode().iloc[0]

    # converting the values into numbers ...
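The listing above breaks off at the conversion step. As a minimal sketch of how that step might continue inside process_embarked() (the pd.factorize and pd.get_dummies calls here are an assumption for illustration, not necessarily the author's exact code; it assumes pandas is imported as pd and df_titanic_data is the global DataFrame used throughout this example):

    # converting the values into numbers (sketch: factorize maps each port label to an integer code)
    df_titanic_data['Embarked'] = pd.factorize(df_titanic_data['Embarked'])[0]

    # binarizing the constructed feature: one indicator column per embarkation port
    df_titanic_data = pd.concat(
        [df_titanic_data,
         pd.get_dummies(df_titanic_data['Embarked'], prefix='Embarked')],
        axis=1)

The idea is simply that a categorical variable such as Embarked is first encoded numerically and then expanded into binary indicator features, which is the pattern the rest of the feature-engineering functions in this example follow.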