# import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import random
from itertools import permutations
# Dataset: https://archive.ics.uci.edu/ml/datasets/Wine+Quality
# Загружаем два компонента
inputData1 = pd.read_csv('winequality-red.csv', delimiter=';')
# Учитываем признак того, что вино красное
inputData1['red'] = 1.0
inputData2 = pd.read_csv('winequality-white.csv', delimiter=';')
inputData2['red'] = 0.0
# Учитываем признак того, что вино белое (не красное)
# Объединяем в единый фрейм
inputData = pd.concat([inputData1,inputData2])
targetColumn = 'quality'
inputData.head()
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | red | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | 1.0 |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 | 1.0 |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 | 1.0 |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 | 1.0 |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | 1.0 |
inputData.shape
(6497, 13)
def get_accuracy_rate(targetDF):
targetColumn = 'quality'
FeatureColumns = targetDF.columns.tolist()
FeatureColumns.remove(targetColumn)
# Разделяем на X и y
X = targetDF[FeatureColumns].values
y = targetDF[targetColumn].values
# Random Forests с 100 estimator'ами
rf_clf = RandomForestClassifier(n_estimators=100)
# Задаем кросс-валидацию на 5 фолдов
kf = KFold(n_splits=5, shuffle=True)
# Считаем верность (accuracy) для каждого из фолдов
print("Accuracy: {0}".format(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y))))
return np.mean(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y)))
get_accuracy_rate(inputData)
Accuracy: [ 0.69076923 0.68923077 0.68745189 0.69515012 0.68899153]
0.68308320009474743
inputData.quality.value_counts()
6 2836 5 2138 7 1079 4 216 8 193 3 30 9 5 Name: quality, dtype: int64
# Вспомогательный фрейм, временно отбрасываем столбцы quality и red
df = inputData.drop(['quality', 'red'], axis=1)
# Половину нормализуем, половину преобразуем в z-scores
columns_list = df.columns.tolist()
half_of_list = int(len(columns_list)/2)
# Нормализация
df_normalized_columns = df[columns_list[:half_of_list]] # Половину столбцов
df_norm = (df_normalized_columns - df_normalized_columns.mean()) / (df_normalized_columns.max() - df_normalized_columns.min())
get_accuracy_rate(df_norm)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-13-6cf556646405> in <module>() ----> 1 get_accuracy_rate(df_norm) <ipython-input-4-a667f2e3c8a0> in get_accuracy_rate(targetDF) 3 4 FeatureColumns = targetDF.columns.tolist() ----> 5 FeatureColumns.remove(targetColumn) 6 7 # Разделяем на X и y ValueError: list.remove(x): x not in list
# Преобразование в z-scores:
df_z_statistics_columns = df[columns_list[half_of_list:]] # Вторую половину
df_z_statistics = (df_z_statistics_columns - df_z_statistics_columns.mean())/df_z_statistics_columns.std()
# Умножим бинарный столбец на случайное число (таким образом, для 1 получится случайное число, а 0 не изменится)
inputData['red'] = inputData['red'].apply(lambda x : x*random.random())
# Производим повторную конкатенацию в единый frame
new_df = pd.concat([df_norm, df_z_statistics, inputData['red'], inputData['quality']], axis=1)
listOfColumns = inputData.columns.tolist()
listOfColumns.remove('quality')
listOfColumns.remove('red')
for iterationNumber in range(10):
# Добавляем случайные (шумовые) столбцы
name_for_column = 'elem'+'_'+str(iterationNumber)
new_df[name_for_column] = 1
new_df[name_for_column] = new_df[name_for_column].apply(lambda x : x* random.random())
name_for_column = 'elem1'+'_'+str(iterationNumber)
new_df[name_for_column] = 1
new_df[name_for_column] = new_df[name_for_column].apply(lambda x : x* random.gauss(5,2)*random.random()+random.random())
name_for_column = 'elem2'+'_'+str(iterationNumber)
new_df[name_for_column] = new_df['elem1'+'_'+str(iterationNumber)]
new_df[name_for_column] = new_df[name_for_column].apply(lambda x : x**2* random.gauss(4,3)*random.random())
print("Iteration : #{0}".format(iterationNumber))
print("Mean accuracy: {0}".format(get_accuracy_rate(new_df)))
Iteration : #0 Accuracy: [ 0.66923077 0.66923077 0.67051578 0.66281755 0.6443418 ] Mean accuracy: 0.655535026943803 Iteration : #1 Accuracy: [ 0.62076923 0.66538462 0.64126251 0.63125481 0.61508853] Mean accuracy: 0.6398332445076094 Iteration : #2 Accuracy: [ 0.62 0.62846154 0.59661278 0.61431871 0.617398 ] Mean accuracy: 0.6192109907029075 Iteration : #3 Accuracy: [ 0.59538462 0.62769231 0.61200924 0.61431871 0.59276366] Mean accuracy: 0.6056631728548587 Iteration : #4 Accuracy: [ 0.60384615 0.58769231 0.59584296 0.59199384 0.62509623] Mean accuracy: 0.5924256528690709 Iteration : #5 Accuracy: [ 0.58076923 0.58538462 0.59507313 0.59353349 0.59815242] Mean accuracy: 0.5947373719429146 Iteration : #6 Accuracy: [ 0.58153846 0.58692308 0.61354888 0.58275597 0.5604311 ] Mean accuracy: 0.5782675430804762 Iteration : #7 Accuracy: [ 0.58615385 0.56923077 0.58198614 0.57120862 0.59199384] Mean accuracy: 0.578883401433055 Iteration : #8 Accuracy: [ 0.58 0.57461538 0.57505774 0.5804465 0.59507313] Mean accuracy: 0.5770326286492569 Iteration : #9 Accuracy: [ 0.57769231 0.60769231 0.54272517 0.55966128 0.56966898] Mean accuracy: 0.568572037662107
# Играем с коллинеарностью
#new_df['collinear_1'] = random.random() * new_df['volatile acidity'] + random.random()*new_df['total sulfur dioxide'] + random.random()
name_1 = random.choice(columns_list)
name_2 = random.choice(columns_list)
new_df['collinear_1'] = random.random() * new_df[name_1] + random.random()*new_df[name_2] + random.random()
new_df['collinear_1'] = new_df['collinear_1'].apply(lambda x : x+random.random())
name_3 = random.choice(columns_list)
name_4 = random.choice(columns_list)
name_5 = random.choice(columns_list)
new_df['collinear_2'] = random.random() * new_df[name_3] + random.gauss(random.randint(5,10), random.randint(10,21))*new_df[name_4] + random.gauss(random.randint(5,50), random.randint(2,21))*new_df[name_5]
new_df['collinear_2'] = new_df['collinear_2'].apply(lambda x : x+random.random())
print("Collinear: {0} and {1}".format(name_1, name_2))
print("Collinear: {0}, {1} and {2}".format(name_3, name_4, name_5))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
name_1 = random.choice(columns_list)
name_2 = random.choice(columns_list)
new_df['collinear_3'] = random.random() * new_df[name_1] + random.random()*new_df[name_2] + random.random()
new_df['collinear_3'] = new_df['collinear_3'].apply(lambda x : x+random.gauss(random.randint(10,25), random.randint(5,21)))
name_3 = random.choice(columns_list)
name_4 = random.choice(columns_list)
name_5 = random.choice(columns_list)
new_df['collinear_4'] = random.random() * new_df[name_3] + random.random()*new_df[name_4] + random.random()*new_df[name_5]
new_df['collinear_4'] = new_df['collinear_4'].apply(lambda x : x+random.random())
print("Collinear: {0} and {1}".format(name_1, name_2))
print("Collinear: {0}, {1} and {2}".format(name_3, name_4, name_5))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
name_1 = random.choice(columns_list)
name_2 = random.choice(columns_list)
new_df['collinear_5'] = random.random() * new_df[name_1] + random.random()*new_df[name_2] + random.random()
new_df['collinear_5'] = new_df['collinear_5'].apply(lambda x : x+random.random())
name_3 = random.choice(columns_list)
name_4 = random.choice(columns_list)
name_5 = random.choice(columns_list)
new_df['collinear_6'] = random.random() * new_df[name_3] + random.random()*new_df[name_4] + random.random()*new_df[name_5]
new_df['collinear_6'] = new_df['collinear_6'].apply(lambda x : x+random.random())
print("Collinear: {0} and {1}".format(name_1, name_2))
print("Collinear: {0}, {1} and {2}".format(name_3, name_4, name_5))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
Collinear: residual sugar and citric acid Collinear: sulphates, free sulfur dioxide and density Accuracy: [ 0.57307692 0.59846154 0.60662048 0.57274827 0.56812933] Mean accuracy: 0.5701080120802985 Collinear: free sulfur dioxide and pH Collinear: free sulfur dioxide, total sulfur dioxide and fixed acidity Accuracy: [ 0.56923077 0.56692308 0.56351039 0.58198614 0.55273287] Mean accuracy: 0.5670283650145082 Collinear: pH and chlorides Collinear: citric acid, pH and pH Accuracy: [ 0.57846154 0.57692308 0.58583526 0.56120092 0.59969207] Mean accuracy: 0.5708830461301593
for iterationNumber in range(5):
# Делаем коллинеарность по 2,3 и 4
# Добавляем каждого по половине из количества исходных столбцов
mod_dataframe = new_df
while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
mod_dataframe = new_df
candidates = [i for i in permutations(listOfColumns,3)]
for (name1, name2, name3) in random.sample(candidates,k=2*len(inputData.columns)):
mod_dataframe['name__'+name1+'_'+name2+'_'+name3] = random.random()*mod_dataframe[name1]+random.random()*mod_dataframe[name2]+random.random()*mod_dataframe[name3]
mod_dataframe['name__'+name1+'_'+name2+'_'+name3] = mod_dataframe['name__'+name1+'_'+name2+'_'+name3].apply(lambda x : x + random.random())
new_df = mod_dataframe
print("Iteration #{0}, 3-permutations: ".format(iterationNumber))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
mod_dataframe = new_df
candidates = [i for i in permutations(listOfColumns,2)]
for (name1, name2) in random.sample(candidates,k=2*len(inputData.columns)):
mod_dataframe['name__'+name1+'_'+name2] = random.random()*mod_dataframe[name1]+random.random()*mod_dataframe[name2]
mod_dataframe['name__'+name1+'_'+name2] = mod_dataframe['name__'+name1+'_'+name2].apply(lambda x : x*2 + random.random())
new_df = mod_dataframe
print("Iteration #{0}, 2-permutations: ".format(iterationNumber))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
mod_dataframe = new_df
candidates = [i for i in permutations(listOfColumns,4)]
for (name1, name2, name3, name4) in random.sample(candidates,k=2*len(inputData.columns)):
mod_dataframe['name__'+name1+'_'+name2+'_'+name3+'_'+name4] = random.random()*mod_dataframe[name1]+random.random()*new_df[name2]+random.random()*mod_dataframe[name3]+random.random()*mod_dataframe[name4]#+random.random()*mod_dataframe[name2]+random.random()*mod_dataframe[name4]
mod_dataframe['name__'+name1+'_'+name2+'_'+name3+'_'+name4] = mod_dataframe['name__'+name1+'_'+name2+'_'+name3+'_'+name4].apply(lambda x : x*2.3 + random.random())
new_df = mod_dataframe
print("Iteration #{0}, 4-permutations: ".format(iterationNumber))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
Accuracy: [ 0.56461538 0.57461538 0.57428791 0.57505774 0.58968437] Accuracy: [ 0.55 0.57615385 0.57351809 0.56428022 0.59815242] Iteration #0, 3-permutations: Accuracy: [ 0.58461538 0.56692308 0.56581986 0.5604311 0.55966128] Mean accuracy: 0.5747228045241902 Accuracy: [ 0.57923077 0.58692308 0.58121632 0.55273287 0.57505774] Accuracy: [ 0.57 0.58615385 0.57736721 0.56428022 0.56505004] Accuracy: [ 0.56846154 0.57461538 0.55812163 0.56966898 0.57351809] Accuracy: [ 0.54769231 0.57846154 0.55119323 0.5704388 0.58121632] Accuracy: [ 0.54923077 0.55307692 0.56581986 0.5873749 0.57659738] Accuracy: [ 0.57230769 0.58 0.56735951 0.59199384 0.56735951] Iteration #0, 2-permutations: Accuracy: [ 0.57769231 0.57615385 0.57274827 0.56274057 0.56812933] Mean accuracy: 0.5708826908272636 Accuracy: [ 0.56923077 0.58538462 0.54580446 0.56889915 0.56812933] Accuracy: [ 0.58307692 0.56538462 0.5604311 0.56889915 0.57736721] Accuracy: [ 0.57153846 0.56923077 0.5804465 0.55658199 0.56197075] Accuracy: [ 0.56615385 0.56538462 0.57505774 0.5873749 0.56812933] Accuracy: [ 0.59 0.57538462 0.59353349 0.56197075 0.56505004] Accuracy: [ 0.55846154 0.58769231 0.5904542 0.57351809 0.57274827] Accuracy: [ 0.57 0.58769231 0.56658968 0.56197075 0.58506543] Accuracy: [ 0.58153846 0.58153846 0.58275597 0.58429561 0.5604311 ] Accuracy: [ 0.55307692 0.58076923 0.59276366 0.5973826 0.57351809] Accuracy: [ 0.58615385 0.56461538 0.57890685 0.57659738 0.59276366] Iteration #0, 4-permutations: Accuracy: [ 0.57230769 0.56538462 0.56274057 0.58429561 0.57274827] Mean accuracy: 0.5756481316989399 Accuracy: [ 0.55846154 0.58692308 0.58121632 0.59815242 0.5704388 ] Accuracy: [ 0.57384615 0.57384615 0.56351039 0.60200154 0.57274827] Iteration #1, 3-permutations: Accuracy: [ 0.59769231 0.58615385 0.57967667 0.57736721 0.5704388 ] Mean accuracy: 0.5756500266477171 Accuracy: [ 0.57 0.55923077 0.58121632 0.56812933 0.58891455] Accuracy: [ 0.58076923 0.56307692 0.5973826 0.57197844 0.5704388 ] Iteration #1, 2-permutations: Accuracy: [ 0.6 0.56846154 0.5704388 0.53733641 0.57736721] Mean accuracy: 0.5751861195002073 Accuracy: [ 0.59076923 0.58 0.54965358 0.5604311 0.59122402] Accuracy: [ 0.59307692 0.58538462 0.58891455 0.55812163 0.55812163] Accuracy: [ 0.57692308 0.57846154 0.55504234 0.58891455 0.5904542 ] Accuracy: [ 0.59384615 0.58384615 0.58583526 0.55812163 0.58583526] Iteration #1, 4-permutations: Accuracy: [ 0.59076923 0.56769231 0.56658968 0.5604311 0.56966898] Mean accuracy: 0.5794996150885295 Accuracy: [ 0.57307692 0.56846154 0.56812933 0.5873749 0.58968437] Accuracy: [ 0.57615385 0.58692308 0.56966898 0.58506543 0.53733641]
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-18-ab31e2c20667> in <module>() 7 8 mod_dataframe = new_df ----> 9 while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)): 10 mod_dataframe = new_df 11 candidates = [i for i in permutations(listOfColumns,3)] <ipython-input-4-a667f2e3c8a0> in get_accuracy_rate(targetDF) 17 # Считаем верность (accuracy) для каждого из фолдов 18 print("Accuracy: {0}".format(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y)))) ---> 19 return np.mean(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y))) C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch) 138 train, test, verbose, None, 139 fit_params) --> 140 for train, test in cv_iter) 141 return np.array(scores)[:, 0] 142 C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable) 756 # was dispatched. In particular this covers the edge 757 # case of Parallel used with an exhausted iterator. --> 758 while self.dispatch_one_batch(iterator): 759 self._iterating = True 760 else: C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator) 606 return False 607 else: --> 608 self._dispatch(tasks) 609 return True 610 C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch) 569 dispatch_timestamp = time.time() 570 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self) --> 571 job = self._backend.apply_async(batch, callback=cb) 572 self._jobs.append(job) 573 C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback) 107 def apply_async(self, func, callback=None): 108 """Schedule a func to be run""" --> 109 result = ImmediateResult(func) 110 if callback: 111 callback(result) C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch) 324 # Don't delay the application, to avoid keeping the input 325 # arguments in memory --> 326 self.results = batch() 327 328 def get(self): C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] 132 133 def __len__(self): C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] 132 133 def __len__(self): C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score) 236 estimator.fit(X_train, **fit_params) 237 else: --> 238 estimator.fit(X_train, y_train, **fit_params) 239 240 except Exception as e: C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py in fit(self, X, y, sample_weight) 324 t, self, X, y, sample_weight, i, len(trees), 325 verbose=self.verbose, class_weight=self.class_weight) --> 326 for i, t in enumerate(trees)) 327 328 # Collect newly grown trees C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable) 756 # was dispatched. In particular this covers the edge 757 # case of Parallel used with an exhausted iterator. --> 758 while self.dispatch_one_batch(iterator): 759 self._iterating = True 760 else: C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator) 606 return False 607 else: --> 608 self._dispatch(tasks) 609 return True 610 C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch) 569 dispatch_timestamp = time.time() 570 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self) --> 571 job = self._backend.apply_async(batch, callback=cb) 572 self._jobs.append(job) 573 C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback) 107 def apply_async(self, func, callback=None): 108 """Schedule a func to be run""" --> 109 result = ImmediateResult(func) 110 if callback: 111 callback(result) C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch) 324 # Don't delay the application, to avoid keeping the input 325 # arguments in memory --> 326 self.results = batch() 327 328 def get(self): C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] 132 133 def __len__(self): C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] 132 133 def __len__(self): C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight) 118 curr_sample_weight *= compute_sample_weight('balanced', y, indices) 119 --> 120 tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False) 121 else: 122 tree.fit(X, y, sample_weight=sample_weight, check_input=False) C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted) 737 sample_weight=sample_weight, 738 check_input=check_input, --> 739 X_idx_sorted=X_idx_sorted) 740 return self 741 C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted) 348 self.min_impurity_split) 349 --> 350 builder.build(self.tree_, X, y, sample_weight, X_idx_sorted) 351 352 if self.n_outputs_ == 1: KeyboardInterrupt:
print("Final testing of accuracy")
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
Final testing of accuracy Accuracy: [ 0.59846154 0.56230769 0.57351809 0.59430331 0.55427252] Mean accuracy: 0.5765708533191212
targetColumn = 'quality'
# Чтобы нельзя было увидеть исходные столбцы из wine quality, перемешиваем их
new_df[targetColumn]
# Берем названия столбцов
cols = list(new_df.columns)
# Перемешиваем их
random.shuffle(cols)
# Задаем новый порядок
df = new_df.reindex_axis(cols, axis=1)
# Теперь столбец с целевой переменной перемещаем на последнее место
df = df.drop(['quality'], axis=1)
df = pd.concat([df,new_df[targetColumn]], axis=1)
df
name__alcohol_chlorides_citric acid_residual sugar | name__total sulfur dioxide_citric acid | name__total sulfur dioxide_chlorides | name__citric acid_chlorides | elem_0 | name__density_sulphates_fixed acidity_total sulfur dioxide | name__citric acid_alcohol | name__volatile acidity_residual sugar_citric acid_total sulfur dioxide | name__residual sugar_sulphates | elem1_0 | ... | name__total sulfur dioxide_volatile acidity_pH_alcohol | name__density_total sulfur dioxide | name__total sulfur dioxide_citric acid_residual sugar_sulphates | name__density_residual sugar_alcohol_citric acid | name__free sulfur dioxide_total sulfur dioxide | name__total sulfur dioxide_sulphates_fixed acidity_pH | name__citric acid_sulphates_fixed acidity_pH | name__chlorides_fixed acidity_density_volatile acidity | name__volatile acidity_alcohol_citric acid_residual sugar | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.478283 | -2.213967 | -2.640030 | 0.684143 | 0.963702 | 1.222948 | -1.152197 | -2.027630 | 0.661563 | 0.910785 | ... | -1.906174 | -1.059609 | -2.187578 | 0.655273 | -3.801710 | -1.356129 | 4.404720 | 1.686248 | -0.425134 | 5 |
1 | -0.416882 | -0.788398 | -1.619442 | 0.140811 | 0.125972 | 2.235487 | -1.014809 | -0.948248 | 0.544026 | 2.898419 | ... | -0.937044 | -0.557045 | -1.195460 | -0.085663 | -1.534467 | -0.335009 | 2.509282 | 1.628263 | -0.111945 | 5 |
2 | -0.202222 | -1.071511 | -1.579510 | 0.091978 | 0.567123 | 1.799232 | -0.379224 | -1.826647 | 0.747934 | 1.883060 | ... | -1.182087 | -0.407708 | -2.243586 | -0.072177 | -2.081265 | -1.016039 | 2.228239 | 1.274932 | 0.407152 | 5 |
3 | -0.248368 | -0.760257 | -1.101823 | 0.722705 | 0.447410 | 2.193620 | -0.654403 | -1.398025 | 0.477155 | 3.149005 | ... | -2.416570 | 0.331195 | -1.507227 | 0.746818 | -2.397424 | -1.181693 | 0.937475 | 1.966068 | 0.680110 | 6 |
4 | -0.413262 | -2.363772 | -2.433693 | 0.257073 | 0.109966 | 0.964414 | -0.987758 | -2.587727 | 0.782671 | 3.737424 | ... | -1.163915 | -0.512178 | -2.933797 | 0.293850 | -3.468229 | -0.885008 | 3.725170 | 1.792081 | -0.521255 | 5 |
5 | -0.555860 | -1.617497 | -1.771817 | 0.915180 | 0.274020 | 1.247003 | -1.279413 | -2.254374 | 0.423601 | 1.259898 | ... | -1.802123 | -0.640739 | -2.373853 | 0.429295 | -3.270010 | -0.692253 | 4.557135 | 1.575228 | -0.186711 | 5 |
6 | -1.143486 | -1.344627 | -1.543131 | 0.796627 | 0.434179 | 0.788886 | -1.634137 | -1.226786 | 0.457391 | 1.556075 | ... | -1.519889 | -0.403382 | -1.666689 | -0.218545 | -2.569563 | -1.316604 | 0.473790 | 0.551849 | -0.170027 | 5 |
7 | 0.187024 | -2.333007 | -2.302782 | 0.827787 | 0.922521 | -1.568044 | -0.763121 | -2.481517 | -0.153139 | 3.547803 | ... | -1.856568 | -3.012415 | -3.115052 | 0.349773 | -3.507581 | -2.567682 | 1.704580 | 0.749956 | -0.294431 | 7 |
8 | -1.002961 | -2.303354 | -2.673976 | 0.177249 | 0.250748 | 0.685932 | -0.578056 | -2.864814 | 0.756897 | 1.835178 | ... | -2.061171 | -1.537410 | -2.768741 | -0.116310 | -3.987075 | -1.522621 | 2.815789 | 1.017975 | -0.155973 | 7 |
9 | 0.722679 | 0.465044 | 0.187940 | 0.624961 | 0.466162 | 3.240046 | 0.622276 | -0.020450 | 1.168646 | 4.016444 | ... | 0.408039 | 1.273450 | 0.132694 | 0.913820 | -1.732049 | 1.135831 | 5.004930 | 1.460171 | 0.616403 | 5 |
10 | -0.805252 | -1.438636 | -1.689252 | 0.608807 | 0.141400 | 0.898329 | -1.093433 | -1.280877 | 0.571205 | 2.088654 | ... | -2.408880 | -0.528962 | -1.275497 | -0.249468 | -2.296349 | -1.150133 | 1.029836 | 1.282106 | 0.137737 | 5 |
11 | 0.700490 | 0.108888 | -0.012820 | 0.982347 | 0.359799 | 3.909861 | 0.981651 | -0.081168 | 1.126112 | 3.598302 | ... | 0.768855 | 1.153206 | 0.292383 | 0.747113 | -0.944767 | 1.856977 | 5.683687 | 1.002447 | 0.310331 | 5 |
12 | -0.258602 | -0.841112 | -1.222429 | 0.860943 | 0.299262 | -0.365507 | -0.107277 | -1.210638 | 0.081789 | 1.592301 | ... | -0.308574 | -1.188232 | -2.069158 | -0.316118 | -1.845358 | -0.714589 | 4.396622 | 0.492017 | -0.098624 | 5 |
13 | -1.369402 | -1.949055 | -2.290166 | 1.074353 | 0.791086 | 6.408129 | -1.888737 | -2.533301 | 3.163933 | 1.175871 | ... | -2.949081 | -1.330478 | -1.284520 | -0.328896 | -3.317492 | 1.785229 | 13.585969 | 1.793077 | 0.161766 | 5 |
14 | -1.126438 | 1.263476 | 1.709850 | 0.872946 | 0.619282 | 5.725044 | -1.903149 | 1.571477 | 1.494160 | 1.312797 | ... | -1.082434 | 2.990815 | 1.597808 | 0.085041 | 3.687123 | 3.104751 | 4.624860 | 2.103116 | -0.411927 | 5 |
15 | -0.498509 | 1.154674 | 2.120706 | 1.170033 | 0.081316 | 5.329532 | -1.075350 | 1.560374 | 1.100451 | 4.937626 | ... | -0.573198 | 3.276460 | 2.100919 | 0.247025 | 3.315187 | 2.962000 | 5.191529 | 1.717098 | 0.341791 | 5 |
16 | 0.188322 | 0.317919 | 0.521554 | 0.397273 | 0.859256 | 3.003631 | 0.530147 | 0.392047 | 0.999356 | 0.874371 | ... | 0.037734 | 0.570264 | 0.893967 | 0.959701 | 0.275360 | 1.703801 | 3.939884 | 1.009635 | 0.591173 | 7 |
17 | -0.578201 | -1.147809 | -1.012134 | 1.628251 | 0.987506 | 4.754865 | -1.394030 | -1.900020 | 1.996386 | 5.337035 | ... | -3.013082 | -0.901929 | -0.659227 | -0.357837 | -2.710896 | 1.359244 | 8.425891 | 1.284350 | -0.083949 | 5 |
18 | -1.197140 | -2.490853 | -2.223153 | 0.708309 | 0.592840 | 0.200974 | -1.385509 | -2.316716 | 0.557840 | 2.118574 | ... | -2.705586 | -1.628289 | -3.064470 | 0.170940 | -3.890830 | -2.064242 | 1.691804 | 1.512927 | -0.264051 | 4 |
19 | -0.669481 | -1.234895 | -1.081497 | 1.514516 | 0.584530 | 4.081584 | -0.822537 | -1.333693 | 1.726978 | 0.224845 | ... | -3.202157 | -0.489831 | -0.512077 | -0.381910 | -2.279208 | 0.050886 | 5.080082 | 1.642790 | -0.020820 | 6 |
20 | -0.475792 | -1.254881 | -1.292806 | 0.246659 | 0.824237 | 0.726824 | -0.526297 | -1.821336 | 0.935710 | 2.710390 | ... | -1.843603 | -0.545160 | -1.160431 | -0.110632 | -0.519993 | -0.512707 | 3.269135 | 0.610509 | -0.418407 | 6 |
21 | -0.062695 | -0.484099 | -0.787870 | 0.767299 | 0.048866 | 2.302608 | -0.344390 | -0.911288 | 0.409977 | 4.017808 | ... | -0.445627 | 0.500191 | -0.458833 | 0.984103 | -1.392748 | 0.372676 | 5.049873 | 1.739980 | 0.128020 | 5 |
22 | -0.128106 | -2.072688 | -2.407602 | 0.978550 | 0.111442 | 2.608775 | -1.299084 | -1.919077 | 1.566608 | 2.349346 | ... | -2.896897 | -1.639309 | -2.186183 | -0.097270 | -3.085381 | -0.802771 | 4.713787 | 0.943972 | -0.320782 | 5 |
23 | -0.792323 | -0.593678 | -1.003570 | 0.398502 | 0.736443 | 1.111517 | -1.268817 | -1.456753 | 0.797764 | 1.185308 | ... | -2.601814 | -0.187135 | -1.812473 | -0.091447 | -2.808533 | -1.183920 | 0.002838 | 0.669112 | 0.280655 | 5 |
24 | -0.429528 | -1.679373 | -2.493998 | 0.770772 | 0.105107 | 0.942746 | -1.062227 | -2.160717 | 0.282306 | 1.732734 | ... | -1.561004 | -1.021895 | -1.803214 | 0.329543 | -2.138758 | -1.183470 | 4.142319 | 0.684148 | 0.169529 | 6 |
25 | -1.013358 | -2.002284 | -3.048006 | 0.451947 | 0.289703 | 0.181959 | -0.837330 | -2.527793 | 0.072096 | 1.831610 | ... | -3.036272 | -1.894093 | -2.799900 | -0.763523 | -3.809952 | -1.681011 | 1.850960 | 0.309223 | 0.153014 | 5 |
26 | -0.897272 | -1.901506 | -3.040008 | 0.604336 | 0.615587 | 0.493634 | -0.882756 | -3.189613 | 0.288642 | 1.959315 | ... | -2.686791 | -2.643124 | -3.025610 | -0.007652 | -4.601875 | -2.243167 | 1.965979 | 1.288481 | -0.140444 | 5 |
27 | -0.392025 | -1.425848 | -1.711070 | 0.991837 | 0.354453 | 2.279911 | -0.820984 | -2.770463 | 1.040287 | 1.768816 | ... | -3.238078 | -0.964924 | -1.878471 | 0.476292 | -3.846969 | -0.539989 | 4.786496 | 1.259148 | 0.009888 | 5 |
28 | -1.015235 | -1.757910 | -2.531480 | 0.531653 | 0.087781 | 1.147167 | -1.619059 | -2.708835 | 0.142814 | 4.882426 | ... | -2.244693 | -1.085859 | -3.030047 | 0.292079 | -2.613734 | -1.475827 | 3.974091 | 1.045569 | 0.344409 | 5 |
29 | -0.812856 | -2.036425 | -3.038716 | 0.863130 | 0.956309 | 0.527511 | -0.899260 | -2.744348 | 0.966399 | 2.762723 | ... | -1.530598 | -2.566893 | -3.165582 | 0.660980 | -4.459248 | -2.010311 | 2.936815 | 0.914611 | 0.356791 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4868 | 0.773966 | 1.031942 | 0.814174 | 0.113335 | 0.106613 | 0.705251 | 0.950414 | 0.685295 | 1.179255 | 1.447798 | ... | 0.984347 | -0.160937 | 1.258517 | 0.204865 | 1.627361 | 0.874435 | 2.783931 | 0.229651 | 0.994845 | 6 |
4869 | -0.325304 | 0.007284 | -0.989292 | -0.019937 | 0.536001 | 0.454124 | -0.266204 | -0.944899 | 0.452231 | 2.912857 | ... | -1.234651 | 0.243895 | -0.680293 | 0.638123 | -2.957975 | -0.596431 | -0.579771 | 1.118092 | 0.860843 | 6 |
4870 | 1.407362 | 1.248991 | 0.729838 | 0.440455 | 0.747578 | -1.951163 | 2.144557 | 0.772573 | 0.285851 | 1.161407 | ... | 2.189510 | 0.382576 | 0.647076 | 0.158944 | 0.629816 | -0.202777 | -2.118623 | -0.604631 | 1.123709 | 7 |
4871 | 2.209935 | 0.365248 | -0.401793 | 0.770510 | 0.166730 | -2.931917 | 3.056815 | -0.695417 | 0.397739 | 3.855181 | ... | 3.163627 | -1.618340 | 0.070879 | 0.112822 | -0.636209 | -0.279786 | 2.274530 | -1.271899 | 0.629962 | 6 |
4872 | -0.035135 | 2.563889 | 2.508937 | 0.247788 | 0.540455 | 2.139895 | -0.474984 | 3.388282 | 0.396316 | 1.747787 | ... | 0.078527 | 3.047676 | 2.619323 | 0.384270 | 4.100411 | 2.160890 | 0.538127 | 0.913770 | 0.811652 | 5 |
4873 | 1.858546 | 0.561262 | 0.682497 | 0.113391 | 0.647477 | -1.844306 | 2.073612 | 1.046149 | 0.304821 | 1.533352 | ... | 3.567208 | -0.648470 | 0.257282 | 0.488681 | 1.258308 | 0.281621 | 1.203277 | -0.806852 | 1.005950 | 6 |
4874 | -0.140961 | 0.488357 | 1.059394 | 0.027545 | 0.063533 | -0.576996 | -0.044634 | 0.692409 | 0.226179 | 4.860501 | ... | 0.414404 | 0.516456 | 0.473963 | -0.501014 | -1.046891 | 1.510466 | 2.433562 | -0.555042 | -0.232443 | 6 |
4875 | -0.180113 | 0.076192 | -0.103752 | 0.022736 | 0.380758 | -1.202363 | -0.372775 | -0.076515 | -0.118406 | 4.955946 | ... | -1.884645 | -0.621701 | -0.380639 | -0.649475 | -1.295568 | -1.189119 | -2.459199 | 0.397683 | 0.470148 | 6 |
4876 | 1.304160 | 1.042614 | 0.520731 | 0.774318 | 0.034238 | -1.207375 | 2.208312 | 0.993403 | 0.452742 | 3.761774 | ... | 2.553453 | -0.731263 | 0.459425 | 0.714278 | 1.312428 | 0.699373 | 2.440311 | -0.632819 | 0.585020 | 7 |
4877 | -1.951085 | -0.384324 | -0.828416 | 0.245470 | 0.025559 | -1.673372 | -1.826046 | -0.265926 | -0.255501 | 1.127825 | ... | -2.363450 | -1.378423 | -1.115043 | -1.252807 | -2.147823 | -0.885678 | -1.898321 | -0.452237 | -0.766517 | 5 |
4878 | -1.166221 | -1.028839 | -0.754207 | 0.296731 | 0.868769 | -2.601292 | -1.077188 | -0.931504 | -0.068753 | 3.829208 | ... | -1.146448 | -1.553153 | -1.463527 | -0.775106 | -2.462946 | -1.325951 | -1.992374 | -0.045361 | 0.295007 | 4 |
4879 | -0.219551 | 2.035592 | 2.177237 | 0.975339 | 0.230322 | 0.897686 | -0.426011 | 2.538158 | 0.527566 | 3.882505 | ... | -0.266849 | 2.540680 | 2.332699 | -0.039273 | 5.890210 | 1.471003 | -0.485617 | 0.459435 | -0.032751 | 6 |
4880 | -0.617287 | 2.049617 | 2.721893 | 0.599718 | 0.831959 | 0.817070 | -1.171510 | 2.654859 | 0.156911 | 1.345925 | ... | 0.094293 | 2.183317 | 1.947614 | -0.415179 | 5.401485 | 1.695110 | -0.880692 | 0.316263 | 0.706347 | 6 |
4881 | -1.005690 | 0.761560 | 0.589218 | 0.642400 | 0.225156 | 0.568350 | -1.478329 | 0.890466 | 0.982594 | 2.982989 | ... | -2.140410 | 0.544678 | 0.416322 | -0.154365 | 0.896756 | -0.512274 | -2.148811 | 0.528597 | -0.033937 | 6 |
4882 | 0.956398 | 1.863328 | 1.528597 | 0.142277 | 0.256223 | -1.753222 | 1.111464 | 2.347314 | -0.264179 | 2.883249 | ... | 1.718214 | 0.797376 | 1.392095 | 0.195698 | 3.362565 | 1.132632 | -1.297307 | -0.031256 | 0.751454 | 5 |
4883 | 1.569808 | 0.943914 | 2.032330 | 0.824035 | 0.281259 | -3.391974 | 1.532926 | 1.851298 | -0.125040 | 2.025174 | ... | 3.312068 | -0.287759 | 1.488011 | 0.349068 | 4.319608 | 0.967502 | -1.664008 | -0.994605 | 0.875385 | 6 |
4884 | -0.304538 | 2.027566 | 2.400392 | 0.016879 | 0.829558 | 0.788623 | -0.324252 | 2.889908 | 0.009044 | 1.920823 | ... | 0.157820 | 2.758599 | 2.966721 | 0.033440 | 5.758555 | 2.180229 | -0.976330 | 0.425362 | 0.620639 | 5 |
4885 | -0.873283 | 1.831221 | 2.558328 | 0.255377 | 0.461025 | 1.164142 | -1.301629 | 2.776247 | 0.458644 | 3.889055 | ... | -0.167620 | 1.900748 | 2.170648 | -0.198759 | 6.147723 | 1.995351 | -0.416972 | 0.338289 | 0.187383 | 6 |
4886 | 2.703794 | 0.852767 | 0.941143 | 0.276276 | 0.773274 | 1.801400 | 3.045503 | 0.790318 | 1.393247 | 0.351957 | ... | 3.278612 | -1.030422 | 1.753329 | 1.414937 | 2.549355 | 3.287896 | 7.031204 | -0.627692 | 0.888318 | 7 |
4887 | 2.781300 | -1.264810 | -1.164251 | 0.305303 | 0.724363 | -2.122917 | 3.673328 | -1.275776 | 1.385913 | 0.931251 | ... | 3.329545 | -3.622192 | -1.550825 | 1.091990 | -3.105554 | -1.025556 | 1.292526 | -1.024504 | 1.738765 | 7 |
4888 | -0.562415 | 0.739697 | 0.638122 | 0.062751 | 0.256872 | -0.362067 | -1.821780 | 1.095450 | 0.659374 | 1.834312 | ... | -1.998783 | 0.240934 | 1.050916 | -0.326037 | 1.877147 | 0.684427 | -1.807519 | -0.190896 | -0.145854 | 5 |
4889 | -0.797154 | 0.533663 | 0.888649 | 0.789960 | 0.009050 | -0.013914 | -1.043141 | 0.525176 | 0.308703 | 0.700887 | ... | -1.424222 | 0.840558 | 0.341886 | -0.206782 | 0.914131 | -0.238012 | -2.260413 | 0.163318 | 0.178770 | 6 |
4890 | 1.870422 | 0.327469 | -0.035135 | 0.290066 | 0.697058 | -2.836534 | 1.930947 | 0.328936 | 0.213337 | 1.013143 | ... | 1.408807 | -2.594730 | -0.422990 | -0.219193 | -0.522468 | -0.731795 | -3.004844 | -0.716293 | 1.002671 | 6 |
4891 | 0.893812 | 0.810451 | 0.631597 | 0.097856 | 0.950967 | -2.195252 | 0.328870 | 0.550190 | 0.555023 | 0.994869 | ... | 1.163509 | -0.831607 | 0.110677 | -0.141000 | 1.301248 | -0.102037 | -0.696146 | -0.146933 | 0.699947 | 6 |
4892 | -0.535997 | 0.528190 | 0.595106 | 0.873245 | 0.829751 | -0.245070 | -0.669575 | 0.350100 | 0.327846 | 3.961168 | ... | -1.018608 | 0.030936 | 0.576389 | -0.108447 | 0.642000 | 1.002172 | 1.395517 | 0.368097 | 0.514939 | 5 |
4893 | 1.565767 | -0.566277 | -0.198058 | 0.770311 | 0.041606 | -1.841681 | 1.814109 | -0.853114 | 0.217202 | 1.537309 | ... | 0.963009 | -1.525094 | -0.343275 | 0.434351 | -0.892645 | -0.691755 | 0.449269 | -0.159772 | 0.509648 | 6 |
4894 | -0.639722 | 1.547914 | 2.415930 | 0.051300 | 0.675902 | 0.562113 | -0.992682 | 2.867673 | 0.155048 | 8.198587 | ... | -0.150793 | 1.956783 | 1.919438 | 0.280612 | 4.436135 | 1.171161 | -1.410279 | 0.852113 | 0.656496 | 5 |
4895 | -0.647004 | -0.148548 | 0.580455 | 0.106387 | 0.033165 | -0.832290 | -1.259207 | -0.006483 | 0.527104 | 4.594427 | ... | -2.020286 | -0.452110 | -0.027390 | -1.215087 | 0.385619 | -0.638500 | -2.649501 | -0.396957 | -0.041138 | 6 |
4896 | 3.010521 | 0.275813 | -0.143146 | 0.338426 | 0.281199 | -4.269512 | 4.277830 | 0.164078 | -0.423296 | 2.223884 | ... | 4.649211 | -2.564197 | 0.417968 | 0.367732 | -0.686613 | 0.329296 | -0.622103 | -0.633970 | 1.492943 | 7 |
4897 | 2.293042 | -0.108104 | -0.126011 | 0.583681 | 0.613773 | -4.368150 | 2.809822 | -0.639298 | -0.481201 | 4.477054 | ... | 2.599396 | -1.711998 | -0.351413 | -0.104888 | -1.038270 | -1.145385 | -1.614386 | -0.701789 | 1.187200 | 6 |
6497 rows × 224 columns
df = df.sample(frac=0.9).reset_index(drop=True)
df
name__alcohol_chlorides_citric acid_residual sugar | name__total sulfur dioxide_citric acid | name__total sulfur dioxide_chlorides | name__citric acid_chlorides | elem_0 | name__density_sulphates_fixed acidity_total sulfur dioxide | name__citric acid_alcohol | name__volatile acidity_residual sugar_citric acid_total sulfur dioxide | name__residual sugar_sulphates | elem1_0 | ... | name__total sulfur dioxide_volatile acidity_pH_alcohol | name__density_total sulfur dioxide | name__total sulfur dioxide_citric acid_residual sugar_sulphates | name__density_residual sugar_alcohol_citric acid | name__free sulfur dioxide_total sulfur dioxide | name__total sulfur dioxide_sulphates_fixed acidity_pH | name__citric acid_sulphates_fixed acidity_pH | name__chlorides_fixed acidity_density_volatile acidity | name__volatile acidity_alcohol_citric acid_residual sugar | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.227592 | 1.558518 | 1.664460 | -0.027473 | 0.330031 | 0.425976 | 0.135259 | 1.938229 | -0.438415 | 2.641077 | ... | 0.434843 | 1.469472 | 1.606509 | 0.237339 | 0.973398 | 0.594251 | -2.123956 | 0.921411 | 0.855172 | 5 |
1 | -0.045473 | 2.788140 | 2.877576 | 0.215871 | 0.066918 | 3.723036 | -0.676680 | 2.660169 | 1.155078 | 1.836417 | ... | 0.803771 | 2.806482 | 3.646644 | 0.008963 | 5.400050 | 3.804687 | 4.651396 | 0.073775 | 0.168840 | 5 |
2 | -1.712185 | 2.780299 | 3.217684 | 0.713956 | 0.838745 | 3.194167 | -2.385246 | 3.304605 | 0.312159 | 0.852291 | ... | -1.875952 | 4.266674 | 3.311602 | 0.351215 | 7.241129 | 2.389160 | -1.715098 | 1.386217 | 0.387089 | 5 |
3 | -0.419863 | 3.237882 | 4.196849 | 0.854662 | 0.371440 | 3.626434 | -0.722377 | 4.006264 | 0.795518 | 1.193159 | ... | 2.373420 | 5.088340 | 3.918420 | 0.855602 | 4.837012 | 3.568480 | 2.502341 | 1.444522 | 0.041461 | 6 |
4 | -0.307260 | 0.258421 | 0.198612 | 0.415486 | 0.855955 | 0.366932 | -1.100628 | 0.366125 | 0.374110 | 8.471863 | ... | -1.700884 | 0.870573 | 0.284465 | -0.420926 | -1.540604 | 0.632883 | -0.794523 | 1.079984 | 0.369052 | 4 |
5 | 1.183370 | 0.334783 | 0.099776 | 0.135584 | 0.052050 | -0.169885 | 1.143624 | -0.065276 | 1.179131 | 3.918653 | ... | 1.015351 | -1.304507 | 0.880431 | 0.184012 | 1.490661 | 1.363457 | 1.483631 | -0.193233 | 0.949995 | 6 |
6 | -0.244344 | 0.177226 | 0.148857 | 0.541161 | 0.955383 | -0.035611 | -0.312978 | 0.215212 | 0.274592 | 3.001541 | ... | -0.850271 | 1.416221 | 0.952724 | 0.881361 | -0.589111 | -0.506174 | -3.405627 | 1.186127 | 0.442047 | 5 |
7 | -0.758581 | 0.209856 | -0.118122 | 0.756227 | 0.963111 | 2.267244 | -1.959541 | -0.130295 | 1.035297 | 5.330133 | ... | -2.358864 | 0.811680 | -0.539444 | 0.058470 | -0.112394 | 0.115939 | -0.004706 | 1.010447 | -0.294211 | 5 |
8 | 0.509738 | 0.796781 | 0.694178 | 1.159435 | 0.109525 | -1.442529 | 0.167842 | 0.052686 | 0.576750 | 0.906169 | ... | -0.127120 | -1.081137 | 0.636389 | -0.620195 | 0.196229 | -0.173579 | 0.287197 | 0.152346 | 0.567403 | 5 |
9 | -1.105694 | 0.058315 | 0.257915 | 0.983759 | 0.067465 | -1.020146 | -1.000149 | 0.457775 | 0.161189 | 1.858575 | ... | -2.365750 | 0.224420 | 0.127513 | -0.218428 | -0.031064 | -0.597352 | -4.039884 | 0.540300 | -0.214238 | 6 |
10 | 0.517795 | 0.657187 | 0.958631 | 0.959237 | 0.406489 | 0.863070 | 0.192019 | 1.013132 | 0.678814 | 3.641739 | ... | 0.724614 | 0.734983 | 0.864792 | -0.310225 | 1.169010 | 1.584385 | 2.394734 | 0.682109 | -0.037966 | 7 |
11 | -0.216544 | 2.487724 | 3.757297 | 0.833667 | 0.246426 | 1.869204 | -0.276657 | 4.246059 | 0.051775 | 3.213570 | ... | 0.239199 | 3.715593 | 3.536151 | 0.390983 | 5.636058 | 2.648358 | -0.787942 | 0.261716 | 0.134986 | 5 |
12 | -0.030823 | 1.669713 | 1.449701 | 0.308632 | 0.870826 | 0.965102 | -1.088238 | 1.160680 | 0.292854 | 1.508543 | ... | -0.267321 | 1.008892 | 1.009107 | 0.013575 | 3.137530 | 1.605813 | 2.328965 | -0.173506 | 0.119533 | 8 |
13 | 2.532512 | 0.663371 | 1.040911 | 0.532441 | 0.930141 | -1.942990 | 2.524843 | 0.462912 | 0.277203 | 2.694953 | ... | 3.339025 | -1.467694 | 0.363239 | 0.294848 | 0.799586 | -0.071826 | -1.295806 | -0.092188 | 1.515673 | 7 |
14 | -0.927300 | 2.290394 | 2.567119 | -0.003484 | 0.587684 | 3.771613 | -1.158052 | 3.286506 | 0.637932 | 1.149233 | ... | -0.688389 | 3.786359 | 3.192368 | 0.088963 | 1.101538 | 1.706938 | -1.852492 | 0.988406 | 0.120376 | 6 |
15 | -0.805252 | -1.438636 | -1.689252 | 0.608807 | 0.141400 | 0.898329 | -1.093433 | -1.280877 | 0.571205 | 2.088654 | ... | -2.408880 | -0.528962 | -1.275497 | -0.249468 | -2.296349 | -1.150133 | 1.029836 | 1.282106 | 0.137737 | 5 |
16 | 3.707683 | 0.207071 | 0.947326 | 0.778530 | 0.536602 | -0.319422 | 4.828039 | 0.798528 | 1.612137 | 5.169873 | ... | 6.808884 | -0.867807 | 0.630836 | 2.098455 | -0.823225 | 1.941429 | 6.819429 | -0.218895 | 1.198769 | 7 |
17 | 0.549017 | 1.362632 | 1.238093 | 0.097687 | 0.413920 | -1.666465 | 0.549350 | 1.630658 | 0.052506 | 5.341274 | ... | 0.923685 | -0.179301 | 0.986284 | -0.060968 | 2.808849 | 0.024984 | -3.028244 | -0.603218 | 0.089566 | 6 |
18 | -1.256569 | -2.564801 | -2.550288 | 0.299483 | 0.820501 | -0.709686 | -0.681529 | -2.672096 | 0.472935 | 1.443661 | ... | -2.808752 | -2.298254 | -2.942024 | 0.096998 | -4.577862 | -2.510457 | 2.660393 | 0.760032 | -0.497542 | 5 |
19 | -0.935267 | 1.413965 | 2.034552 | 0.161635 | 0.295060 | 3.336941 | -2.246983 | 1.608296 | 0.469460 | 5.965373 | ... | -1.517290 | 3.610432 | 1.835860 | 0.299163 | 2.466863 | 0.821008 | -0.571721 | 1.643549 | 0.692445 | 6 |
20 | -0.678544 | 1.209503 | 2.059097 | 0.123954 | 0.808909 | 0.918322 | -0.955396 | 1.369504 | -0.071198 | 3.310036 | ... | 0.261020 | 1.919125 | 0.919221 | 0.179152 | 2.786442 | 0.826415 | -0.812206 | 0.495839 | 0.094861 | 5 |
21 | 0.117119 | 1.616873 | 2.945610 | 0.803248 | 0.203891 | 0.532842 | -0.183320 | 2.481310 | 0.361501 | 0.862744 | ... | 3.279540 | 2.511152 | 2.188161 | -0.349228 | -0.099568 | 3.831945 | 6.926805 | 0.011939 | 0.154438 | 5 |
22 | 2.156631 | 0.762914 | 0.756830 | 0.625000 | 0.406208 | -2.475535 | 3.567960 | 0.954996 | 0.134723 | 3.315622 | ... | 3.434240 | -0.950215 | 0.542298 | 1.159385 | 1.573037 | 0.415830 | -1.968778 | -0.136317 | 1.362478 | 6 |
23 | 0.118243 | 0.882021 | 0.305616 | 0.627140 | 0.882556 | -1.322782 | 0.801350 | 0.289063 | 0.831394 | 0.951858 | ... | 0.952706 | -0.904282 | 0.379914 | -0.526693 | -1.330874 | 0.319565 | 1.609707 | -0.340182 | 0.282026 | 7 |
24 | -1.777068 | 0.290613 | 0.681867 | 0.174307 | 0.750957 | -1.773042 | -2.131557 | 0.046237 | 0.394325 | 4.272851 | ... | -2.830815 | -0.192334 | -0.064730 | -0.987951 | -1.618146 | -0.786654 | -2.991774 | 0.589283 | 0.140826 | 4 |
25 | -0.861646 | 2.328985 | 2.566253 | 0.690703 | 0.809659 | 3.626765 | -0.835995 | 3.659660 | 0.519721 | 0.774169 | ... | -0.151479 | 4.178866 | 3.252275 | 0.814671 | 4.614573 | 2.509126 | -1.425625 | 0.596027 | 0.418696 | 7 |
26 | 2.037610 | -0.473717 | -1.471609 | 0.762277 | 0.957321 | -3.351200 | 1.802759 | -1.565907 | 0.543127 | 3.446179 | ... | 0.910942 | -2.667700 | -1.001588 | 0.119780 | -1.647659 | -2.282807 | -2.464412 | -0.197085 | 1.139601 | 4 |
27 | 1.859263 | -1.699892 | -2.522410 | 0.484512 | 0.969631 | 2.367803 | 2.496390 | -2.986239 | 0.572276 | 3.952370 | ... | 0.139259 | -1.258217 | -1.781532 | 1.921199 | -4.517802 | -2.037521 | 2.060177 | 1.528924 | 0.537580 | 7 |
28 | 0.713291 | -1.411833 | -1.958340 | 0.584091 | 0.138193 | 2.690837 | 1.085428 | -2.659101 | 0.780953 | 1.850247 | ... | -0.287627 | -0.207652 | -1.391198 | 1.812283 | -2.077882 | -0.555998 | 3.625295 | 2.178251 | 0.108438 | 5 |
29 | -0.131261 | 0.272915 | 1.232472 | 0.793575 | 0.021082 | 0.624774 | -0.221999 | 0.672610 | 0.769102 | 0.853623 | ... | 0.172306 | 0.711089 | 0.874524 | 0.551561 | 4.382707 | -0.113409 | -2.353924 | 0.743680 | 0.779778 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5817 | 1.684336 | 0.148344 | -0.638565 | 0.346559 | 0.262823 | -1.177912 | 2.383119 | -0.389892 | 0.746837 | 0.739020 | ... | 1.918008 | -0.713744 | 0.193917 | 0.957180 | -0.274454 | 0.007778 | -0.517266 | -0.298535 | 1.256392 | 6 |
5818 | -1.339294 | 3.050986 | 4.393677 | 0.151229 | 0.520696 | 4.030173 | -1.911230 | 4.799026 | 0.538913 | 1.296810 | ... | 0.166820 | 5.827812 | 4.308435 | 0.429215 | 5.217967 | 3.736739 | -1.211057 | 1.116808 | 0.209065 | 7 |
5819 | -0.775040 | 0.369025 | -0.125675 | 0.406338 | 0.593824 | 0.468962 | -1.622197 | 0.336658 | 0.520481 | 1.432850 | ... | -2.062937 | 1.211595 | 0.423135 | 0.173661 | 0.682381 | 0.177941 | -0.534050 | 0.728148 | -0.063169 | 6 |
5820 | -0.969049 | 2.183934 | 3.320431 | 0.166064 | 0.289940 | 4.262753 | -1.416369 | 3.461153 | 0.611477 | 0.533747 | ... | -1.328558 | 4.308086 | 3.342853 | 0.765311 | 3.356561 | 2.146287 | -1.664236 | 1.089743 | 0.829043 | 6 |
5821 | -0.549015 | 1.905231 | 2.171229 | 0.164107 | 0.256748 | 2.720944 | -0.684559 | 2.581841 | 0.125723 | 2.223438 | ... | 1.666210 | 3.606771 | 2.610019 | 0.835962 | 4.155324 | 2.963773 | 3.502350 | 1.233858 | 0.992361 | 6 |
5822 | -0.184794 | 3.401827 | 4.012294 | 0.800918 | 0.763852 | 5.524412 | -1.069056 | 4.502148 | 1.504858 | 2.624384 | ... | 0.237726 | 5.634735 | 4.819634 | 0.408350 | 7.521802 | 4.243802 | 1.657415 | 1.061499 | 0.262839 | 6 |
5823 | 0.837030 | 0.362535 | 0.899495 | -0.013025 | 0.066164 | -0.943357 | 0.403683 | -0.161115 | 0.249230 | 2.731013 | ... | 1.486147 | -0.674847 | 0.033081 | -0.275181 | 0.129740 | 0.462483 | 1.559184 | -0.161609 | 0.038261 | 6 |
5824 | 0.553997 | -1.417418 | -2.868272 | 0.848740 | 0.436089 | 1.799074 | 0.665291 | -2.862720 | 0.678211 | 1.837529 | ... | -2.008403 | -1.268380 | -1.966829 | 1.542900 | -4.556147 | -2.065627 | 0.527535 | 1.727122 | 0.591156 | 5 |
5825 | 3.573800 | 0.159316 | 0.220005 | 0.894024 | 0.044911 | -3.571741 | 3.995029 | 0.275360 | 0.193808 | 1.766974 | ... | 4.606328 | -1.505350 | 0.545157 | 1.146069 | 3.102784 | -0.556956 | -3.288948 | -0.527583 | 1.023172 | 7 |
5826 | -1.013447 | -2.321277 | -3.017300 | 0.333081 | 0.290490 | 2.441209 | -1.583040 | -3.613429 | 0.914697 | 2.074566 | ... | -3.155182 | -1.892768 | -3.156555 | 0.146704 | -4.097368 | -1.584924 | 4.986059 | 1.345938 | -0.165180 | 5 |
5827 | 0.306293 | 0.913355 | 0.920927 | 0.928401 | 0.362644 | 0.859068 | 0.562698 | 0.804431 | 0.292529 | 4.605179 | ... | 1.242005 | 2.398811 | 0.928890 | 1.253982 | 3.264975 | 0.055238 | -2.171233 | 1.175908 | 0.311707 | 5 |
5828 | -0.668103 | 1.948764 | 2.007476 | 0.443412 | 0.446517 | 0.639765 | -0.976233 | 2.694725 | 0.759375 | 6.267733 | ... | -0.399241 | 2.747313 | 2.283765 | -0.225974 | 2.812130 | 1.361302 | -1.749146 | 0.979406 | -0.334729 | 6 |
5829 | 0.724978 | 3.218822 | 3.754466 | 0.096578 | 0.045546 | 1.982709 | 0.008561 | 4.284980 | 0.166749 | 7.317244 | ... | 2.158178 | 4.596267 | 3.876167 | 0.332410 | 3.243396 | 3.449588 | 1.030720 | 0.915898 | 0.995796 | 6 |
5830 | -0.970513 | 0.752263 | 0.682730 | 0.731552 | 0.800870 | 0.183587 | -1.139633 | 1.287314 | -0.407099 | 2.622624 | ... | -0.680387 | 1.002067 | 0.640098 | -0.069868 | -1.363895 | -0.784909 | -2.967880 | 1.188553 | 0.746109 | 5 |
5831 | 3.853778 | 0.323418 | -0.636761 | 0.564543 | 0.051277 | 6.295425 | 4.050108 | -0.912915 | 1.350268 | 3.435931 | ... | 3.513643 | 1.584078 | 0.415694 | 4.256205 | -1.530000 | 1.339207 | 4.001286 | 2.938228 | 1.932350 | 5 |
5832 | -0.096259 | 2.470160 | 3.236854 | 0.349927 | 0.656872 | 0.968289 | -0.254539 | 2.636414 | 0.188441 | 4.112849 | ... | 0.768548 | 2.392730 | 2.509947 | -0.069369 | 2.103849 | 1.872789 | -0.844271 | 0.348204 | -0.240732 | 5 |
5833 | -0.340376 | -0.600902 | -2.255388 | 0.667252 | 0.301831 | 2.615126 | -0.671852 | -1.725816 | 0.908508 | 1.598279 | ... | -2.356687 | -0.104468 | -1.342173 | 0.259757 | -1.120634 | -1.024434 | -0.054126 | 1.700223 | 0.008373 | 4 |
5834 | -0.507955 | 4.235877 | 4.756860 | 0.879236 | 0.004114 | 4.523185 | -0.751009 | 5.430181 | 0.453104 | 2.154023 | ... | 1.927748 | 6.751035 | 4.520947 | 0.792219 | 5.435910 | 5.531239 | 2.973025 | 1.533935 | 0.012274 | 6 |
5835 | 2.317845 | -0.094592 | -0.251396 | 0.524351 | 0.253310 | -0.756686 | 3.756340 | 0.346190 | 0.976396 | 1.309117 | ... | 3.156594 | -0.757255 | 0.138546 | 1.589919 | -1.284629 | 1.153767 | 2.314617 | -0.137656 | 0.994792 | 7 |
5836 | -0.593844 | -2.465929 | -2.725095 | 0.662868 | 0.077390 | -0.039329 | -1.218243 | -2.512400 | 0.880912 | 3.538369 | ... | -2.166060 | -1.527828 | -2.954220 | 0.477999 | -3.874563 | -1.447480 | 2.658658 | 1.070658 | 0.016114 | 5 |
5837 | 2.896888 | -0.468643 | -0.597451 | -0.060037 | 0.605553 | -2.372631 | 3.662627 | -0.258713 | 0.301636 | 0.469918 | ... | 3.485849 | -2.369108 | -0.926800 | 1.759057 | -1.818055 | -0.612234 | 0.185349 | 0.119998 | 1.193040 | 6 |
5838 | -0.973390 | 0.846689 | 1.501724 | 0.749495 | 0.430618 | 2.397338 | -2.322811 | 0.863082 | 0.148679 | 1.922318 | ... | -3.389113 | 2.885768 | 0.906865 | -0.245648 | -0.103736 | -0.134827 | -4.735280 | 1.091735 | 0.286391 | 7 |
5839 | -0.970243 | -1.451962 | -2.427624 | 0.451720 | 0.451164 | 0.728259 | -0.952172 | -2.435741 | 0.097194 | 0.905717 | ... | -2.353980 | -1.560815 | -2.883206 | -0.016729 | -4.260586 | -1.848093 | 1.653681 | 0.807726 | 0.135076 | 5 |
5840 | -1.081001 | -0.290023 | -0.472889 | 0.662982 | 0.584349 | -1.689980 | -1.338542 | 0.321087 | 0.425478 | 6.158132 | ... | -2.709867 | -0.594374 | -0.429635 | -0.499436 | -1.682714 | -1.671447 | -4.122221 | 0.319736 | -0.195001 | 6 |
5841 | 0.742556 | 2.129944 | 1.785111 | 0.586710 | 0.636589 | -1.007158 | 1.727429 | 2.054153 | 0.916939 | 3.965481 | ... | 1.940891 | 1.026992 | 1.698250 | 0.183743 | 5.725875 | 1.926919 | -1.491753 | -0.188857 | 0.656961 | 6 |
5842 | 2.862729 | 1.558536 | 1.367980 | 0.151518 | 0.457180 | -2.143215 | 2.679898 | 1.907546 | 0.103726 | 3.200558 | ... | 3.821308 | -0.535265 | 1.849478 | 0.905898 | 2.039208 | 1.480508 | -0.336158 | -0.409510 | 0.621124 | 6 |
5843 | 0.299162 | -0.072308 | -0.738608 | 0.917164 | 0.497839 | 0.316393 | 0.709187 | -0.398455 | 0.773037 | 2.731865 | ... | -1.170987 | -0.113737 | -0.078206 | 0.350289 | -0.865735 | -1.029711 | -3.015142 | 1.001908 | 1.101463 | 6 |
5844 | 3.180215 | 0.056742 | -0.012276 | 0.152967 | 0.458279 | -1.674190 | 3.883508 | -0.400242 | 0.502771 | 4.632986 | ... | 3.746248 | -2.272361 | 0.038005 | 0.689289 | 0.593933 | -0.256932 | 1.852634 | -0.285590 | 0.917205 | 7 |
5845 | 1.337500 | -0.824857 | -0.434262 | 0.047159 | 0.656366 | -3.105874 | 1.110251 | -0.623407 | -0.016263 | 4.013435 | ... | -0.517778 | -2.311745 | -0.442607 | 0.192987 | -0.542765 | -1.897293 | -3.731458 | -0.866768 | 0.282215 | 6 |
5846 | 1.953950 | -1.253213 | -1.125297 | 1.012160 | 0.229267 | 2.976473 | 1.813165 | -1.079942 | 1.188837 | 5.262199 | ... | 2.206069 | 0.799553 | -0.607373 | 1.879514 | -1.655481 | 0.024347 | 4.908465 | 1.709886 | 1.196523 | 7 |
5847 rows × 224 columns
df.quality.value_counts()
6 2544 5 1919 7 986 4 191 8 176 3 26 9 5 Name: quality, dtype: int64
df = df.drop(df[df.quality == 9 ].index)
df = df.drop(df[df.quality == 3 ].index)
df
name__alcohol_chlorides_citric acid_residual sugar | name__total sulfur dioxide_citric acid | name__total sulfur dioxide_chlorides | name__citric acid_chlorides | elem_0 | name__density_sulphates_fixed acidity_total sulfur dioxide | name__citric acid_alcohol | name__volatile acidity_residual sugar_citric acid_total sulfur dioxide | name__residual sugar_sulphates | elem1_0 | ... | name__total sulfur dioxide_volatile acidity_pH_alcohol | name__density_total sulfur dioxide | name__total sulfur dioxide_citric acid_residual sugar_sulphates | name__density_residual sugar_alcohol_citric acid | name__free sulfur dioxide_total sulfur dioxide | name__total sulfur dioxide_sulphates_fixed acidity_pH | name__citric acid_sulphates_fixed acidity_pH | name__chlorides_fixed acidity_density_volatile acidity | name__volatile acidity_alcohol_citric acid_residual sugar | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.227592 | 1.558518 | 1.664460 | -0.027473 | 0.330031 | 0.425976 | 0.135259 | 1.938229 | -0.438415 | 2.641077 | ... | 0.434843 | 1.469472 | 1.606509 | 0.237339 | 0.973398 | 0.594251 | -2.123956 | 0.921411 | 0.855172 | 5 |
1 | -0.045473 | 2.788140 | 2.877576 | 0.215871 | 0.066918 | 3.723036 | -0.676680 | 2.660169 | 1.155078 | 1.836417 | ... | 0.803771 | 2.806482 | 3.646644 | 0.008963 | 5.400050 | 3.804687 | 4.651396 | 0.073775 | 0.168840 | 5 |
2 | -1.712185 | 2.780299 | 3.217684 | 0.713956 | 0.838745 | 3.194167 | -2.385246 | 3.304605 | 0.312159 | 0.852291 | ... | -1.875952 | 4.266674 | 3.311602 | 0.351215 | 7.241129 | 2.389160 | -1.715098 | 1.386217 | 0.387089 | 5 |
3 | -0.419863 | 3.237882 | 4.196849 | 0.854662 | 0.371440 | 3.626434 | -0.722377 | 4.006264 | 0.795518 | 1.193159 | ... | 2.373420 | 5.088340 | 3.918420 | 0.855602 | 4.837012 | 3.568480 | 2.502341 | 1.444522 | 0.041461 | 6 |
4 | -0.307260 | 0.258421 | 0.198612 | 0.415486 | 0.855955 | 0.366932 | -1.100628 | 0.366125 | 0.374110 | 8.471863 | ... | -1.700884 | 0.870573 | 0.284465 | -0.420926 | -1.540604 | 0.632883 | -0.794523 | 1.079984 | 0.369052 | 4 |
5 | 1.183370 | 0.334783 | 0.099776 | 0.135584 | 0.052050 | -0.169885 | 1.143624 | -0.065276 | 1.179131 | 3.918653 | ... | 1.015351 | -1.304507 | 0.880431 | 0.184012 | 1.490661 | 1.363457 | 1.483631 | -0.193233 | 0.949995 | 6 |
6 | -0.244344 | 0.177226 | 0.148857 | 0.541161 | 0.955383 | -0.035611 | -0.312978 | 0.215212 | 0.274592 | 3.001541 | ... | -0.850271 | 1.416221 | 0.952724 | 0.881361 | -0.589111 | -0.506174 | -3.405627 | 1.186127 | 0.442047 | 5 |
7 | -0.758581 | 0.209856 | -0.118122 | 0.756227 | 0.963111 | 2.267244 | -1.959541 | -0.130295 | 1.035297 | 5.330133 | ... | -2.358864 | 0.811680 | -0.539444 | 0.058470 | -0.112394 | 0.115939 | -0.004706 | 1.010447 | -0.294211 | 5 |
8 | 0.509738 | 0.796781 | 0.694178 | 1.159435 | 0.109525 | -1.442529 | 0.167842 | 0.052686 | 0.576750 | 0.906169 | ... | -0.127120 | -1.081137 | 0.636389 | -0.620195 | 0.196229 | -0.173579 | 0.287197 | 0.152346 | 0.567403 | 5 |
9 | -1.105694 | 0.058315 | 0.257915 | 0.983759 | 0.067465 | -1.020146 | -1.000149 | 0.457775 | 0.161189 | 1.858575 | ... | -2.365750 | 0.224420 | 0.127513 | -0.218428 | -0.031064 | -0.597352 | -4.039884 | 0.540300 | -0.214238 | 6 |
10 | 0.517795 | 0.657187 | 0.958631 | 0.959237 | 0.406489 | 0.863070 | 0.192019 | 1.013132 | 0.678814 | 3.641739 | ... | 0.724614 | 0.734983 | 0.864792 | -0.310225 | 1.169010 | 1.584385 | 2.394734 | 0.682109 | -0.037966 | 7 |
11 | -0.216544 | 2.487724 | 3.757297 | 0.833667 | 0.246426 | 1.869204 | -0.276657 | 4.246059 | 0.051775 | 3.213570 | ... | 0.239199 | 3.715593 | 3.536151 | 0.390983 | 5.636058 | 2.648358 | -0.787942 | 0.261716 | 0.134986 | 5 |
12 | -0.030823 | 1.669713 | 1.449701 | 0.308632 | 0.870826 | 0.965102 | -1.088238 | 1.160680 | 0.292854 | 1.508543 | ... | -0.267321 | 1.008892 | 1.009107 | 0.013575 | 3.137530 | 1.605813 | 2.328965 | -0.173506 | 0.119533 | 8 |
13 | 2.532512 | 0.663371 | 1.040911 | 0.532441 | 0.930141 | -1.942990 | 2.524843 | 0.462912 | 0.277203 | 2.694953 | ... | 3.339025 | -1.467694 | 0.363239 | 0.294848 | 0.799586 | -0.071826 | -1.295806 | -0.092188 | 1.515673 | 7 |
14 | -0.927300 | 2.290394 | 2.567119 | -0.003484 | 0.587684 | 3.771613 | -1.158052 | 3.286506 | 0.637932 | 1.149233 | ... | -0.688389 | 3.786359 | 3.192368 | 0.088963 | 1.101538 | 1.706938 | -1.852492 | 0.988406 | 0.120376 | 6 |
15 | -0.805252 | -1.438636 | -1.689252 | 0.608807 | 0.141400 | 0.898329 | -1.093433 | -1.280877 | 0.571205 | 2.088654 | ... | -2.408880 | -0.528962 | -1.275497 | -0.249468 | -2.296349 | -1.150133 | 1.029836 | 1.282106 | 0.137737 | 5 |
16 | 3.707683 | 0.207071 | 0.947326 | 0.778530 | 0.536602 | -0.319422 | 4.828039 | 0.798528 | 1.612137 | 5.169873 | ... | 6.808884 | -0.867807 | 0.630836 | 2.098455 | -0.823225 | 1.941429 | 6.819429 | -0.218895 | 1.198769 | 7 |
17 | 0.549017 | 1.362632 | 1.238093 | 0.097687 | 0.413920 | -1.666465 | 0.549350 | 1.630658 | 0.052506 | 5.341274 | ... | 0.923685 | -0.179301 | 0.986284 | -0.060968 | 2.808849 | 0.024984 | -3.028244 | -0.603218 | 0.089566 | 6 |
18 | -1.256569 | -2.564801 | -2.550288 | 0.299483 | 0.820501 | -0.709686 | -0.681529 | -2.672096 | 0.472935 | 1.443661 | ... | -2.808752 | -2.298254 | -2.942024 | 0.096998 | -4.577862 | -2.510457 | 2.660393 | 0.760032 | -0.497542 | 5 |
19 | -0.935267 | 1.413965 | 2.034552 | 0.161635 | 0.295060 | 3.336941 | -2.246983 | 1.608296 | 0.469460 | 5.965373 | ... | -1.517290 | 3.610432 | 1.835860 | 0.299163 | 2.466863 | 0.821008 | -0.571721 | 1.643549 | 0.692445 | 6 |
20 | -0.678544 | 1.209503 | 2.059097 | 0.123954 | 0.808909 | 0.918322 | -0.955396 | 1.369504 | -0.071198 | 3.310036 | ... | 0.261020 | 1.919125 | 0.919221 | 0.179152 | 2.786442 | 0.826415 | -0.812206 | 0.495839 | 0.094861 | 5 |
21 | 0.117119 | 1.616873 | 2.945610 | 0.803248 | 0.203891 | 0.532842 | -0.183320 | 2.481310 | 0.361501 | 0.862744 | ... | 3.279540 | 2.511152 | 2.188161 | -0.349228 | -0.099568 | 3.831945 | 6.926805 | 0.011939 | 0.154438 | 5 |
22 | 2.156631 | 0.762914 | 0.756830 | 0.625000 | 0.406208 | -2.475535 | 3.567960 | 0.954996 | 0.134723 | 3.315622 | ... | 3.434240 | -0.950215 | 0.542298 | 1.159385 | 1.573037 | 0.415830 | -1.968778 | -0.136317 | 1.362478 | 6 |
23 | 0.118243 | 0.882021 | 0.305616 | 0.627140 | 0.882556 | -1.322782 | 0.801350 | 0.289063 | 0.831394 | 0.951858 | ... | 0.952706 | -0.904282 | 0.379914 | -0.526693 | -1.330874 | 0.319565 | 1.609707 | -0.340182 | 0.282026 | 7 |
24 | -1.777068 | 0.290613 | 0.681867 | 0.174307 | 0.750957 | -1.773042 | -2.131557 | 0.046237 | 0.394325 | 4.272851 | ... | -2.830815 | -0.192334 | -0.064730 | -0.987951 | -1.618146 | -0.786654 | -2.991774 | 0.589283 | 0.140826 | 4 |
25 | -0.861646 | 2.328985 | 2.566253 | 0.690703 | 0.809659 | 3.626765 | -0.835995 | 3.659660 | 0.519721 | 0.774169 | ... | -0.151479 | 4.178866 | 3.252275 | 0.814671 | 4.614573 | 2.509126 | -1.425625 | 0.596027 | 0.418696 | 7 |
26 | 2.037610 | -0.473717 | -1.471609 | 0.762277 | 0.957321 | -3.351200 | 1.802759 | -1.565907 | 0.543127 | 3.446179 | ... | 0.910942 | -2.667700 | -1.001588 | 0.119780 | -1.647659 | -2.282807 | -2.464412 | -0.197085 | 1.139601 | 4 |
27 | 1.859263 | -1.699892 | -2.522410 | 0.484512 | 0.969631 | 2.367803 | 2.496390 | -2.986239 | 0.572276 | 3.952370 | ... | 0.139259 | -1.258217 | -1.781532 | 1.921199 | -4.517802 | -2.037521 | 2.060177 | 1.528924 | 0.537580 | 7 |
28 | 0.713291 | -1.411833 | -1.958340 | 0.584091 | 0.138193 | 2.690837 | 1.085428 | -2.659101 | 0.780953 | 1.850247 | ... | -0.287627 | -0.207652 | -1.391198 | 1.812283 | -2.077882 | -0.555998 | 3.625295 | 2.178251 | 0.108438 | 5 |
29 | -0.131261 | 0.272915 | 1.232472 | 0.793575 | 0.021082 | 0.624774 | -0.221999 | 0.672610 | 0.769102 | 0.853623 | ... | 0.172306 | 0.711089 | 0.874524 | 0.551561 | 4.382707 | -0.113409 | -2.353924 | 0.743680 | 0.779778 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5817 | 1.684336 | 0.148344 | -0.638565 | 0.346559 | 0.262823 | -1.177912 | 2.383119 | -0.389892 | 0.746837 | 0.739020 | ... | 1.918008 | -0.713744 | 0.193917 | 0.957180 | -0.274454 | 0.007778 | -0.517266 | -0.298535 | 1.256392 | 6 |
5818 | -1.339294 | 3.050986 | 4.393677 | 0.151229 | 0.520696 | 4.030173 | -1.911230 | 4.799026 | 0.538913 | 1.296810 | ... | 0.166820 | 5.827812 | 4.308435 | 0.429215 | 5.217967 | 3.736739 | -1.211057 | 1.116808 | 0.209065 | 7 |
5819 | -0.775040 | 0.369025 | -0.125675 | 0.406338 | 0.593824 | 0.468962 | -1.622197 | 0.336658 | 0.520481 | 1.432850 | ... | -2.062937 | 1.211595 | 0.423135 | 0.173661 | 0.682381 | 0.177941 | -0.534050 | 0.728148 | -0.063169 | 6 |
5820 | -0.969049 | 2.183934 | 3.320431 | 0.166064 | 0.289940 | 4.262753 | -1.416369 | 3.461153 | 0.611477 | 0.533747 | ... | -1.328558 | 4.308086 | 3.342853 | 0.765311 | 3.356561 | 2.146287 | -1.664236 | 1.089743 | 0.829043 | 6 |
5821 | -0.549015 | 1.905231 | 2.171229 | 0.164107 | 0.256748 | 2.720944 | -0.684559 | 2.581841 | 0.125723 | 2.223438 | ... | 1.666210 | 3.606771 | 2.610019 | 0.835962 | 4.155324 | 2.963773 | 3.502350 | 1.233858 | 0.992361 | 6 |
5822 | -0.184794 | 3.401827 | 4.012294 | 0.800918 | 0.763852 | 5.524412 | -1.069056 | 4.502148 | 1.504858 | 2.624384 | ... | 0.237726 | 5.634735 | 4.819634 | 0.408350 | 7.521802 | 4.243802 | 1.657415 | 1.061499 | 0.262839 | 6 |
5823 | 0.837030 | 0.362535 | 0.899495 | -0.013025 | 0.066164 | -0.943357 | 0.403683 | -0.161115 | 0.249230 | 2.731013 | ... | 1.486147 | -0.674847 | 0.033081 | -0.275181 | 0.129740 | 0.462483 | 1.559184 | -0.161609 | 0.038261 | 6 |
5824 | 0.553997 | -1.417418 | -2.868272 | 0.848740 | 0.436089 | 1.799074 | 0.665291 | -2.862720 | 0.678211 | 1.837529 | ... | -2.008403 | -1.268380 | -1.966829 | 1.542900 | -4.556147 | -2.065627 | 0.527535 | 1.727122 | 0.591156 | 5 |
5825 | 3.573800 | 0.159316 | 0.220005 | 0.894024 | 0.044911 | -3.571741 | 3.995029 | 0.275360 | 0.193808 | 1.766974 | ... | 4.606328 | -1.505350 | 0.545157 | 1.146069 | 3.102784 | -0.556956 | -3.288948 | -0.527583 | 1.023172 | 7 |
5826 | -1.013447 | -2.321277 | -3.017300 | 0.333081 | 0.290490 | 2.441209 | -1.583040 | -3.613429 | 0.914697 | 2.074566 | ... | -3.155182 | -1.892768 | -3.156555 | 0.146704 | -4.097368 | -1.584924 | 4.986059 | 1.345938 | -0.165180 | 5 |
5827 | 0.306293 | 0.913355 | 0.920927 | 0.928401 | 0.362644 | 0.859068 | 0.562698 | 0.804431 | 0.292529 | 4.605179 | ... | 1.242005 | 2.398811 | 0.928890 | 1.253982 | 3.264975 | 0.055238 | -2.171233 | 1.175908 | 0.311707 | 5 |
5828 | -0.668103 | 1.948764 | 2.007476 | 0.443412 | 0.446517 | 0.639765 | -0.976233 | 2.694725 | 0.759375 | 6.267733 | ... | -0.399241 | 2.747313 | 2.283765 | -0.225974 | 2.812130 | 1.361302 | -1.749146 | 0.979406 | -0.334729 | 6 |
5829 | 0.724978 | 3.218822 | 3.754466 | 0.096578 | 0.045546 | 1.982709 | 0.008561 | 4.284980 | 0.166749 | 7.317244 | ... | 2.158178 | 4.596267 | 3.876167 | 0.332410 | 3.243396 | 3.449588 | 1.030720 | 0.915898 | 0.995796 | 6 |
5830 | -0.970513 | 0.752263 | 0.682730 | 0.731552 | 0.800870 | 0.183587 | -1.139633 | 1.287314 | -0.407099 | 2.622624 | ... | -0.680387 | 1.002067 | 0.640098 | -0.069868 | -1.363895 | -0.784909 | -2.967880 | 1.188553 | 0.746109 | 5 |
5831 | 3.853778 | 0.323418 | -0.636761 | 0.564543 | 0.051277 | 6.295425 | 4.050108 | -0.912915 | 1.350268 | 3.435931 | ... | 3.513643 | 1.584078 | 0.415694 | 4.256205 | -1.530000 | 1.339207 | 4.001286 | 2.938228 | 1.932350 | 5 |
5832 | -0.096259 | 2.470160 | 3.236854 | 0.349927 | 0.656872 | 0.968289 | -0.254539 | 2.636414 | 0.188441 | 4.112849 | ... | 0.768548 | 2.392730 | 2.509947 | -0.069369 | 2.103849 | 1.872789 | -0.844271 | 0.348204 | -0.240732 | 5 |
5833 | -0.340376 | -0.600902 | -2.255388 | 0.667252 | 0.301831 | 2.615126 | -0.671852 | -1.725816 | 0.908508 | 1.598279 | ... | -2.356687 | -0.104468 | -1.342173 | 0.259757 | -1.120634 | -1.024434 | -0.054126 | 1.700223 | 0.008373 | 4 |
5834 | -0.507955 | 4.235877 | 4.756860 | 0.879236 | 0.004114 | 4.523185 | -0.751009 | 5.430181 | 0.453104 | 2.154023 | ... | 1.927748 | 6.751035 | 4.520947 | 0.792219 | 5.435910 | 5.531239 | 2.973025 | 1.533935 | 0.012274 | 6 |
5835 | 2.317845 | -0.094592 | -0.251396 | 0.524351 | 0.253310 | -0.756686 | 3.756340 | 0.346190 | 0.976396 | 1.309117 | ... | 3.156594 | -0.757255 | 0.138546 | 1.589919 | -1.284629 | 1.153767 | 2.314617 | -0.137656 | 0.994792 | 7 |
5836 | -0.593844 | -2.465929 | -2.725095 | 0.662868 | 0.077390 | -0.039329 | -1.218243 | -2.512400 | 0.880912 | 3.538369 | ... | -2.166060 | -1.527828 | -2.954220 | 0.477999 | -3.874563 | -1.447480 | 2.658658 | 1.070658 | 0.016114 | 5 |
5837 | 2.896888 | -0.468643 | -0.597451 | -0.060037 | 0.605553 | -2.372631 | 3.662627 | -0.258713 | 0.301636 | 0.469918 | ... | 3.485849 | -2.369108 | -0.926800 | 1.759057 | -1.818055 | -0.612234 | 0.185349 | 0.119998 | 1.193040 | 6 |
5838 | -0.973390 | 0.846689 | 1.501724 | 0.749495 | 0.430618 | 2.397338 | -2.322811 | 0.863082 | 0.148679 | 1.922318 | ... | -3.389113 | 2.885768 | 0.906865 | -0.245648 | -0.103736 | -0.134827 | -4.735280 | 1.091735 | 0.286391 | 7 |
5839 | -0.970243 | -1.451962 | -2.427624 | 0.451720 | 0.451164 | 0.728259 | -0.952172 | -2.435741 | 0.097194 | 0.905717 | ... | -2.353980 | -1.560815 | -2.883206 | -0.016729 | -4.260586 | -1.848093 | 1.653681 | 0.807726 | 0.135076 | 5 |
5840 | -1.081001 | -0.290023 | -0.472889 | 0.662982 | 0.584349 | -1.689980 | -1.338542 | 0.321087 | 0.425478 | 6.158132 | ... | -2.709867 | -0.594374 | -0.429635 | -0.499436 | -1.682714 | -1.671447 | -4.122221 | 0.319736 | -0.195001 | 6 |
5841 | 0.742556 | 2.129944 | 1.785111 | 0.586710 | 0.636589 | -1.007158 | 1.727429 | 2.054153 | 0.916939 | 3.965481 | ... | 1.940891 | 1.026992 | 1.698250 | 0.183743 | 5.725875 | 1.926919 | -1.491753 | -0.188857 | 0.656961 | 6 |
5842 | 2.862729 | 1.558536 | 1.367980 | 0.151518 | 0.457180 | -2.143215 | 2.679898 | 1.907546 | 0.103726 | 3.200558 | ... | 3.821308 | -0.535265 | 1.849478 | 0.905898 | 2.039208 | 1.480508 | -0.336158 | -0.409510 | 0.621124 | 6 |
5843 | 0.299162 | -0.072308 | -0.738608 | 0.917164 | 0.497839 | 0.316393 | 0.709187 | -0.398455 | 0.773037 | 2.731865 | ... | -1.170987 | -0.113737 | -0.078206 | 0.350289 | -0.865735 | -1.029711 | -3.015142 | 1.001908 | 1.101463 | 6 |
5844 | 3.180215 | 0.056742 | -0.012276 | 0.152967 | 0.458279 | -1.674190 | 3.883508 | -0.400242 | 0.502771 | 4.632986 | ... | 3.746248 | -2.272361 | 0.038005 | 0.689289 | 0.593933 | -0.256932 | 1.852634 | -0.285590 | 0.917205 | 7 |
5845 | 1.337500 | -0.824857 | -0.434262 | 0.047159 | 0.656366 | -3.105874 | 1.110251 | -0.623407 | -0.016263 | 4.013435 | ... | -0.517778 | -2.311745 | -0.442607 | 0.192987 | -0.542765 | -1.897293 | -3.731458 | -0.866768 | 0.282215 | 6 |
5846 | 1.953950 | -1.253213 | -1.125297 | 1.012160 | 0.229267 | 2.976473 | 1.813165 | -1.079942 | 1.188837 | 5.262199 | ... | 2.206069 | 0.799553 | -0.607373 | 1.879514 | -1.655481 | 0.024347 | 4.908465 | 1.709886 | 1.196523 | 7 |
5816 rows × 224 columns
df.quality.value_counts()
6 2544 5 1919 7 986 4 191 8 176 Name: quality, dtype: int64
(df[targetColumn].apply(lambda x : x-4)).value_counts()
2 2544 1 1919 3 986 0 191 4 176 Name: quality, dtype: int64
df[targetColumn] = df[targetColumn].apply(lambda x : x-4)
df
name__alcohol_chlorides_citric acid_residual sugar | name__total sulfur dioxide_citric acid | name__total sulfur dioxide_chlorides | name__citric acid_chlorides | elem_0 | name__density_sulphates_fixed acidity_total sulfur dioxide | name__citric acid_alcohol | name__volatile acidity_residual sugar_citric acid_total sulfur dioxide | name__residual sugar_sulphates | elem1_0 | ... | name__total sulfur dioxide_volatile acidity_pH_alcohol | name__density_total sulfur dioxide | name__total sulfur dioxide_citric acid_residual sugar_sulphates | name__density_residual sugar_alcohol_citric acid | name__free sulfur dioxide_total sulfur dioxide | name__total sulfur dioxide_sulphates_fixed acidity_pH | name__citric acid_sulphates_fixed acidity_pH | name__chlorides_fixed acidity_density_volatile acidity | name__volatile acidity_alcohol_citric acid_residual sugar | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.227592 | 1.558518 | 1.664460 | -0.027473 | 0.330031 | 0.425976 | 0.135259 | 1.938229 | -0.438415 | 2.641077 | ... | 0.434843 | 1.469472 | 1.606509 | 0.237339 | 0.973398 | 0.594251 | -2.123956 | 0.921411 | 0.855172 | 1 |
1 | -0.045473 | 2.788140 | 2.877576 | 0.215871 | 0.066918 | 3.723036 | -0.676680 | 2.660169 | 1.155078 | 1.836417 | ... | 0.803771 | 2.806482 | 3.646644 | 0.008963 | 5.400050 | 3.804687 | 4.651396 | 0.073775 | 0.168840 | 1 |
2 | -1.712185 | 2.780299 | 3.217684 | 0.713956 | 0.838745 | 3.194167 | -2.385246 | 3.304605 | 0.312159 | 0.852291 | ... | -1.875952 | 4.266674 | 3.311602 | 0.351215 | 7.241129 | 2.389160 | -1.715098 | 1.386217 | 0.387089 | 1 |
3 | -0.419863 | 3.237882 | 4.196849 | 0.854662 | 0.371440 | 3.626434 | -0.722377 | 4.006264 | 0.795518 | 1.193159 | ... | 2.373420 | 5.088340 | 3.918420 | 0.855602 | 4.837012 | 3.568480 | 2.502341 | 1.444522 | 0.041461 | 2 |
4 | -0.307260 | 0.258421 | 0.198612 | 0.415486 | 0.855955 | 0.366932 | -1.100628 | 0.366125 | 0.374110 | 8.471863 | ... | -1.700884 | 0.870573 | 0.284465 | -0.420926 | -1.540604 | 0.632883 | -0.794523 | 1.079984 | 0.369052 | 0 |
5 | 1.183370 | 0.334783 | 0.099776 | 0.135584 | 0.052050 | -0.169885 | 1.143624 | -0.065276 | 1.179131 | 3.918653 | ... | 1.015351 | -1.304507 | 0.880431 | 0.184012 | 1.490661 | 1.363457 | 1.483631 | -0.193233 | 0.949995 | 2 |
6 | -0.244344 | 0.177226 | 0.148857 | 0.541161 | 0.955383 | -0.035611 | -0.312978 | 0.215212 | 0.274592 | 3.001541 | ... | -0.850271 | 1.416221 | 0.952724 | 0.881361 | -0.589111 | -0.506174 | -3.405627 | 1.186127 | 0.442047 | 1 |
7 | -0.758581 | 0.209856 | -0.118122 | 0.756227 | 0.963111 | 2.267244 | -1.959541 | -0.130295 | 1.035297 | 5.330133 | ... | -2.358864 | 0.811680 | -0.539444 | 0.058470 | -0.112394 | 0.115939 | -0.004706 | 1.010447 | -0.294211 | 1 |
8 | 0.509738 | 0.796781 | 0.694178 | 1.159435 | 0.109525 | -1.442529 | 0.167842 | 0.052686 | 0.576750 | 0.906169 | ... | -0.127120 | -1.081137 | 0.636389 | -0.620195 | 0.196229 | -0.173579 | 0.287197 | 0.152346 | 0.567403 | 1 |
9 | -1.105694 | 0.058315 | 0.257915 | 0.983759 | 0.067465 | -1.020146 | -1.000149 | 0.457775 | 0.161189 | 1.858575 | ... | -2.365750 | 0.224420 | 0.127513 | -0.218428 | -0.031064 | -0.597352 | -4.039884 | 0.540300 | -0.214238 | 2 |
10 | 0.517795 | 0.657187 | 0.958631 | 0.959237 | 0.406489 | 0.863070 | 0.192019 | 1.013132 | 0.678814 | 3.641739 | ... | 0.724614 | 0.734983 | 0.864792 | -0.310225 | 1.169010 | 1.584385 | 2.394734 | 0.682109 | -0.037966 | 3 |
11 | -0.216544 | 2.487724 | 3.757297 | 0.833667 | 0.246426 | 1.869204 | -0.276657 | 4.246059 | 0.051775 | 3.213570 | ... | 0.239199 | 3.715593 | 3.536151 | 0.390983 | 5.636058 | 2.648358 | -0.787942 | 0.261716 | 0.134986 | 1 |
12 | -0.030823 | 1.669713 | 1.449701 | 0.308632 | 0.870826 | 0.965102 | -1.088238 | 1.160680 | 0.292854 | 1.508543 | ... | -0.267321 | 1.008892 | 1.009107 | 0.013575 | 3.137530 | 1.605813 | 2.328965 | -0.173506 | 0.119533 | 4 |
13 | 2.532512 | 0.663371 | 1.040911 | 0.532441 | 0.930141 | -1.942990 | 2.524843 | 0.462912 | 0.277203 | 2.694953 | ... | 3.339025 | -1.467694 | 0.363239 | 0.294848 | 0.799586 | -0.071826 | -1.295806 | -0.092188 | 1.515673 | 3 |
14 | -0.927300 | 2.290394 | 2.567119 | -0.003484 | 0.587684 | 3.771613 | -1.158052 | 3.286506 | 0.637932 | 1.149233 | ... | -0.688389 | 3.786359 | 3.192368 | 0.088963 | 1.101538 | 1.706938 | -1.852492 | 0.988406 | 0.120376 | 2 |
15 | -0.805252 | -1.438636 | -1.689252 | 0.608807 | 0.141400 | 0.898329 | -1.093433 | -1.280877 | 0.571205 | 2.088654 | ... | -2.408880 | -0.528962 | -1.275497 | -0.249468 | -2.296349 | -1.150133 | 1.029836 | 1.282106 | 0.137737 | 1 |
16 | 3.707683 | 0.207071 | 0.947326 | 0.778530 | 0.536602 | -0.319422 | 4.828039 | 0.798528 | 1.612137 | 5.169873 | ... | 6.808884 | -0.867807 | 0.630836 | 2.098455 | -0.823225 | 1.941429 | 6.819429 | -0.218895 | 1.198769 | 3 |
17 | 0.549017 | 1.362632 | 1.238093 | 0.097687 | 0.413920 | -1.666465 | 0.549350 | 1.630658 | 0.052506 | 5.341274 | ... | 0.923685 | -0.179301 | 0.986284 | -0.060968 | 2.808849 | 0.024984 | -3.028244 | -0.603218 | 0.089566 | 2 |
18 | -1.256569 | -2.564801 | -2.550288 | 0.299483 | 0.820501 | -0.709686 | -0.681529 | -2.672096 | 0.472935 | 1.443661 | ... | -2.808752 | -2.298254 | -2.942024 | 0.096998 | -4.577862 | -2.510457 | 2.660393 | 0.760032 | -0.497542 | 1 |
19 | -0.935267 | 1.413965 | 2.034552 | 0.161635 | 0.295060 | 3.336941 | -2.246983 | 1.608296 | 0.469460 | 5.965373 | ... | -1.517290 | 3.610432 | 1.835860 | 0.299163 | 2.466863 | 0.821008 | -0.571721 | 1.643549 | 0.692445 | 2 |
20 | -0.678544 | 1.209503 | 2.059097 | 0.123954 | 0.808909 | 0.918322 | -0.955396 | 1.369504 | -0.071198 | 3.310036 | ... | 0.261020 | 1.919125 | 0.919221 | 0.179152 | 2.786442 | 0.826415 | -0.812206 | 0.495839 | 0.094861 | 1 |
21 | 0.117119 | 1.616873 | 2.945610 | 0.803248 | 0.203891 | 0.532842 | -0.183320 | 2.481310 | 0.361501 | 0.862744 | ... | 3.279540 | 2.511152 | 2.188161 | -0.349228 | -0.099568 | 3.831945 | 6.926805 | 0.011939 | 0.154438 | 1 |
22 | 2.156631 | 0.762914 | 0.756830 | 0.625000 | 0.406208 | -2.475535 | 3.567960 | 0.954996 | 0.134723 | 3.315622 | ... | 3.434240 | -0.950215 | 0.542298 | 1.159385 | 1.573037 | 0.415830 | -1.968778 | -0.136317 | 1.362478 | 2 |
23 | 0.118243 | 0.882021 | 0.305616 | 0.627140 | 0.882556 | -1.322782 | 0.801350 | 0.289063 | 0.831394 | 0.951858 | ... | 0.952706 | -0.904282 | 0.379914 | -0.526693 | -1.330874 | 0.319565 | 1.609707 | -0.340182 | 0.282026 | 3 |
24 | -1.777068 | 0.290613 | 0.681867 | 0.174307 | 0.750957 | -1.773042 | -2.131557 | 0.046237 | 0.394325 | 4.272851 | ... | -2.830815 | -0.192334 | -0.064730 | -0.987951 | -1.618146 | -0.786654 | -2.991774 | 0.589283 | 0.140826 | 0 |
25 | -0.861646 | 2.328985 | 2.566253 | 0.690703 | 0.809659 | 3.626765 | -0.835995 | 3.659660 | 0.519721 | 0.774169 | ... | -0.151479 | 4.178866 | 3.252275 | 0.814671 | 4.614573 | 2.509126 | -1.425625 | 0.596027 | 0.418696 | 3 |
26 | 2.037610 | -0.473717 | -1.471609 | 0.762277 | 0.957321 | -3.351200 | 1.802759 | -1.565907 | 0.543127 | 3.446179 | ... | 0.910942 | -2.667700 | -1.001588 | 0.119780 | -1.647659 | -2.282807 | -2.464412 | -0.197085 | 1.139601 | 0 |
27 | 1.859263 | -1.699892 | -2.522410 | 0.484512 | 0.969631 | 2.367803 | 2.496390 | -2.986239 | 0.572276 | 3.952370 | ... | 0.139259 | -1.258217 | -1.781532 | 1.921199 | -4.517802 | -2.037521 | 2.060177 | 1.528924 | 0.537580 | 3 |
28 | 0.713291 | -1.411833 | -1.958340 | 0.584091 | 0.138193 | 2.690837 | 1.085428 | -2.659101 | 0.780953 | 1.850247 | ... | -0.287627 | -0.207652 | -1.391198 | 1.812283 | -2.077882 | -0.555998 | 3.625295 | 2.178251 | 0.108438 | 1 |
29 | -0.131261 | 0.272915 | 1.232472 | 0.793575 | 0.021082 | 0.624774 | -0.221999 | 0.672610 | 0.769102 | 0.853623 | ... | 0.172306 | 0.711089 | 0.874524 | 0.551561 | 4.382707 | -0.113409 | -2.353924 | 0.743680 | 0.779778 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5817 | 1.684336 | 0.148344 | -0.638565 | 0.346559 | 0.262823 | -1.177912 | 2.383119 | -0.389892 | 0.746837 | 0.739020 | ... | 1.918008 | -0.713744 | 0.193917 | 0.957180 | -0.274454 | 0.007778 | -0.517266 | -0.298535 | 1.256392 | 2 |
5818 | -1.339294 | 3.050986 | 4.393677 | 0.151229 | 0.520696 | 4.030173 | -1.911230 | 4.799026 | 0.538913 | 1.296810 | ... | 0.166820 | 5.827812 | 4.308435 | 0.429215 | 5.217967 | 3.736739 | -1.211057 | 1.116808 | 0.209065 | 3 |
5819 | -0.775040 | 0.369025 | -0.125675 | 0.406338 | 0.593824 | 0.468962 | -1.622197 | 0.336658 | 0.520481 | 1.432850 | ... | -2.062937 | 1.211595 | 0.423135 | 0.173661 | 0.682381 | 0.177941 | -0.534050 | 0.728148 | -0.063169 | 2 |
5820 | -0.969049 | 2.183934 | 3.320431 | 0.166064 | 0.289940 | 4.262753 | -1.416369 | 3.461153 | 0.611477 | 0.533747 | ... | -1.328558 | 4.308086 | 3.342853 | 0.765311 | 3.356561 | 2.146287 | -1.664236 | 1.089743 | 0.829043 | 2 |
5821 | -0.549015 | 1.905231 | 2.171229 | 0.164107 | 0.256748 | 2.720944 | -0.684559 | 2.581841 | 0.125723 | 2.223438 | ... | 1.666210 | 3.606771 | 2.610019 | 0.835962 | 4.155324 | 2.963773 | 3.502350 | 1.233858 | 0.992361 | 2 |
5822 | -0.184794 | 3.401827 | 4.012294 | 0.800918 | 0.763852 | 5.524412 | -1.069056 | 4.502148 | 1.504858 | 2.624384 | ... | 0.237726 | 5.634735 | 4.819634 | 0.408350 | 7.521802 | 4.243802 | 1.657415 | 1.061499 | 0.262839 | 2 |
5823 | 0.837030 | 0.362535 | 0.899495 | -0.013025 | 0.066164 | -0.943357 | 0.403683 | -0.161115 | 0.249230 | 2.731013 | ... | 1.486147 | -0.674847 | 0.033081 | -0.275181 | 0.129740 | 0.462483 | 1.559184 | -0.161609 | 0.038261 | 2 |
5824 | 0.553997 | -1.417418 | -2.868272 | 0.848740 | 0.436089 | 1.799074 | 0.665291 | -2.862720 | 0.678211 | 1.837529 | ... | -2.008403 | -1.268380 | -1.966829 | 1.542900 | -4.556147 | -2.065627 | 0.527535 | 1.727122 | 0.591156 | 1 |
5825 | 3.573800 | 0.159316 | 0.220005 | 0.894024 | 0.044911 | -3.571741 | 3.995029 | 0.275360 | 0.193808 | 1.766974 | ... | 4.606328 | -1.505350 | 0.545157 | 1.146069 | 3.102784 | -0.556956 | -3.288948 | -0.527583 | 1.023172 | 3 |
5826 | -1.013447 | -2.321277 | -3.017300 | 0.333081 | 0.290490 | 2.441209 | -1.583040 | -3.613429 | 0.914697 | 2.074566 | ... | -3.155182 | -1.892768 | -3.156555 | 0.146704 | -4.097368 | -1.584924 | 4.986059 | 1.345938 | -0.165180 | 1 |
5827 | 0.306293 | 0.913355 | 0.920927 | 0.928401 | 0.362644 | 0.859068 | 0.562698 | 0.804431 | 0.292529 | 4.605179 | ... | 1.242005 | 2.398811 | 0.928890 | 1.253982 | 3.264975 | 0.055238 | -2.171233 | 1.175908 | 0.311707 | 1 |
5828 | -0.668103 | 1.948764 | 2.007476 | 0.443412 | 0.446517 | 0.639765 | -0.976233 | 2.694725 | 0.759375 | 6.267733 | ... | -0.399241 | 2.747313 | 2.283765 | -0.225974 | 2.812130 | 1.361302 | -1.749146 | 0.979406 | -0.334729 | 2 |
5829 | 0.724978 | 3.218822 | 3.754466 | 0.096578 | 0.045546 | 1.982709 | 0.008561 | 4.284980 | 0.166749 | 7.317244 | ... | 2.158178 | 4.596267 | 3.876167 | 0.332410 | 3.243396 | 3.449588 | 1.030720 | 0.915898 | 0.995796 | 2 |
5830 | -0.970513 | 0.752263 | 0.682730 | 0.731552 | 0.800870 | 0.183587 | -1.139633 | 1.287314 | -0.407099 | 2.622624 | ... | -0.680387 | 1.002067 | 0.640098 | -0.069868 | -1.363895 | -0.784909 | -2.967880 | 1.188553 | 0.746109 | 1 |
5831 | 3.853778 | 0.323418 | -0.636761 | 0.564543 | 0.051277 | 6.295425 | 4.050108 | -0.912915 | 1.350268 | 3.435931 | ... | 3.513643 | 1.584078 | 0.415694 | 4.256205 | -1.530000 | 1.339207 | 4.001286 | 2.938228 | 1.932350 | 1 |
5832 | -0.096259 | 2.470160 | 3.236854 | 0.349927 | 0.656872 | 0.968289 | -0.254539 | 2.636414 | 0.188441 | 4.112849 | ... | 0.768548 | 2.392730 | 2.509947 | -0.069369 | 2.103849 | 1.872789 | -0.844271 | 0.348204 | -0.240732 | 1 |
5833 | -0.340376 | -0.600902 | -2.255388 | 0.667252 | 0.301831 | 2.615126 | -0.671852 | -1.725816 | 0.908508 | 1.598279 | ... | -2.356687 | -0.104468 | -1.342173 | 0.259757 | -1.120634 | -1.024434 | -0.054126 | 1.700223 | 0.008373 | 0 |
5834 | -0.507955 | 4.235877 | 4.756860 | 0.879236 | 0.004114 | 4.523185 | -0.751009 | 5.430181 | 0.453104 | 2.154023 | ... | 1.927748 | 6.751035 | 4.520947 | 0.792219 | 5.435910 | 5.531239 | 2.973025 | 1.533935 | 0.012274 | 2 |
5835 | 2.317845 | -0.094592 | -0.251396 | 0.524351 | 0.253310 | -0.756686 | 3.756340 | 0.346190 | 0.976396 | 1.309117 | ... | 3.156594 | -0.757255 | 0.138546 | 1.589919 | -1.284629 | 1.153767 | 2.314617 | -0.137656 | 0.994792 | 3 |
5836 | -0.593844 | -2.465929 | -2.725095 | 0.662868 | 0.077390 | -0.039329 | -1.218243 | -2.512400 | 0.880912 | 3.538369 | ... | -2.166060 | -1.527828 | -2.954220 | 0.477999 | -3.874563 | -1.447480 | 2.658658 | 1.070658 | 0.016114 | 1 |
5837 | 2.896888 | -0.468643 | -0.597451 | -0.060037 | 0.605553 | -2.372631 | 3.662627 | -0.258713 | 0.301636 | 0.469918 | ... | 3.485849 | -2.369108 | -0.926800 | 1.759057 | -1.818055 | -0.612234 | 0.185349 | 0.119998 | 1.193040 | 2 |
5838 | -0.973390 | 0.846689 | 1.501724 | 0.749495 | 0.430618 | 2.397338 | -2.322811 | 0.863082 | 0.148679 | 1.922318 | ... | -3.389113 | 2.885768 | 0.906865 | -0.245648 | -0.103736 | -0.134827 | -4.735280 | 1.091735 | 0.286391 | 3 |
5839 | -0.970243 | -1.451962 | -2.427624 | 0.451720 | 0.451164 | 0.728259 | -0.952172 | -2.435741 | 0.097194 | 0.905717 | ... | -2.353980 | -1.560815 | -2.883206 | -0.016729 | -4.260586 | -1.848093 | 1.653681 | 0.807726 | 0.135076 | 1 |
5840 | -1.081001 | -0.290023 | -0.472889 | 0.662982 | 0.584349 | -1.689980 | -1.338542 | 0.321087 | 0.425478 | 6.158132 | ... | -2.709867 | -0.594374 | -0.429635 | -0.499436 | -1.682714 | -1.671447 | -4.122221 | 0.319736 | -0.195001 | 2 |
5841 | 0.742556 | 2.129944 | 1.785111 | 0.586710 | 0.636589 | -1.007158 | 1.727429 | 2.054153 | 0.916939 | 3.965481 | ... | 1.940891 | 1.026992 | 1.698250 | 0.183743 | 5.725875 | 1.926919 | -1.491753 | -0.188857 | 0.656961 | 2 |
5842 | 2.862729 | 1.558536 | 1.367980 | 0.151518 | 0.457180 | -2.143215 | 2.679898 | 1.907546 | 0.103726 | 3.200558 | ... | 3.821308 | -0.535265 | 1.849478 | 0.905898 | 2.039208 | 1.480508 | -0.336158 | -0.409510 | 0.621124 | 2 |
5843 | 0.299162 | -0.072308 | -0.738608 | 0.917164 | 0.497839 | 0.316393 | 0.709187 | -0.398455 | 0.773037 | 2.731865 | ... | -1.170987 | -0.113737 | -0.078206 | 0.350289 | -0.865735 | -1.029711 | -3.015142 | 1.001908 | 1.101463 | 2 |
5844 | 3.180215 | 0.056742 | -0.012276 | 0.152967 | 0.458279 | -1.674190 | 3.883508 | -0.400242 | 0.502771 | 4.632986 | ... | 3.746248 | -2.272361 | 0.038005 | 0.689289 | 0.593933 | -0.256932 | 1.852634 | -0.285590 | 0.917205 | 3 |
5845 | 1.337500 | -0.824857 | -0.434262 | 0.047159 | 0.656366 | -3.105874 | 1.110251 | -0.623407 | -0.016263 | 4.013435 | ... | -0.517778 | -2.311745 | -0.442607 | 0.192987 | -0.542765 | -1.897293 | -3.731458 | -0.866768 | 0.282215 | 2 |
5846 | 1.953950 | -1.253213 | -1.125297 | 1.012160 | 0.229267 | 2.976473 | 1.813165 | -1.079942 | 1.188837 | 5.262199 | ... | 2.206069 | 0.799553 | -0.607373 | 1.879514 | -1.655481 | 0.024347 | 4.908465 | 1.709886 | 1.196523 | 3 |
5816 rows × 224 columns
df.quality.value_counts()
2 2544 1 1919 3 986 0 191 4 176 Name: quality, dtype: int64
print("Final testing of accuracy")
get_accuracy_rate(df)
Final testing of accuracy Accuracy: [ 0.55670103 0.57523646 0.58297506 0.57695615 0.58125537]
0.57479427242615233
# Ну а теперь сохраняем датафрейм
df.to_csv('res.csv', sep=',', index=False, header=False)
df['quality'].unique()
array([1, 2, 0, 3, 4], dtype=int64)
from sklearn.model_selection import train_test_split
targetColumn = 'quality'
FeatureColumns = df.columns.tolist()
FeatureColumns.remove(targetColumn)
# Разделяем на X и y
X = df[FeatureColumns].values
y = df[targetColumn].values
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.4, random_state=42) # ;)
# Random Forests с 25 estimator'ами
rf_clf = RandomForestClassifier(n_estimators=25)
# Задаем кросс-валидацию на 5 фолдов
kf = KFold(n_splits=5, shuffle=True)
# Считаем верность (accuracy) для каждого из фолдов
print("Accuracy: {0}".format(cross_val_score(rf_clf, X_train, y_train, scoring='accuracy', cv=kf.split(X_train,y_train))))
np.mean(cross_val_score(rf_clf, X_train, y_train, scoring='accuracy', cv=kf.split(X_train,y_train)))
from sklearn.metrics import accuracy_score
rf_clf.fit(X_train, y_train)
print("Accuracy: {0}".format(accuracy_score(y_test, rf_clf.predict(X_test))))
Accuracy: [ 0.55873926 0.53295129 0.55014327 0.51002865 0.56241033] Accuracy: 0.5457670820799312
import numpy as np
np.savetxt("x_train.csv", X_train, fmt='%10.8f',delimiter=";")
np.savetxt("x_test.csv", X_test, fmt='%10.8f', delimiter=";")
np.savetxt("y_train.csv", y_train, fmt='%d', delimiter=";")
np.savetxt("y_test.csv", y_test, fmt='%d', delimiter=";")
X_train.shape
(3489, 223)