In [ ]:
# import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import random
from itertools import permutations

# Dataset: https://archive.ics.uci.edu/ml/datasets/Wine+Quality
# Загружаем два компонента 
inputData1 = pd.read_csv('winequality-red.csv', delimiter=';')
# Учитываем признак того, что вино красное
inputData1['red'] = 1.0

inputData2 = pd.read_csv('winequality-white.csv', delimiter=';')
inputData2['red'] = 0.0
# Учитываем признак того, что вино белое (не красное)

# Объединяем в единый фрейм
inputData = pd.concat([inputData1,inputData2])

targetColumn = 'quality'
In [2]:
inputData.head()
Out[2]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality red
0 7.4 0.70 0.00 1.9 0.076 11.0 34.0 0.9978 3.51 0.56 9.4 5 1.0
1 7.8 0.88 0.00 2.6 0.098 25.0 67.0 0.9968 3.20 0.68 9.8 5 1.0
2 7.8 0.76 0.04 2.3 0.092 15.0 54.0 0.9970 3.26 0.65 9.8 5 1.0
3 11.2 0.28 0.56 1.9 0.075 17.0 60.0 0.9980 3.16 0.58 9.8 6 1.0
4 7.4 0.70 0.00 1.9 0.076 11.0 34.0 0.9978 3.51 0.56 9.4 5 1.0
In [3]:
inputData.shape
Out[3]:
(6497, 13)
In [4]:
def get_accuracy_rate(targetDF):
    targetColumn = 'quality'
    
    FeatureColumns = targetDF.columns.tolist()
    FeatureColumns.remove(targetColumn)
    
    # Разделяем на X и y
    X = targetDF[FeatureColumns].values
    y = targetDF[targetColumn].values

    # Random Forests с 100 estimator'ами
    rf_clf = RandomForestClassifier(n_estimators=100)

    # Задаем кросс-валидацию на 5 фолдов
    kf = KFold(n_splits=5, shuffle=True)

    # Считаем верность (accuracy) для каждого из фолдов
    print("Accuracy: {0}".format(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y))))
    return np.mean(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y)))
In [5]:
get_accuracy_rate(inputData)
Accuracy: [ 0.69076923  0.68923077  0.68745189  0.69515012  0.68899153]
Out[5]:
0.68308320009474743
In [10]:
inputData.quality.value_counts()
Out[10]:
6    2836
5    2138
7    1079
4     216
8     193
3      30
9       5
Name: quality, dtype: int64
In [11]:
# Вспомогательный фрейм, временно отбрасываем столбцы quality и red
df = inputData.drop(['quality', 'red'], axis=1)


# Половину нормализуем, половину преобразуем в z-scores
columns_list = df.columns.tolist()
half_of_list = int(len(columns_list)/2)

# Нормализация
df_normalized_columns = df[columns_list[:half_of_list]] # Половину столбцов
df_norm = (df_normalized_columns - df_normalized_columns.mean()) / (df_normalized_columns.max() - df_normalized_columns.min())
In [13]:
get_accuracy_rate(df_norm)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-6cf556646405> in <module>()
----> 1 get_accuracy_rate(df_norm)

<ipython-input-4-a667f2e3c8a0> in get_accuracy_rate(targetDF)
      3 
      4     FeatureColumns = targetDF.columns.tolist()
----> 5     FeatureColumns.remove(targetColumn)
      6 
      7     # Разделяем на X и y

ValueError: list.remove(x): x not in list
In [14]:
# Преобразование в z-scores:
df_z_statistics_columns = df[columns_list[half_of_list:]] # Вторую половину
df_z_statistics = (df_z_statistics_columns - df_z_statistics_columns.mean())/df_z_statistics_columns.std()

# Умножим бинарный столбец на случайное число (таким образом, для 1 получится случайное число, а 0 не изменится)
inputData['red'] = inputData['red'].apply(lambda x : x*random.random())

# Производим повторную конкатенацию в единый frame
new_df = pd.concat([df_norm, df_z_statistics, inputData['red'], inputData['quality']], axis=1)
In [15]:
listOfColumns = inputData.columns.tolist()
listOfColumns.remove('quality')
listOfColumns.remove('red')
In [16]:
for iterationNumber in range(10):
    # Добавляем случайные (шумовые) столбцы
    name_for_column = 'elem'+'_'+str(iterationNumber)
    new_df[name_for_column] = 1
    new_df[name_for_column] = new_df[name_for_column].apply(lambda x : x* random.random())
    
    name_for_column = 'elem1'+'_'+str(iterationNumber)
    new_df[name_for_column] = 1
    new_df[name_for_column] = new_df[name_for_column].apply(lambda x : x* random.gauss(5,2)*random.random()+random.random())

    name_for_column = 'elem2'+'_'+str(iterationNumber)
    new_df[name_for_column] = new_df['elem1'+'_'+str(iterationNumber)]
    new_df[name_for_column] = new_df[name_for_column].apply(lambda x : x**2* random.gauss(4,3)*random.random())
    print("Iteration : #{0}".format(iterationNumber))
    print("Mean accuracy: {0}".format(get_accuracy_rate(new_df)))
Iteration : #0
Accuracy: [ 0.66923077  0.66923077  0.67051578  0.66281755  0.6443418 ]
Mean accuracy: 0.655535026943803
Iteration : #1
Accuracy: [ 0.62076923  0.66538462  0.64126251  0.63125481  0.61508853]
Mean accuracy: 0.6398332445076094
Iteration : #2
Accuracy: [ 0.62        0.62846154  0.59661278  0.61431871  0.617398  ]
Mean accuracy: 0.6192109907029075
Iteration : #3
Accuracy: [ 0.59538462  0.62769231  0.61200924  0.61431871  0.59276366]
Mean accuracy: 0.6056631728548587
Iteration : #4
Accuracy: [ 0.60384615  0.58769231  0.59584296  0.59199384  0.62509623]
Mean accuracy: 0.5924256528690709
Iteration : #5
Accuracy: [ 0.58076923  0.58538462  0.59507313  0.59353349  0.59815242]
Mean accuracy: 0.5947373719429146
Iteration : #6
Accuracy: [ 0.58153846  0.58692308  0.61354888  0.58275597  0.5604311 ]
Mean accuracy: 0.5782675430804762
Iteration : #7
Accuracy: [ 0.58615385  0.56923077  0.58198614  0.57120862  0.59199384]
Mean accuracy: 0.578883401433055
Iteration : #8
Accuracy: [ 0.58        0.57461538  0.57505774  0.5804465   0.59507313]
Mean accuracy: 0.5770326286492569
Iteration : #9
Accuracy: [ 0.57769231  0.60769231  0.54272517  0.55966128  0.56966898]
Mean accuracy: 0.568572037662107
In [17]:
# Играем с коллинеарностью
#new_df['collinear_1'] = random.random() * new_df['volatile acidity'] + random.random()*new_df['total sulfur dioxide'] + random.random()
name_1 = random.choice(columns_list)
name_2 = random.choice(columns_list)
new_df['collinear_1'] = random.random() * new_df[name_1] + random.random()*new_df[name_2] + random.random()
new_df['collinear_1'] = new_df['collinear_1'].apply(lambda x : x+random.random())

name_3 = random.choice(columns_list)
name_4 = random.choice(columns_list)
name_5 = random.choice(columns_list)
new_df['collinear_2'] = random.random() * new_df[name_3] + random.gauss(random.randint(5,10), random.randint(10,21))*new_df[name_4] + random.gauss(random.randint(5,50), random.randint(2,21))*new_df[name_5]
new_df['collinear_2'] = new_df['collinear_2'].apply(lambda x : x+random.random())

print("Collinear: {0} and {1}".format(name_1, name_2))
print("Collinear: {0}, {1} and {2}".format(name_3, name_4, name_5))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))

name_1 = random.choice(columns_list)
name_2 = random.choice(columns_list)
new_df['collinear_3'] = random.random() * new_df[name_1] + random.random()*new_df[name_2] + random.random()
new_df['collinear_3'] = new_df['collinear_3'].apply(lambda x : x+random.gauss(random.randint(10,25), random.randint(5,21)))

name_3 = random.choice(columns_list)
name_4 = random.choice(columns_list)
name_5 = random.choice(columns_list)
new_df['collinear_4'] = random.random() * new_df[name_3] + random.random()*new_df[name_4] + random.random()*new_df[name_5]
new_df['collinear_4'] = new_df['collinear_4'].apply(lambda x : x+random.random())

print("Collinear: {0} and {1}".format(name_1, name_2))
print("Collinear: {0}, {1} and {2}".format(name_3, name_4, name_5))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))

name_1 = random.choice(columns_list)
name_2 = random.choice(columns_list)
new_df['collinear_5'] = random.random() * new_df[name_1] + random.random()*new_df[name_2] + random.random()
new_df['collinear_5'] = new_df['collinear_5'].apply(lambda x : x+random.random())

name_3 = random.choice(columns_list)
name_4 = random.choice(columns_list)
name_5 = random.choice(columns_list)
new_df['collinear_6'] = random.random() * new_df[name_3] + random.random()*new_df[name_4] + random.random()*new_df[name_5]
new_df['collinear_6'] = new_df['collinear_6'].apply(lambda x : x+random.random())

print("Collinear: {0} and {1}".format(name_1, name_2))
print("Collinear: {0}, {1} and {2}".format(name_3, name_4, name_5))
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
Collinear: residual sugar and citric acid
Collinear: sulphates, free sulfur dioxide and density
Accuracy: [ 0.57307692  0.59846154  0.60662048  0.57274827  0.56812933]
Mean accuracy: 0.5701080120802985
Collinear: free sulfur dioxide and pH
Collinear: free sulfur dioxide, total sulfur dioxide and fixed acidity
Accuracy: [ 0.56923077  0.56692308  0.56351039  0.58198614  0.55273287]
Mean accuracy: 0.5670283650145082
Collinear: pH and chlorides
Collinear: citric acid, pH and pH
Accuracy: [ 0.57846154  0.57692308  0.58583526  0.56120092  0.59969207]
Mean accuracy: 0.5708830461301593
In [18]:
for iterationNumber in range(5):

    # Делаем коллинеарность по 2,3 и 4 
    # Добавляем каждого по половине из количества исходных столбцов

    
    mod_dataframe = new_df
    while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
        mod_dataframe = new_df
        candidates = [i for i in permutations(listOfColumns,3)]
        for (name1, name2, name3) in random.sample(candidates,k=2*len(inputData.columns)):
            mod_dataframe['name__'+name1+'_'+name2+'_'+name3] = random.random()*mod_dataframe[name1]+random.random()*mod_dataframe[name2]+random.random()*mod_dataframe[name3]
            mod_dataframe['name__'+name1+'_'+name2+'_'+name3] = mod_dataframe['name__'+name1+'_'+name2+'_'+name3].apply(lambda x : x + random.random())
    
    new_df = mod_dataframe
    
    
    print("Iteration #{0}, 3-permutations: ".format(iterationNumber))
    print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
    
    while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
        mod_dataframe = new_df
        candidates = [i for i in permutations(listOfColumns,2)]
        for (name1, name2) in random.sample(candidates,k=2*len(inputData.columns)):
            mod_dataframe['name__'+name1+'_'+name2] = random.random()*mod_dataframe[name1]+random.random()*mod_dataframe[name2]
            mod_dataframe['name__'+name1+'_'+name2] = mod_dataframe['name__'+name1+'_'+name2].apply(lambda x : x*2 + random.random())
    new_df = mod_dataframe
    print("Iteration #{0}, 2-permutations: ".format(iterationNumber))
    print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
        
    while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
        mod_dataframe = new_df
        candidates = [i for i in permutations(listOfColumns,4)]
        for (name1, name2, name3, name4) in random.sample(candidates,k=2*len(inputData.columns)):
            mod_dataframe['name__'+name1+'_'+name2+'_'+name3+'_'+name4] = random.random()*mod_dataframe[name1]+random.random()*new_df[name2]+random.random()*mod_dataframe[name3]+random.random()*mod_dataframe[name4]#+random.random()*mod_dataframe[name2]+random.random()*mod_dataframe[name4]
            mod_dataframe['name__'+name1+'_'+name2+'_'+name3+'_'+name4] = mod_dataframe['name__'+name1+'_'+name2+'_'+name3+'_'+name4].apply(lambda x : x*2.3 + random.random())
    
    new_df = mod_dataframe
    print("Iteration #{0}, 4-permutations: ".format(iterationNumber))
    print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
Accuracy: [ 0.56461538  0.57461538  0.57428791  0.57505774  0.58968437]
Accuracy: [ 0.55        0.57615385  0.57351809  0.56428022  0.59815242]
Iteration #0, 3-permutations: 
Accuracy: [ 0.58461538  0.56692308  0.56581986  0.5604311   0.55966128]
Mean accuracy: 0.5747228045241902
Accuracy: [ 0.57923077  0.58692308  0.58121632  0.55273287  0.57505774]
Accuracy: [ 0.57        0.58615385  0.57736721  0.56428022  0.56505004]
Accuracy: [ 0.56846154  0.57461538  0.55812163  0.56966898  0.57351809]
Accuracy: [ 0.54769231  0.57846154  0.55119323  0.5704388   0.58121632]
Accuracy: [ 0.54923077  0.55307692  0.56581986  0.5873749   0.57659738]
Accuracy: [ 0.57230769  0.58        0.56735951  0.59199384  0.56735951]
Iteration #0, 2-permutations: 
Accuracy: [ 0.57769231  0.57615385  0.57274827  0.56274057  0.56812933]
Mean accuracy: 0.5708826908272636
Accuracy: [ 0.56923077  0.58538462  0.54580446  0.56889915  0.56812933]
Accuracy: [ 0.58307692  0.56538462  0.5604311   0.56889915  0.57736721]
Accuracy: [ 0.57153846  0.56923077  0.5804465   0.55658199  0.56197075]
Accuracy: [ 0.56615385  0.56538462  0.57505774  0.5873749   0.56812933]
Accuracy: [ 0.59        0.57538462  0.59353349  0.56197075  0.56505004]
Accuracy: [ 0.55846154  0.58769231  0.5904542   0.57351809  0.57274827]
Accuracy: [ 0.57        0.58769231  0.56658968  0.56197075  0.58506543]
Accuracy: [ 0.58153846  0.58153846  0.58275597  0.58429561  0.5604311 ]
Accuracy: [ 0.55307692  0.58076923  0.59276366  0.5973826   0.57351809]
Accuracy: [ 0.58615385  0.56461538  0.57890685  0.57659738  0.59276366]
Iteration #0, 4-permutations: 
Accuracy: [ 0.57230769  0.56538462  0.56274057  0.58429561  0.57274827]
Mean accuracy: 0.5756481316989399
Accuracy: [ 0.55846154  0.58692308  0.58121632  0.59815242  0.5704388 ]
Accuracy: [ 0.57384615  0.57384615  0.56351039  0.60200154  0.57274827]
Iteration #1, 3-permutations: 
Accuracy: [ 0.59769231  0.58615385  0.57967667  0.57736721  0.5704388 ]
Mean accuracy: 0.5756500266477171
Accuracy: [ 0.57        0.55923077  0.58121632  0.56812933  0.58891455]
Accuracy: [ 0.58076923  0.56307692  0.5973826   0.57197844  0.5704388 ]
Iteration #1, 2-permutations: 
Accuracy: [ 0.6         0.56846154  0.5704388   0.53733641  0.57736721]
Mean accuracy: 0.5751861195002073
Accuracy: [ 0.59076923  0.58        0.54965358  0.5604311   0.59122402]
Accuracy: [ 0.59307692  0.58538462  0.58891455  0.55812163  0.55812163]
Accuracy: [ 0.57692308  0.57846154  0.55504234  0.58891455  0.5904542 ]
Accuracy: [ 0.59384615  0.58384615  0.58583526  0.55812163  0.58583526]
Iteration #1, 4-permutations: 
Accuracy: [ 0.59076923  0.56769231  0.56658968  0.5604311   0.56966898]
Mean accuracy: 0.5794996150885295
Accuracy: [ 0.57307692  0.56846154  0.56812933  0.5873749   0.58968437]
Accuracy: [ 0.57615385  0.58692308  0.56966898  0.58506543  0.53733641]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-18-ab31e2c20667> in <module>()
      7 
      8     mod_dataframe = new_df
----> 9     while (get_accuracy_rate(mod_dataframe) >= get_accuracy_rate(new_df)):
     10         mod_dataframe = new_df
     11         candidates = [i for i in permutations(listOfColumns,3)]

<ipython-input-4-a667f2e3c8a0> in get_accuracy_rate(targetDF)
     17     # Считаем верность (accuracy) для каждого из фолдов
     18     print("Accuracy: {0}".format(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y))))
---> 19     return np.mean(cross_val_score(rf_clf, X, y, scoring='accuracy', cv=kf.split(X,y)))

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)
    138                                               train, test, verbose, None,
    139                                               fit_params)
--> 140                       for train, test in cv_iter)
    141     return np.array(scores)[:, 0]
    142 

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    756             # was dispatched. In particular this covers the edge
    757             # case of Parallel used with an exhausted iterator.
--> 758             while self.dispatch_one_batch(iterator):
    759                 self._iterating = True
    760             else:

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    606                 return False
    607             else:
--> 608                 self._dispatch(tasks)
    609                 return True
    610 

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    569         dispatch_timestamp = time.time()
    570         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571         job = self._backend.apply_async(batch, callback=cb)
    572         self._jobs.append(job)
    573 

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    107     def apply_async(self, func, callback=None):
    108         """Schedule a func to be run"""
--> 109         result = ImmediateResult(func)
    110         if callback:
    111             callback(result)

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    324         # Don't delay the application, to avoid keeping the input
    325         # arguments in memory
--> 326         self.results = batch()
    327 
    328     def get(self):

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    236             estimator.fit(X_train, **fit_params)
    237         else:
--> 238             estimator.fit(X_train, y_train, **fit_params)
    239 
    240     except Exception as e:

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py in fit(self, X, y, sample_weight)
    324                     t, self, X, y, sample_weight, i, len(trees),
    325                     verbose=self.verbose, class_weight=self.class_weight)
--> 326                 for i, t in enumerate(trees))
    327 
    328             # Collect newly grown trees

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    756             # was dispatched. In particular this covers the edge
    757             # case of Parallel used with an exhausted iterator.
--> 758             while self.dispatch_one_batch(iterator):
    759                 self._iterating = True
    760             else:

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    606                 return False
    607             else:
--> 608                 self._dispatch(tasks)
    609                 return True
    610 

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    569         dispatch_timestamp = time.time()
    570         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571         job = self._backend.apply_async(batch, callback=cb)
    572         self._jobs.append(job)
    573 

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    107     def apply_async(self, func, callback=None):
    108         """Schedule a func to be run"""
--> 109         result = ImmediateResult(func)
    110         if callback:
    111             callback(result)

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    324         # Don't delay the application, to avoid keeping the input
    325         # arguments in memory
--> 326         self.results = batch()
    327 
    328     def get(self):

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight)
    118             curr_sample_weight *= compute_sample_weight('balanced', y, indices)
    119 
--> 120         tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
    121     else:
    122         tree.fit(X, y, sample_weight=sample_weight, check_input=False)

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
    737             sample_weight=sample_weight,
    738             check_input=check_input,
--> 739             X_idx_sorted=X_idx_sorted)
    740         return self
    741 

C:\Users\Oleg\Anaconda3\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
    348                                            self.min_impurity_split)
    349 
--> 350         builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)
    351 
    352         if self.n_outputs_ == 1:

KeyboardInterrupt: 
In [19]:
print("Final testing of accuracy")
print('Mean accuracy: {0}'.format(get_accuracy_rate(new_df)))
Final testing of accuracy
Accuracy: [ 0.59846154  0.56230769  0.57351809  0.59430331  0.55427252]
Mean accuracy: 0.5765708533191212
In [20]:
targetColumn = 'quality'


# Чтобы нельзя было увидеть исходные столбцы из wine quality, перемешиваем их
new_df[targetColumn]

# Берем названия столбцов
cols = list(new_df.columns)
# Перемешиваем их
random.shuffle(cols)

# Задаем новый порядок
df = new_df.reindex_axis(cols, axis=1)

# Теперь столбец с целевой переменной перемещаем на последнее место
df = df.drop(['quality'], axis=1)
df = pd.concat([df,new_df[targetColumn]], axis=1)
df
Out[20]:
name__alcohol_chlorides_citric acid_residual sugar name__total sulfur dioxide_citric acid name__total sulfur dioxide_chlorides name__citric acid_chlorides elem_0 name__density_sulphates_fixed acidity_total sulfur dioxide name__citric acid_alcohol name__volatile acidity_residual sugar_citric acid_total sulfur dioxide name__residual sugar_sulphates elem1_0 ... name__total sulfur dioxide_volatile acidity_pH_alcohol name__density_total sulfur dioxide name__total sulfur dioxide_citric acid_residual sugar_sulphates name__density_residual sugar_alcohol_citric acid name__free sulfur dioxide_total sulfur dioxide name__total sulfur dioxide_sulphates_fixed acidity_pH name__citric acid_sulphates_fixed acidity_pH name__chlorides_fixed acidity_density_volatile acidity name__volatile acidity_alcohol_citric acid_residual sugar quality
0 -0.478283 -2.213967 -2.640030 0.684143 0.963702 1.222948 -1.152197 -2.027630 0.661563 0.910785 ... -1.906174 -1.059609 -2.187578 0.655273 -3.801710 -1.356129 4.404720 1.686248 -0.425134 5
1 -0.416882 -0.788398 -1.619442 0.140811 0.125972 2.235487 -1.014809 -0.948248 0.544026 2.898419 ... -0.937044 -0.557045 -1.195460 -0.085663 -1.534467 -0.335009 2.509282 1.628263 -0.111945 5
2 -0.202222 -1.071511 -1.579510 0.091978 0.567123 1.799232 -0.379224 -1.826647 0.747934 1.883060 ... -1.182087 -0.407708 -2.243586 -0.072177 -2.081265 -1.016039 2.228239 1.274932 0.407152 5
3 -0.248368 -0.760257 -1.101823 0.722705 0.447410 2.193620 -0.654403 -1.398025 0.477155 3.149005 ... -2.416570 0.331195 -1.507227 0.746818 -2.397424 -1.181693 0.937475 1.966068 0.680110 6
4 -0.413262 -2.363772 -2.433693 0.257073 0.109966 0.964414 -0.987758 -2.587727 0.782671 3.737424 ... -1.163915 -0.512178 -2.933797 0.293850 -3.468229 -0.885008 3.725170 1.792081 -0.521255 5
5 -0.555860 -1.617497 -1.771817 0.915180 0.274020 1.247003 -1.279413 -2.254374 0.423601 1.259898 ... -1.802123 -0.640739 -2.373853 0.429295 -3.270010 -0.692253 4.557135 1.575228 -0.186711 5
6 -1.143486 -1.344627 -1.543131 0.796627 0.434179 0.788886 -1.634137 -1.226786 0.457391 1.556075 ... -1.519889 -0.403382 -1.666689 -0.218545 -2.569563 -1.316604 0.473790 0.551849 -0.170027 5
7 0.187024 -2.333007 -2.302782 0.827787 0.922521 -1.568044 -0.763121 -2.481517 -0.153139 3.547803 ... -1.856568 -3.012415 -3.115052 0.349773 -3.507581 -2.567682 1.704580 0.749956 -0.294431 7
8 -1.002961 -2.303354 -2.673976 0.177249 0.250748 0.685932 -0.578056 -2.864814 0.756897 1.835178 ... -2.061171 -1.537410 -2.768741 -0.116310 -3.987075 -1.522621 2.815789 1.017975 -0.155973 7
9 0.722679 0.465044 0.187940 0.624961 0.466162 3.240046 0.622276 -0.020450 1.168646 4.016444 ... 0.408039 1.273450 0.132694 0.913820 -1.732049 1.135831 5.004930 1.460171 0.616403 5
10 -0.805252 -1.438636 -1.689252 0.608807 0.141400 0.898329 -1.093433 -1.280877 0.571205 2.088654 ... -2.408880 -0.528962 -1.275497 -0.249468 -2.296349 -1.150133 1.029836 1.282106 0.137737 5
11 0.700490 0.108888 -0.012820 0.982347 0.359799 3.909861 0.981651 -0.081168 1.126112 3.598302 ... 0.768855 1.153206 0.292383 0.747113 -0.944767 1.856977 5.683687 1.002447 0.310331 5
12 -0.258602 -0.841112 -1.222429 0.860943 0.299262 -0.365507 -0.107277 -1.210638 0.081789 1.592301 ... -0.308574 -1.188232 -2.069158 -0.316118 -1.845358 -0.714589 4.396622 0.492017 -0.098624 5
13 -1.369402 -1.949055 -2.290166 1.074353 0.791086 6.408129 -1.888737 -2.533301 3.163933 1.175871 ... -2.949081 -1.330478 -1.284520 -0.328896 -3.317492 1.785229 13.585969 1.793077 0.161766 5
14 -1.126438 1.263476 1.709850 0.872946 0.619282 5.725044 -1.903149 1.571477 1.494160 1.312797 ... -1.082434 2.990815 1.597808 0.085041 3.687123 3.104751 4.624860 2.103116 -0.411927 5
15 -0.498509 1.154674 2.120706 1.170033 0.081316 5.329532 -1.075350 1.560374 1.100451 4.937626 ... -0.573198 3.276460 2.100919 0.247025 3.315187 2.962000 5.191529 1.717098 0.341791 5
16 0.188322 0.317919 0.521554 0.397273 0.859256 3.003631 0.530147 0.392047 0.999356 0.874371 ... 0.037734 0.570264 0.893967 0.959701 0.275360 1.703801 3.939884 1.009635 0.591173 7
17 -0.578201 -1.147809 -1.012134 1.628251 0.987506 4.754865 -1.394030 -1.900020 1.996386 5.337035 ... -3.013082 -0.901929 -0.659227 -0.357837 -2.710896 1.359244 8.425891 1.284350 -0.083949 5
18 -1.197140 -2.490853 -2.223153 0.708309 0.592840 0.200974 -1.385509 -2.316716 0.557840 2.118574 ... -2.705586 -1.628289 -3.064470 0.170940 -3.890830 -2.064242 1.691804 1.512927 -0.264051 4
19 -0.669481 -1.234895 -1.081497 1.514516 0.584530 4.081584 -0.822537 -1.333693 1.726978 0.224845 ... -3.202157 -0.489831 -0.512077 -0.381910 -2.279208 0.050886 5.080082 1.642790 -0.020820 6
20 -0.475792 -1.254881 -1.292806 0.246659 0.824237 0.726824 -0.526297 -1.821336 0.935710 2.710390 ... -1.843603 -0.545160 -1.160431 -0.110632 -0.519993 -0.512707 3.269135 0.610509 -0.418407 6
21 -0.062695 -0.484099 -0.787870 0.767299 0.048866 2.302608 -0.344390 -0.911288 0.409977 4.017808 ... -0.445627 0.500191 -0.458833 0.984103 -1.392748 0.372676 5.049873 1.739980 0.128020 5
22 -0.128106 -2.072688 -2.407602 0.978550 0.111442 2.608775 -1.299084 -1.919077 1.566608 2.349346 ... -2.896897 -1.639309 -2.186183 -0.097270 -3.085381 -0.802771 4.713787 0.943972 -0.320782 5
23 -0.792323 -0.593678 -1.003570 0.398502 0.736443 1.111517 -1.268817 -1.456753 0.797764 1.185308 ... -2.601814 -0.187135 -1.812473 -0.091447 -2.808533 -1.183920 0.002838 0.669112 0.280655 5
24 -0.429528 -1.679373 -2.493998 0.770772 0.105107 0.942746 -1.062227 -2.160717 0.282306 1.732734 ... -1.561004 -1.021895 -1.803214 0.329543 -2.138758 -1.183470 4.142319 0.684148 0.169529 6
25 -1.013358 -2.002284 -3.048006 0.451947 0.289703 0.181959 -0.837330 -2.527793 0.072096 1.831610 ... -3.036272 -1.894093 -2.799900 -0.763523 -3.809952 -1.681011 1.850960 0.309223 0.153014 5
26 -0.897272 -1.901506 -3.040008 0.604336 0.615587 0.493634 -0.882756 -3.189613 0.288642 1.959315 ... -2.686791 -2.643124 -3.025610 -0.007652 -4.601875 -2.243167 1.965979 1.288481 -0.140444 5
27 -0.392025 -1.425848 -1.711070 0.991837 0.354453 2.279911 -0.820984 -2.770463 1.040287 1.768816 ... -3.238078 -0.964924 -1.878471 0.476292 -3.846969 -0.539989 4.786496 1.259148 0.009888 5
28 -1.015235 -1.757910 -2.531480 0.531653 0.087781 1.147167 -1.619059 -2.708835 0.142814 4.882426 ... -2.244693 -1.085859 -3.030047 0.292079 -2.613734 -1.475827 3.974091 1.045569 0.344409 5
29 -0.812856 -2.036425 -3.038716 0.863130 0.956309 0.527511 -0.899260 -2.744348 0.966399 2.762723 ... -1.530598 -2.566893 -3.165582 0.660980 -4.459248 -2.010311 2.936815 0.914611 0.356791 6
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4868 0.773966 1.031942 0.814174 0.113335 0.106613 0.705251 0.950414 0.685295 1.179255 1.447798 ... 0.984347 -0.160937 1.258517 0.204865 1.627361 0.874435 2.783931 0.229651 0.994845 6
4869 -0.325304 0.007284 -0.989292 -0.019937 0.536001 0.454124 -0.266204 -0.944899 0.452231 2.912857 ... -1.234651 0.243895 -0.680293 0.638123 -2.957975 -0.596431 -0.579771 1.118092 0.860843 6
4870 1.407362 1.248991 0.729838 0.440455 0.747578 -1.951163 2.144557 0.772573 0.285851 1.161407 ... 2.189510 0.382576 0.647076 0.158944 0.629816 -0.202777 -2.118623 -0.604631 1.123709 7
4871 2.209935 0.365248 -0.401793 0.770510 0.166730 -2.931917 3.056815 -0.695417 0.397739 3.855181 ... 3.163627 -1.618340 0.070879 0.112822 -0.636209 -0.279786 2.274530 -1.271899 0.629962 6
4872 -0.035135 2.563889 2.508937 0.247788 0.540455 2.139895 -0.474984 3.388282 0.396316 1.747787 ... 0.078527 3.047676 2.619323 0.384270 4.100411 2.160890 0.538127 0.913770 0.811652 5
4873 1.858546 0.561262 0.682497 0.113391 0.647477 -1.844306 2.073612 1.046149 0.304821 1.533352 ... 3.567208 -0.648470 0.257282 0.488681 1.258308 0.281621 1.203277 -0.806852 1.005950 6
4874 -0.140961 0.488357 1.059394 0.027545 0.063533 -0.576996 -0.044634 0.692409 0.226179 4.860501 ... 0.414404 0.516456 0.473963 -0.501014 -1.046891 1.510466 2.433562 -0.555042 -0.232443 6
4875 -0.180113 0.076192 -0.103752 0.022736 0.380758 -1.202363 -0.372775 -0.076515 -0.118406 4.955946 ... -1.884645 -0.621701 -0.380639 -0.649475 -1.295568 -1.189119 -2.459199 0.397683 0.470148 6
4876 1.304160 1.042614 0.520731 0.774318 0.034238 -1.207375 2.208312 0.993403 0.452742 3.761774 ... 2.553453 -0.731263 0.459425 0.714278 1.312428 0.699373 2.440311 -0.632819 0.585020 7
4877 -1.951085 -0.384324 -0.828416 0.245470 0.025559 -1.673372 -1.826046 -0.265926 -0.255501 1.127825 ... -2.363450 -1.378423 -1.115043 -1.252807 -2.147823 -0.885678 -1.898321 -0.452237 -0.766517 5
4878 -1.166221 -1.028839 -0.754207 0.296731 0.868769 -2.601292 -1.077188 -0.931504 -0.068753 3.829208 ... -1.146448 -1.553153 -1.463527 -0.775106 -2.462946 -1.325951 -1.992374 -0.045361 0.295007 4
4879 -0.219551 2.035592 2.177237 0.975339 0.230322 0.897686 -0.426011 2.538158 0.527566 3.882505 ... -0.266849 2.540680 2.332699 -0.039273 5.890210 1.471003 -0.485617 0.459435 -0.032751 6
4880 -0.617287 2.049617 2.721893 0.599718 0.831959 0.817070 -1.171510 2.654859 0.156911 1.345925 ... 0.094293 2.183317 1.947614 -0.415179 5.401485 1.695110 -0.880692 0.316263 0.706347 6
4881 -1.005690 0.761560 0.589218 0.642400 0.225156 0.568350 -1.478329 0.890466 0.982594 2.982989 ... -2.140410 0.544678 0.416322 -0.154365 0.896756 -0.512274 -2.148811 0.528597 -0.033937 6
4882 0.956398 1.863328 1.528597 0.142277 0.256223 -1.753222 1.111464 2.347314 -0.264179 2.883249 ... 1.718214 0.797376 1.392095 0.195698 3.362565 1.132632 -1.297307 -0.031256 0.751454 5
4883 1.569808 0.943914 2.032330 0.824035 0.281259 -3.391974 1.532926 1.851298 -0.125040 2.025174 ... 3.312068 -0.287759 1.488011 0.349068 4.319608 0.967502 -1.664008 -0.994605 0.875385 6
4884 -0.304538 2.027566 2.400392 0.016879 0.829558 0.788623 -0.324252 2.889908 0.009044 1.920823 ... 0.157820 2.758599 2.966721 0.033440 5.758555 2.180229 -0.976330 0.425362 0.620639 5
4885 -0.873283 1.831221 2.558328 0.255377 0.461025 1.164142 -1.301629 2.776247 0.458644 3.889055 ... -0.167620 1.900748 2.170648 -0.198759 6.147723 1.995351 -0.416972 0.338289 0.187383 6
4886 2.703794 0.852767 0.941143 0.276276 0.773274 1.801400 3.045503 0.790318 1.393247 0.351957 ... 3.278612 -1.030422 1.753329 1.414937 2.549355 3.287896 7.031204 -0.627692 0.888318 7
4887 2.781300 -1.264810 -1.164251 0.305303 0.724363 -2.122917 3.673328 -1.275776 1.385913 0.931251 ... 3.329545 -3.622192 -1.550825 1.091990 -3.105554 -1.025556 1.292526 -1.024504 1.738765 7
4888 -0.562415 0.739697 0.638122 0.062751 0.256872 -0.362067 -1.821780 1.095450 0.659374 1.834312 ... -1.998783 0.240934 1.050916 -0.326037 1.877147 0.684427 -1.807519 -0.190896 -0.145854 5
4889 -0.797154 0.533663 0.888649 0.789960 0.009050 -0.013914 -1.043141 0.525176 0.308703 0.700887 ... -1.424222 0.840558 0.341886 -0.206782 0.914131 -0.238012 -2.260413 0.163318 0.178770 6
4890 1.870422 0.327469 -0.035135 0.290066 0.697058 -2.836534 1.930947 0.328936 0.213337 1.013143 ... 1.408807 -2.594730 -0.422990 -0.219193 -0.522468 -0.731795 -3.004844 -0.716293 1.002671 6
4891 0.893812 0.810451 0.631597 0.097856 0.950967 -2.195252 0.328870 0.550190 0.555023 0.994869 ... 1.163509 -0.831607 0.110677 -0.141000 1.301248 -0.102037 -0.696146 -0.146933 0.699947 6
4892 -0.535997 0.528190 0.595106 0.873245 0.829751 -0.245070 -0.669575 0.350100 0.327846 3.961168 ... -1.018608 0.030936 0.576389 -0.108447 0.642000 1.002172 1.395517 0.368097 0.514939 5
4893 1.565767 -0.566277 -0.198058 0.770311 0.041606 -1.841681 1.814109 -0.853114 0.217202 1.537309 ... 0.963009 -1.525094 -0.343275 0.434351 -0.892645 -0.691755 0.449269 -0.159772 0.509648 6
4894 -0.639722 1.547914 2.415930 0.051300 0.675902 0.562113 -0.992682 2.867673 0.155048 8.198587 ... -0.150793 1.956783 1.919438 0.280612 4.436135 1.171161 -1.410279 0.852113 0.656496 5
4895 -0.647004 -0.148548 0.580455 0.106387 0.033165 -0.832290 -1.259207 -0.006483 0.527104 4.594427 ... -2.020286 -0.452110 -0.027390 -1.215087 0.385619 -0.638500 -2.649501 -0.396957 -0.041138 6
4896 3.010521 0.275813 -0.143146 0.338426 0.281199 -4.269512 4.277830 0.164078 -0.423296 2.223884 ... 4.649211 -2.564197 0.417968 0.367732 -0.686613 0.329296 -0.622103 -0.633970 1.492943 7
4897 2.293042 -0.108104 -0.126011 0.583681 0.613773 -4.368150 2.809822 -0.639298 -0.481201 4.477054 ... 2.599396 -1.711998 -0.351413 -0.104888 -1.038270 -1.145385 -1.614386 -0.701789 1.187200 6

6497 rows × 224 columns

In [21]:
df = df.sample(frac=0.9).reset_index(drop=True)
df
Out[21]:
name__alcohol_chlorides_citric acid_residual sugar name__total sulfur dioxide_citric acid name__total sulfur dioxide_chlorides name__citric acid_chlorides elem_0 name__density_sulphates_fixed acidity_total sulfur dioxide name__citric acid_alcohol name__volatile acidity_residual sugar_citric acid_total sulfur dioxide name__residual sugar_sulphates elem1_0 ... name__total sulfur dioxide_volatile acidity_pH_alcohol name__density_total sulfur dioxide name__total sulfur dioxide_citric acid_residual sugar_sulphates name__density_residual sugar_alcohol_citric acid name__free sulfur dioxide_total sulfur dioxide name__total sulfur dioxide_sulphates_fixed acidity_pH name__citric acid_sulphates_fixed acidity_pH name__chlorides_fixed acidity_density_volatile acidity name__volatile acidity_alcohol_citric acid_residual sugar quality
0 0.227592 1.558518 1.664460 -0.027473 0.330031 0.425976 0.135259 1.938229 -0.438415 2.641077 ... 0.434843 1.469472 1.606509 0.237339 0.973398 0.594251 -2.123956 0.921411 0.855172 5
1 -0.045473 2.788140 2.877576 0.215871 0.066918 3.723036 -0.676680 2.660169 1.155078 1.836417 ... 0.803771 2.806482 3.646644 0.008963 5.400050 3.804687 4.651396 0.073775 0.168840 5
2 -1.712185 2.780299 3.217684 0.713956 0.838745 3.194167 -2.385246 3.304605 0.312159 0.852291 ... -1.875952 4.266674 3.311602 0.351215 7.241129 2.389160 -1.715098 1.386217 0.387089 5
3 -0.419863 3.237882 4.196849 0.854662 0.371440 3.626434 -0.722377 4.006264 0.795518 1.193159 ... 2.373420 5.088340 3.918420 0.855602 4.837012 3.568480 2.502341 1.444522 0.041461 6
4 -0.307260 0.258421 0.198612 0.415486 0.855955 0.366932 -1.100628 0.366125 0.374110 8.471863 ... -1.700884 0.870573 0.284465 -0.420926 -1.540604 0.632883 -0.794523 1.079984 0.369052 4
5 1.183370 0.334783 0.099776 0.135584 0.052050 -0.169885 1.143624 -0.065276 1.179131 3.918653 ... 1.015351 -1.304507 0.880431 0.184012 1.490661 1.363457 1.483631 -0.193233 0.949995 6
6 -0.244344 0.177226 0.148857 0.541161 0.955383 -0.035611 -0.312978 0.215212 0.274592 3.001541 ... -0.850271 1.416221 0.952724 0.881361 -0.589111 -0.506174 -3.405627 1.186127 0.442047 5
7 -0.758581 0.209856 -0.118122 0.756227 0.963111 2.267244 -1.959541 -0.130295 1.035297 5.330133 ... -2.358864 0.811680 -0.539444 0.058470 -0.112394 0.115939 -0.004706 1.010447 -0.294211 5
8 0.509738 0.796781 0.694178 1.159435 0.109525 -1.442529 0.167842 0.052686 0.576750 0.906169 ... -0.127120 -1.081137 0.636389 -0.620195 0.196229 -0.173579 0.287197 0.152346 0.567403 5
9 -1.105694 0.058315 0.257915 0.983759 0.067465 -1.020146 -1.000149 0.457775 0.161189 1.858575 ... -2.365750 0.224420 0.127513 -0.218428 -0.031064 -0.597352 -4.039884 0.540300 -0.214238 6
10 0.517795 0.657187 0.958631 0.959237 0.406489 0.863070 0.192019 1.013132 0.678814 3.641739 ... 0.724614 0.734983 0.864792 -0.310225 1.169010 1.584385 2.394734 0.682109 -0.037966 7
11 -0.216544 2.487724 3.757297 0.833667 0.246426 1.869204 -0.276657 4.246059 0.051775 3.213570 ... 0.239199 3.715593 3.536151 0.390983 5.636058 2.648358 -0.787942 0.261716 0.134986 5
12 -0.030823 1.669713 1.449701 0.308632 0.870826 0.965102 -1.088238 1.160680 0.292854 1.508543 ... -0.267321 1.008892 1.009107 0.013575 3.137530 1.605813 2.328965 -0.173506 0.119533 8
13 2.532512 0.663371 1.040911 0.532441 0.930141 -1.942990 2.524843 0.462912 0.277203 2.694953 ... 3.339025 -1.467694 0.363239 0.294848 0.799586 -0.071826 -1.295806 -0.092188 1.515673 7
14 -0.927300 2.290394 2.567119 -0.003484 0.587684 3.771613 -1.158052 3.286506 0.637932 1.149233 ... -0.688389 3.786359 3.192368 0.088963 1.101538 1.706938 -1.852492 0.988406 0.120376 6
15 -0.805252 -1.438636 -1.689252 0.608807 0.141400 0.898329 -1.093433 -1.280877 0.571205 2.088654 ... -2.408880 -0.528962 -1.275497 -0.249468 -2.296349 -1.150133 1.029836 1.282106 0.137737 5
16 3.707683 0.207071 0.947326 0.778530 0.536602 -0.319422 4.828039 0.798528 1.612137 5.169873 ... 6.808884 -0.867807 0.630836 2.098455 -0.823225 1.941429 6.819429 -0.218895 1.198769 7
17 0.549017 1.362632 1.238093 0.097687 0.413920 -1.666465 0.549350 1.630658 0.052506 5.341274 ... 0.923685 -0.179301 0.986284 -0.060968 2.808849 0.024984 -3.028244 -0.603218 0.089566 6
18 -1.256569 -2.564801 -2.550288 0.299483 0.820501 -0.709686 -0.681529 -2.672096 0.472935 1.443661 ... -2.808752 -2.298254 -2.942024 0.096998 -4.577862 -2.510457 2.660393 0.760032 -0.497542 5
19 -0.935267 1.413965 2.034552 0.161635 0.295060 3.336941 -2.246983 1.608296 0.469460 5.965373 ... -1.517290 3.610432 1.835860 0.299163 2.466863 0.821008 -0.571721 1.643549 0.692445 6
20 -0.678544 1.209503 2.059097 0.123954 0.808909 0.918322 -0.955396 1.369504 -0.071198 3.310036 ... 0.261020 1.919125 0.919221 0.179152 2.786442 0.826415 -0.812206 0.495839 0.094861 5
21 0.117119 1.616873 2.945610 0.803248 0.203891 0.532842 -0.183320 2.481310 0.361501 0.862744 ... 3.279540 2.511152 2.188161 -0.349228 -0.099568 3.831945 6.926805 0.011939 0.154438 5
22 2.156631 0.762914 0.756830 0.625000 0.406208 -2.475535 3.567960 0.954996 0.134723 3.315622 ... 3.434240 -0.950215 0.542298 1.159385 1.573037 0.415830 -1.968778 -0.136317 1.362478 6
23 0.118243 0.882021 0.305616 0.627140 0.882556 -1.322782 0.801350 0.289063 0.831394 0.951858 ... 0.952706 -0.904282 0.379914 -0.526693 -1.330874 0.319565 1.609707 -0.340182 0.282026 7
24 -1.777068 0.290613 0.681867 0.174307 0.750957 -1.773042 -2.131557 0.046237 0.394325 4.272851 ... -2.830815 -0.192334 -0.064730 -0.987951 -1.618146 -0.786654 -2.991774 0.589283 0.140826 4
25 -0.861646 2.328985 2.566253 0.690703 0.809659 3.626765 -0.835995 3.659660 0.519721 0.774169 ... -0.151479 4.178866 3.252275 0.814671 4.614573 2.509126 -1.425625 0.596027 0.418696 7
26 2.037610 -0.473717 -1.471609 0.762277 0.957321 -3.351200 1.802759 -1.565907 0.543127 3.446179 ... 0.910942 -2.667700 -1.001588 0.119780 -1.647659 -2.282807 -2.464412 -0.197085 1.139601 4
27 1.859263 -1.699892 -2.522410 0.484512 0.969631 2.367803 2.496390 -2.986239 0.572276 3.952370 ... 0.139259 -1.258217 -1.781532 1.921199 -4.517802 -2.037521 2.060177 1.528924 0.537580 7
28 0.713291 -1.411833 -1.958340 0.584091 0.138193 2.690837 1.085428 -2.659101 0.780953 1.850247 ... -0.287627 -0.207652 -1.391198 1.812283 -2.077882 -0.555998 3.625295 2.178251 0.108438 5
29 -0.131261 0.272915 1.232472 0.793575 0.021082 0.624774 -0.221999 0.672610 0.769102 0.853623 ... 0.172306 0.711089 0.874524 0.551561 4.382707 -0.113409 -2.353924 0.743680 0.779778 6
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5817 1.684336 0.148344 -0.638565 0.346559 0.262823 -1.177912 2.383119 -0.389892 0.746837 0.739020 ... 1.918008 -0.713744 0.193917 0.957180 -0.274454 0.007778 -0.517266 -0.298535 1.256392 6
5818 -1.339294 3.050986 4.393677 0.151229 0.520696 4.030173 -1.911230 4.799026 0.538913 1.296810 ... 0.166820 5.827812 4.308435 0.429215 5.217967 3.736739 -1.211057 1.116808 0.209065 7
5819 -0.775040 0.369025 -0.125675 0.406338 0.593824 0.468962 -1.622197 0.336658 0.520481 1.432850 ... -2.062937 1.211595 0.423135 0.173661 0.682381 0.177941 -0.534050 0.728148 -0.063169 6
5820 -0.969049 2.183934 3.320431 0.166064 0.289940 4.262753 -1.416369 3.461153 0.611477 0.533747 ... -1.328558 4.308086 3.342853 0.765311 3.356561 2.146287 -1.664236 1.089743 0.829043 6
5821 -0.549015 1.905231 2.171229 0.164107 0.256748 2.720944 -0.684559 2.581841 0.125723 2.223438 ... 1.666210 3.606771 2.610019 0.835962 4.155324 2.963773 3.502350 1.233858 0.992361 6
5822 -0.184794 3.401827 4.012294 0.800918 0.763852 5.524412 -1.069056 4.502148 1.504858 2.624384 ... 0.237726 5.634735 4.819634 0.408350 7.521802 4.243802 1.657415 1.061499 0.262839 6
5823 0.837030 0.362535 0.899495 -0.013025 0.066164 -0.943357 0.403683 -0.161115 0.249230 2.731013 ... 1.486147 -0.674847 0.033081 -0.275181 0.129740 0.462483 1.559184 -0.161609 0.038261 6
5824 0.553997 -1.417418 -2.868272 0.848740 0.436089 1.799074 0.665291 -2.862720 0.678211 1.837529 ... -2.008403 -1.268380 -1.966829 1.542900 -4.556147 -2.065627 0.527535 1.727122 0.591156 5
5825 3.573800 0.159316 0.220005 0.894024 0.044911 -3.571741 3.995029 0.275360 0.193808 1.766974 ... 4.606328 -1.505350 0.545157 1.146069 3.102784 -0.556956 -3.288948 -0.527583 1.023172 7
5826 -1.013447 -2.321277 -3.017300 0.333081 0.290490 2.441209 -1.583040 -3.613429 0.914697 2.074566 ... -3.155182 -1.892768 -3.156555 0.146704 -4.097368 -1.584924 4.986059 1.345938 -0.165180 5
5827 0.306293 0.913355 0.920927 0.928401 0.362644 0.859068 0.562698 0.804431 0.292529 4.605179 ... 1.242005 2.398811 0.928890 1.253982 3.264975 0.055238 -2.171233 1.175908 0.311707 5
5828 -0.668103 1.948764 2.007476 0.443412 0.446517 0.639765 -0.976233 2.694725 0.759375 6.267733 ... -0.399241 2.747313 2.283765 -0.225974 2.812130 1.361302 -1.749146 0.979406 -0.334729 6
5829 0.724978 3.218822 3.754466 0.096578 0.045546 1.982709 0.008561 4.284980 0.166749 7.317244 ... 2.158178 4.596267 3.876167 0.332410 3.243396 3.449588 1.030720 0.915898 0.995796 6
5830 -0.970513 0.752263 0.682730 0.731552 0.800870 0.183587 -1.139633 1.287314 -0.407099 2.622624 ... -0.680387 1.002067 0.640098 -0.069868 -1.363895 -0.784909 -2.967880 1.188553 0.746109 5
5831 3.853778 0.323418 -0.636761 0.564543 0.051277 6.295425 4.050108 -0.912915 1.350268 3.435931 ... 3.513643 1.584078 0.415694 4.256205 -1.530000 1.339207 4.001286 2.938228 1.932350 5
5832 -0.096259 2.470160 3.236854 0.349927 0.656872 0.968289 -0.254539 2.636414 0.188441 4.112849 ... 0.768548 2.392730 2.509947 -0.069369 2.103849 1.872789 -0.844271 0.348204 -0.240732 5
5833 -0.340376 -0.600902 -2.255388 0.667252 0.301831 2.615126 -0.671852 -1.725816 0.908508 1.598279 ... -2.356687 -0.104468 -1.342173 0.259757 -1.120634 -1.024434 -0.054126 1.700223 0.008373 4
5834 -0.507955 4.235877 4.756860 0.879236 0.004114 4.523185 -0.751009 5.430181 0.453104 2.154023 ... 1.927748 6.751035 4.520947 0.792219 5.435910 5.531239 2.973025 1.533935 0.012274 6
5835 2.317845 -0.094592 -0.251396 0.524351 0.253310 -0.756686 3.756340 0.346190 0.976396 1.309117 ... 3.156594 -0.757255 0.138546 1.589919 -1.284629 1.153767 2.314617 -0.137656 0.994792 7
5836 -0.593844 -2.465929 -2.725095 0.662868 0.077390 -0.039329 -1.218243 -2.512400 0.880912 3.538369 ... -2.166060 -1.527828 -2.954220 0.477999 -3.874563 -1.447480 2.658658 1.070658 0.016114 5
5837 2.896888 -0.468643 -0.597451 -0.060037 0.605553 -2.372631 3.662627 -0.258713 0.301636 0.469918 ... 3.485849 -2.369108 -0.926800 1.759057 -1.818055 -0.612234 0.185349 0.119998 1.193040 6
5838 -0.973390 0.846689 1.501724 0.749495 0.430618 2.397338 -2.322811 0.863082 0.148679 1.922318 ... -3.389113 2.885768 0.906865 -0.245648 -0.103736 -0.134827 -4.735280 1.091735 0.286391 7
5839 -0.970243 -1.451962 -2.427624 0.451720 0.451164 0.728259 -0.952172 -2.435741 0.097194 0.905717 ... -2.353980 -1.560815 -2.883206 -0.016729 -4.260586 -1.848093 1.653681 0.807726 0.135076 5
5840 -1.081001 -0.290023 -0.472889 0.662982 0.584349 -1.689980 -1.338542 0.321087 0.425478 6.158132 ... -2.709867 -0.594374 -0.429635 -0.499436 -1.682714 -1.671447 -4.122221 0.319736 -0.195001 6
5841 0.742556 2.129944 1.785111 0.586710 0.636589 -1.007158 1.727429 2.054153 0.916939 3.965481 ... 1.940891 1.026992 1.698250 0.183743 5.725875 1.926919 -1.491753 -0.188857 0.656961 6
5842 2.862729 1.558536 1.367980 0.151518 0.457180 -2.143215 2.679898 1.907546 0.103726 3.200558 ... 3.821308 -0.535265 1.849478 0.905898 2.039208 1.480508 -0.336158 -0.409510 0.621124 6
5843 0.299162 -0.072308 -0.738608 0.917164 0.497839 0.316393 0.709187 -0.398455 0.773037 2.731865 ... -1.170987 -0.113737 -0.078206 0.350289 -0.865735 -1.029711 -3.015142 1.001908 1.101463 6
5844 3.180215 0.056742 -0.012276 0.152967 0.458279 -1.674190 3.883508 -0.400242 0.502771 4.632986 ... 3.746248 -2.272361 0.038005 0.689289 0.593933 -0.256932 1.852634 -0.285590 0.917205 7
5845 1.337500 -0.824857 -0.434262 0.047159 0.656366 -3.105874 1.110251 -0.623407 -0.016263 4.013435 ... -0.517778 -2.311745 -0.442607 0.192987 -0.542765 -1.897293 -3.731458 -0.866768 0.282215 6
5846 1.953950 -1.253213 -1.125297 1.012160 0.229267 2.976473 1.813165 -1.079942 1.188837 5.262199 ... 2.206069 0.799553 -0.607373 1.879514 -1.655481 0.024347 4.908465 1.709886 1.196523 7

5847 rows × 224 columns

In [22]:
df.quality.value_counts()
Out[22]:
6    2544
5    1919
7     986
4     191
8     176
3      26
9       5
Name: quality, dtype: int64
In [25]:
df = df.drop(df[df.quality == 9 ].index)
df = df.drop(df[df.quality == 3 ].index)
df
Out[25]:
name__alcohol_chlorides_citric acid_residual sugar name__total sulfur dioxide_citric acid name__total sulfur dioxide_chlorides name__citric acid_chlorides elem_0 name__density_sulphates_fixed acidity_total sulfur dioxide name__citric acid_alcohol name__volatile acidity_residual sugar_citric acid_total sulfur dioxide name__residual sugar_sulphates elem1_0 ... name__total sulfur dioxide_volatile acidity_pH_alcohol name__density_total sulfur dioxide name__total sulfur dioxide_citric acid_residual sugar_sulphates name__density_residual sugar_alcohol_citric acid name__free sulfur dioxide_total sulfur dioxide name__total sulfur dioxide_sulphates_fixed acidity_pH name__citric acid_sulphates_fixed acidity_pH name__chlorides_fixed acidity_density_volatile acidity name__volatile acidity_alcohol_citric acid_residual sugar quality
0 0.227592 1.558518 1.664460 -0.027473 0.330031 0.425976 0.135259 1.938229 -0.438415 2.641077 ... 0.434843 1.469472 1.606509 0.237339 0.973398 0.594251 -2.123956 0.921411 0.855172 5
1 -0.045473 2.788140 2.877576 0.215871 0.066918 3.723036 -0.676680 2.660169 1.155078 1.836417 ... 0.803771 2.806482 3.646644 0.008963 5.400050 3.804687 4.651396 0.073775 0.168840 5
2 -1.712185 2.780299 3.217684 0.713956 0.838745 3.194167 -2.385246 3.304605 0.312159 0.852291 ... -1.875952 4.266674 3.311602 0.351215 7.241129 2.389160 -1.715098 1.386217 0.387089 5
3 -0.419863 3.237882 4.196849 0.854662 0.371440 3.626434 -0.722377 4.006264 0.795518 1.193159 ... 2.373420 5.088340 3.918420 0.855602 4.837012 3.568480 2.502341 1.444522 0.041461 6
4 -0.307260 0.258421 0.198612 0.415486 0.855955 0.366932 -1.100628 0.366125 0.374110 8.471863 ... -1.700884 0.870573 0.284465 -0.420926 -1.540604 0.632883 -0.794523 1.079984 0.369052 4
5 1.183370 0.334783 0.099776 0.135584 0.052050 -0.169885 1.143624 -0.065276 1.179131 3.918653 ... 1.015351 -1.304507 0.880431 0.184012 1.490661 1.363457 1.483631 -0.193233 0.949995 6
6 -0.244344 0.177226 0.148857 0.541161 0.955383 -0.035611 -0.312978 0.215212 0.274592 3.001541 ... -0.850271 1.416221 0.952724 0.881361 -0.589111 -0.506174 -3.405627 1.186127 0.442047 5
7 -0.758581 0.209856 -0.118122 0.756227 0.963111 2.267244 -1.959541 -0.130295 1.035297 5.330133 ... -2.358864 0.811680 -0.539444 0.058470 -0.112394 0.115939 -0.004706 1.010447 -0.294211 5
8 0.509738 0.796781 0.694178 1.159435 0.109525 -1.442529 0.167842 0.052686 0.576750 0.906169 ... -0.127120 -1.081137 0.636389 -0.620195 0.196229 -0.173579 0.287197 0.152346 0.567403 5
9 -1.105694 0.058315 0.257915 0.983759 0.067465 -1.020146 -1.000149 0.457775 0.161189 1.858575 ... -2.365750 0.224420 0.127513 -0.218428 -0.031064 -0.597352 -4.039884 0.540300 -0.214238 6
10 0.517795 0.657187 0.958631 0.959237 0.406489 0.863070 0.192019 1.013132 0.678814 3.641739 ... 0.724614 0.734983 0.864792 -0.310225 1.169010 1.584385 2.394734 0.682109 -0.037966 7
11 -0.216544 2.487724 3.757297 0.833667 0.246426 1.869204 -0.276657 4.246059 0.051775 3.213570 ... 0.239199 3.715593 3.536151 0.390983 5.636058 2.648358 -0.787942 0.261716 0.134986 5
12 -0.030823 1.669713 1.449701 0.308632 0.870826 0.965102 -1.088238 1.160680 0.292854 1.508543 ... -0.267321 1.008892 1.009107 0.013575 3.137530 1.605813 2.328965 -0.173506 0.119533 8
13 2.532512 0.663371 1.040911 0.532441 0.930141 -1.942990 2.524843 0.462912 0.277203 2.694953 ... 3.339025 -1.467694 0.363239 0.294848 0.799586 -0.071826 -1.295806 -0.092188 1.515673 7
14 -0.927300 2.290394 2.567119 -0.003484 0.587684 3.771613 -1.158052 3.286506 0.637932 1.149233 ... -0.688389 3.786359 3.192368 0.088963 1.101538 1.706938 -1.852492 0.988406 0.120376 6
15 -0.805252 -1.438636 -1.689252 0.608807 0.141400 0.898329 -1.093433 -1.280877 0.571205 2.088654 ... -2.408880 -0.528962 -1.275497 -0.249468 -2.296349 -1.150133 1.029836 1.282106 0.137737 5
16 3.707683 0.207071 0.947326 0.778530 0.536602 -0.319422 4.828039 0.798528 1.612137 5.169873 ... 6.808884 -0.867807 0.630836 2.098455 -0.823225 1.941429 6.819429 -0.218895 1.198769 7
17 0.549017 1.362632 1.238093 0.097687 0.413920 -1.666465 0.549350 1.630658 0.052506 5.341274 ... 0.923685 -0.179301 0.986284 -0.060968 2.808849 0.024984 -3.028244 -0.603218 0.089566 6
18 -1.256569 -2.564801 -2.550288 0.299483 0.820501 -0.709686 -0.681529 -2.672096 0.472935 1.443661 ... -2.808752 -2.298254 -2.942024 0.096998 -4.577862 -2.510457 2.660393 0.760032 -0.497542 5
19 -0.935267 1.413965 2.034552 0.161635 0.295060 3.336941 -2.246983 1.608296 0.469460 5.965373 ... -1.517290 3.610432 1.835860 0.299163 2.466863 0.821008 -0.571721 1.643549 0.692445 6
20 -0.678544 1.209503 2.059097 0.123954 0.808909 0.918322 -0.955396 1.369504 -0.071198 3.310036 ... 0.261020 1.919125 0.919221 0.179152 2.786442 0.826415 -0.812206 0.495839 0.094861 5
21 0.117119 1.616873 2.945610 0.803248 0.203891 0.532842 -0.183320 2.481310 0.361501 0.862744 ... 3.279540 2.511152 2.188161 -0.349228 -0.099568 3.831945 6.926805 0.011939 0.154438 5
22 2.156631 0.762914 0.756830 0.625000 0.406208 -2.475535 3.567960 0.954996 0.134723 3.315622 ... 3.434240 -0.950215 0.542298 1.159385 1.573037 0.415830 -1.968778 -0.136317 1.362478 6
23 0.118243 0.882021 0.305616 0.627140 0.882556 -1.322782 0.801350 0.289063 0.831394 0.951858 ... 0.952706 -0.904282 0.379914 -0.526693 -1.330874 0.319565 1.609707 -0.340182 0.282026 7
24 -1.777068 0.290613 0.681867 0.174307 0.750957 -1.773042 -2.131557 0.046237 0.394325 4.272851 ... -2.830815 -0.192334 -0.064730 -0.987951 -1.618146 -0.786654 -2.991774 0.589283 0.140826 4
25 -0.861646 2.328985 2.566253 0.690703 0.809659 3.626765 -0.835995 3.659660 0.519721 0.774169 ... -0.151479 4.178866 3.252275 0.814671 4.614573 2.509126 -1.425625 0.596027 0.418696 7
26 2.037610 -0.473717 -1.471609 0.762277 0.957321 -3.351200 1.802759 -1.565907 0.543127 3.446179 ... 0.910942 -2.667700 -1.001588 0.119780 -1.647659 -2.282807 -2.464412 -0.197085 1.139601 4
27 1.859263 -1.699892 -2.522410 0.484512 0.969631 2.367803 2.496390 -2.986239 0.572276 3.952370 ... 0.139259 -1.258217 -1.781532 1.921199 -4.517802 -2.037521 2.060177 1.528924 0.537580 7
28 0.713291 -1.411833 -1.958340 0.584091 0.138193 2.690837 1.085428 -2.659101 0.780953 1.850247 ... -0.287627 -0.207652 -1.391198 1.812283 -2.077882 -0.555998 3.625295 2.178251 0.108438 5
29 -0.131261 0.272915 1.232472 0.793575 0.021082 0.624774 -0.221999 0.672610 0.769102 0.853623 ... 0.172306 0.711089 0.874524 0.551561 4.382707 -0.113409 -2.353924 0.743680 0.779778 6
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5817 1.684336 0.148344 -0.638565 0.346559 0.262823 -1.177912 2.383119 -0.389892 0.746837 0.739020 ... 1.918008 -0.713744 0.193917 0.957180 -0.274454 0.007778 -0.517266 -0.298535 1.256392 6
5818 -1.339294 3.050986 4.393677 0.151229 0.520696 4.030173 -1.911230 4.799026 0.538913 1.296810 ... 0.166820 5.827812 4.308435 0.429215 5.217967 3.736739 -1.211057 1.116808 0.209065 7
5819 -0.775040 0.369025 -0.125675 0.406338 0.593824 0.468962 -1.622197 0.336658 0.520481 1.432850 ... -2.062937 1.211595 0.423135 0.173661 0.682381 0.177941 -0.534050 0.728148 -0.063169 6
5820 -0.969049 2.183934 3.320431 0.166064 0.289940 4.262753 -1.416369 3.461153 0.611477 0.533747 ... -1.328558 4.308086 3.342853 0.765311 3.356561 2.146287 -1.664236 1.089743 0.829043 6
5821 -0.549015 1.905231 2.171229 0.164107 0.256748 2.720944 -0.684559 2.581841 0.125723 2.223438 ... 1.666210 3.606771 2.610019 0.835962 4.155324 2.963773 3.502350 1.233858 0.992361 6
5822 -0.184794 3.401827 4.012294 0.800918 0.763852 5.524412 -1.069056 4.502148 1.504858 2.624384 ... 0.237726 5.634735 4.819634 0.408350 7.521802 4.243802 1.657415 1.061499 0.262839 6
5823 0.837030 0.362535 0.899495 -0.013025 0.066164 -0.943357 0.403683 -0.161115 0.249230 2.731013 ... 1.486147 -0.674847 0.033081 -0.275181 0.129740 0.462483 1.559184 -0.161609 0.038261 6
5824 0.553997 -1.417418 -2.868272 0.848740 0.436089 1.799074 0.665291 -2.862720 0.678211 1.837529 ... -2.008403 -1.268380 -1.966829 1.542900 -4.556147 -2.065627 0.527535 1.727122 0.591156 5
5825 3.573800 0.159316 0.220005 0.894024 0.044911 -3.571741 3.995029 0.275360 0.193808 1.766974 ... 4.606328 -1.505350 0.545157 1.146069 3.102784 -0.556956 -3.288948 -0.527583 1.023172 7
5826 -1.013447 -2.321277 -3.017300 0.333081 0.290490 2.441209 -1.583040 -3.613429 0.914697 2.074566 ... -3.155182 -1.892768 -3.156555 0.146704 -4.097368 -1.584924 4.986059 1.345938 -0.165180 5
5827 0.306293 0.913355 0.920927 0.928401 0.362644 0.859068 0.562698 0.804431 0.292529 4.605179 ... 1.242005 2.398811 0.928890 1.253982 3.264975 0.055238 -2.171233 1.175908 0.311707 5
5828 -0.668103 1.948764 2.007476 0.443412 0.446517 0.639765 -0.976233 2.694725 0.759375 6.267733 ... -0.399241 2.747313 2.283765 -0.225974 2.812130 1.361302 -1.749146 0.979406 -0.334729 6
5829 0.724978 3.218822 3.754466 0.096578 0.045546 1.982709 0.008561 4.284980 0.166749 7.317244 ... 2.158178 4.596267 3.876167 0.332410 3.243396 3.449588 1.030720 0.915898 0.995796 6
5830 -0.970513 0.752263 0.682730 0.731552 0.800870 0.183587 -1.139633 1.287314 -0.407099 2.622624 ... -0.680387 1.002067 0.640098 -0.069868 -1.363895 -0.784909 -2.967880 1.188553 0.746109 5
5831 3.853778 0.323418 -0.636761 0.564543 0.051277 6.295425 4.050108 -0.912915 1.350268 3.435931 ... 3.513643 1.584078 0.415694 4.256205 -1.530000 1.339207 4.001286 2.938228 1.932350 5
5832 -0.096259 2.470160 3.236854 0.349927 0.656872 0.968289 -0.254539 2.636414 0.188441 4.112849 ... 0.768548 2.392730 2.509947 -0.069369 2.103849 1.872789 -0.844271 0.348204 -0.240732 5
5833 -0.340376 -0.600902 -2.255388 0.667252 0.301831 2.615126 -0.671852 -1.725816 0.908508 1.598279 ... -2.356687 -0.104468 -1.342173 0.259757 -1.120634 -1.024434 -0.054126 1.700223 0.008373 4
5834 -0.507955 4.235877 4.756860 0.879236 0.004114 4.523185 -0.751009 5.430181 0.453104 2.154023 ... 1.927748 6.751035 4.520947 0.792219 5.435910 5.531239 2.973025 1.533935 0.012274 6
5835 2.317845 -0.094592 -0.251396 0.524351 0.253310 -0.756686 3.756340 0.346190 0.976396 1.309117 ... 3.156594 -0.757255 0.138546 1.589919 -1.284629 1.153767 2.314617 -0.137656 0.994792 7
5836 -0.593844 -2.465929 -2.725095 0.662868 0.077390 -0.039329 -1.218243 -2.512400 0.880912 3.538369 ... -2.166060 -1.527828 -2.954220 0.477999 -3.874563 -1.447480 2.658658 1.070658 0.016114 5
5837 2.896888 -0.468643 -0.597451 -0.060037 0.605553 -2.372631 3.662627 -0.258713 0.301636 0.469918 ... 3.485849 -2.369108 -0.926800 1.759057 -1.818055 -0.612234 0.185349 0.119998 1.193040 6
5838 -0.973390 0.846689 1.501724 0.749495 0.430618 2.397338 -2.322811 0.863082 0.148679 1.922318 ... -3.389113 2.885768 0.906865 -0.245648 -0.103736 -0.134827 -4.735280 1.091735 0.286391 7
5839 -0.970243 -1.451962 -2.427624 0.451720 0.451164 0.728259 -0.952172 -2.435741 0.097194 0.905717 ... -2.353980 -1.560815 -2.883206 -0.016729 -4.260586 -1.848093 1.653681 0.807726 0.135076 5
5840 -1.081001 -0.290023 -0.472889 0.662982 0.584349 -1.689980 -1.338542 0.321087 0.425478 6.158132 ... -2.709867 -0.594374 -0.429635 -0.499436 -1.682714 -1.671447 -4.122221 0.319736 -0.195001 6
5841 0.742556 2.129944 1.785111 0.586710 0.636589 -1.007158 1.727429 2.054153 0.916939 3.965481 ... 1.940891 1.026992 1.698250 0.183743 5.725875 1.926919 -1.491753 -0.188857 0.656961 6
5842 2.862729 1.558536 1.367980 0.151518 0.457180 -2.143215 2.679898 1.907546 0.103726 3.200558 ... 3.821308 -0.535265 1.849478 0.905898 2.039208 1.480508 -0.336158 -0.409510 0.621124 6
5843 0.299162 -0.072308 -0.738608 0.917164 0.497839 0.316393 0.709187 -0.398455 0.773037 2.731865 ... -1.170987 -0.113737 -0.078206 0.350289 -0.865735 -1.029711 -3.015142 1.001908 1.101463 6
5844 3.180215 0.056742 -0.012276 0.152967 0.458279 -1.674190 3.883508 -0.400242 0.502771 4.632986 ... 3.746248 -2.272361 0.038005 0.689289 0.593933 -0.256932 1.852634 -0.285590 0.917205 7
5845 1.337500 -0.824857 -0.434262 0.047159 0.656366 -3.105874 1.110251 -0.623407 -0.016263 4.013435 ... -0.517778 -2.311745 -0.442607 0.192987 -0.542765 -1.897293 -3.731458 -0.866768 0.282215 6
5846 1.953950 -1.253213 -1.125297 1.012160 0.229267 2.976473 1.813165 -1.079942 1.188837 5.262199 ... 2.206069 0.799553 -0.607373 1.879514 -1.655481 0.024347 4.908465 1.709886 1.196523 7

5816 rows × 224 columns

In [26]:
df.quality.value_counts()
Out[26]:
6    2544
5    1919
7     986
4     191
8     176
Name: quality, dtype: int64
In [29]:
(df[targetColumn].apply(lambda x : x-4)).value_counts()
Out[29]:
2    2544
1    1919
3     986
0     191
4     176
Name: quality, dtype: int64
In [30]:
df[targetColumn] = df[targetColumn].apply(lambda x : x-4)
df
Out[30]:
name__alcohol_chlorides_citric acid_residual sugar name__total sulfur dioxide_citric acid name__total sulfur dioxide_chlorides name__citric acid_chlorides elem_0 name__density_sulphates_fixed acidity_total sulfur dioxide name__citric acid_alcohol name__volatile acidity_residual sugar_citric acid_total sulfur dioxide name__residual sugar_sulphates elem1_0 ... name__total sulfur dioxide_volatile acidity_pH_alcohol name__density_total sulfur dioxide name__total sulfur dioxide_citric acid_residual sugar_sulphates name__density_residual sugar_alcohol_citric acid name__free sulfur dioxide_total sulfur dioxide name__total sulfur dioxide_sulphates_fixed acidity_pH name__citric acid_sulphates_fixed acidity_pH name__chlorides_fixed acidity_density_volatile acidity name__volatile acidity_alcohol_citric acid_residual sugar quality
0 0.227592 1.558518 1.664460 -0.027473 0.330031 0.425976 0.135259 1.938229 -0.438415 2.641077 ... 0.434843 1.469472 1.606509 0.237339 0.973398 0.594251 -2.123956 0.921411 0.855172 1
1 -0.045473 2.788140 2.877576 0.215871 0.066918 3.723036 -0.676680 2.660169 1.155078 1.836417 ... 0.803771 2.806482 3.646644 0.008963 5.400050 3.804687 4.651396 0.073775 0.168840 1
2 -1.712185 2.780299 3.217684 0.713956 0.838745 3.194167 -2.385246 3.304605 0.312159 0.852291 ... -1.875952 4.266674 3.311602 0.351215 7.241129 2.389160 -1.715098 1.386217 0.387089 1
3 -0.419863 3.237882 4.196849 0.854662 0.371440 3.626434 -0.722377 4.006264 0.795518 1.193159 ... 2.373420 5.088340 3.918420 0.855602 4.837012 3.568480 2.502341 1.444522 0.041461 2
4 -0.307260 0.258421 0.198612 0.415486 0.855955 0.366932 -1.100628 0.366125 0.374110 8.471863 ... -1.700884 0.870573 0.284465 -0.420926 -1.540604 0.632883 -0.794523 1.079984 0.369052 0
5 1.183370 0.334783 0.099776 0.135584 0.052050 -0.169885 1.143624 -0.065276 1.179131 3.918653 ... 1.015351 -1.304507 0.880431 0.184012 1.490661 1.363457 1.483631 -0.193233 0.949995 2
6 -0.244344 0.177226 0.148857 0.541161 0.955383 -0.035611 -0.312978 0.215212 0.274592 3.001541 ... -0.850271 1.416221 0.952724 0.881361 -0.589111 -0.506174 -3.405627 1.186127 0.442047 1
7 -0.758581 0.209856 -0.118122 0.756227 0.963111 2.267244 -1.959541 -0.130295 1.035297 5.330133 ... -2.358864 0.811680 -0.539444 0.058470 -0.112394 0.115939 -0.004706 1.010447 -0.294211 1
8 0.509738 0.796781 0.694178 1.159435 0.109525 -1.442529 0.167842 0.052686 0.576750 0.906169 ... -0.127120 -1.081137 0.636389 -0.620195 0.196229 -0.173579 0.287197 0.152346 0.567403 1
9 -1.105694 0.058315 0.257915 0.983759 0.067465 -1.020146 -1.000149 0.457775 0.161189 1.858575 ... -2.365750 0.224420 0.127513 -0.218428 -0.031064 -0.597352 -4.039884 0.540300 -0.214238 2
10 0.517795 0.657187 0.958631 0.959237 0.406489 0.863070 0.192019 1.013132 0.678814 3.641739 ... 0.724614 0.734983 0.864792 -0.310225 1.169010 1.584385 2.394734 0.682109 -0.037966 3
11 -0.216544 2.487724 3.757297 0.833667 0.246426 1.869204 -0.276657 4.246059 0.051775 3.213570 ... 0.239199 3.715593 3.536151 0.390983 5.636058 2.648358 -0.787942 0.261716 0.134986 1
12 -0.030823 1.669713 1.449701 0.308632 0.870826 0.965102 -1.088238 1.160680 0.292854 1.508543 ... -0.267321 1.008892 1.009107 0.013575 3.137530 1.605813 2.328965 -0.173506 0.119533 4
13 2.532512 0.663371 1.040911 0.532441 0.930141 -1.942990 2.524843 0.462912 0.277203 2.694953 ... 3.339025 -1.467694 0.363239 0.294848 0.799586 -0.071826 -1.295806 -0.092188 1.515673 3
14 -0.927300 2.290394 2.567119 -0.003484 0.587684 3.771613 -1.158052 3.286506 0.637932 1.149233 ... -0.688389 3.786359 3.192368 0.088963 1.101538 1.706938 -1.852492 0.988406 0.120376 2
15 -0.805252 -1.438636 -1.689252 0.608807 0.141400 0.898329 -1.093433 -1.280877 0.571205 2.088654 ... -2.408880 -0.528962 -1.275497 -0.249468 -2.296349 -1.150133 1.029836 1.282106 0.137737 1
16 3.707683 0.207071 0.947326 0.778530 0.536602 -0.319422 4.828039 0.798528 1.612137 5.169873 ... 6.808884 -0.867807 0.630836 2.098455 -0.823225 1.941429 6.819429 -0.218895 1.198769 3
17 0.549017 1.362632 1.238093 0.097687 0.413920 -1.666465 0.549350 1.630658 0.052506 5.341274 ... 0.923685 -0.179301 0.986284 -0.060968 2.808849 0.024984 -3.028244 -0.603218 0.089566 2
18 -1.256569 -2.564801 -2.550288 0.299483 0.820501 -0.709686 -0.681529 -2.672096 0.472935 1.443661 ... -2.808752 -2.298254 -2.942024 0.096998 -4.577862 -2.510457 2.660393 0.760032 -0.497542 1
19 -0.935267 1.413965 2.034552 0.161635 0.295060 3.336941 -2.246983 1.608296 0.469460 5.965373 ... -1.517290 3.610432 1.835860 0.299163 2.466863 0.821008 -0.571721 1.643549 0.692445 2
20 -0.678544 1.209503 2.059097 0.123954 0.808909 0.918322 -0.955396 1.369504 -0.071198 3.310036 ... 0.261020 1.919125 0.919221 0.179152 2.786442 0.826415 -0.812206 0.495839 0.094861 1
21 0.117119 1.616873 2.945610 0.803248 0.203891 0.532842 -0.183320 2.481310 0.361501 0.862744 ... 3.279540 2.511152 2.188161 -0.349228 -0.099568 3.831945 6.926805 0.011939 0.154438 1
22 2.156631 0.762914 0.756830 0.625000 0.406208 -2.475535 3.567960 0.954996 0.134723 3.315622 ... 3.434240 -0.950215 0.542298 1.159385 1.573037 0.415830 -1.968778 -0.136317 1.362478 2
23 0.118243 0.882021 0.305616 0.627140 0.882556 -1.322782 0.801350 0.289063 0.831394 0.951858 ... 0.952706 -0.904282 0.379914 -0.526693 -1.330874 0.319565 1.609707 -0.340182 0.282026 3
24 -1.777068 0.290613 0.681867 0.174307 0.750957 -1.773042 -2.131557 0.046237 0.394325 4.272851 ... -2.830815 -0.192334 -0.064730 -0.987951 -1.618146 -0.786654 -2.991774 0.589283 0.140826 0
25 -0.861646 2.328985 2.566253 0.690703 0.809659 3.626765 -0.835995 3.659660 0.519721 0.774169 ... -0.151479 4.178866 3.252275 0.814671 4.614573 2.509126 -1.425625 0.596027 0.418696 3
26 2.037610 -0.473717 -1.471609 0.762277 0.957321 -3.351200 1.802759 -1.565907 0.543127 3.446179 ... 0.910942 -2.667700 -1.001588 0.119780 -1.647659 -2.282807 -2.464412 -0.197085 1.139601 0
27 1.859263 -1.699892 -2.522410 0.484512 0.969631 2.367803 2.496390 -2.986239 0.572276 3.952370 ... 0.139259 -1.258217 -1.781532 1.921199 -4.517802 -2.037521 2.060177 1.528924 0.537580 3
28 0.713291 -1.411833 -1.958340 0.584091 0.138193 2.690837 1.085428 -2.659101 0.780953 1.850247 ... -0.287627 -0.207652 -1.391198 1.812283 -2.077882 -0.555998 3.625295 2.178251 0.108438 1
29 -0.131261 0.272915 1.232472 0.793575 0.021082 0.624774 -0.221999 0.672610 0.769102 0.853623 ... 0.172306 0.711089 0.874524 0.551561 4.382707 -0.113409 -2.353924 0.743680 0.779778 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5817 1.684336 0.148344 -0.638565 0.346559 0.262823 -1.177912 2.383119 -0.389892 0.746837 0.739020 ... 1.918008 -0.713744 0.193917 0.957180 -0.274454 0.007778 -0.517266 -0.298535 1.256392 2
5818 -1.339294 3.050986 4.393677 0.151229 0.520696 4.030173 -1.911230 4.799026 0.538913 1.296810 ... 0.166820 5.827812 4.308435 0.429215 5.217967 3.736739 -1.211057 1.116808 0.209065 3
5819 -0.775040 0.369025 -0.125675 0.406338 0.593824 0.468962 -1.622197 0.336658 0.520481 1.432850 ... -2.062937 1.211595 0.423135 0.173661 0.682381 0.177941 -0.534050 0.728148 -0.063169 2
5820 -0.969049 2.183934 3.320431 0.166064 0.289940 4.262753 -1.416369 3.461153 0.611477 0.533747 ... -1.328558 4.308086 3.342853 0.765311 3.356561 2.146287 -1.664236 1.089743 0.829043 2
5821 -0.549015 1.905231 2.171229 0.164107 0.256748 2.720944 -0.684559 2.581841 0.125723 2.223438 ... 1.666210 3.606771 2.610019 0.835962 4.155324 2.963773 3.502350 1.233858 0.992361 2
5822 -0.184794 3.401827 4.012294 0.800918 0.763852 5.524412 -1.069056 4.502148 1.504858 2.624384 ... 0.237726 5.634735 4.819634 0.408350 7.521802 4.243802 1.657415 1.061499 0.262839 2
5823 0.837030 0.362535 0.899495 -0.013025 0.066164 -0.943357 0.403683 -0.161115 0.249230 2.731013 ... 1.486147 -0.674847 0.033081 -0.275181 0.129740 0.462483 1.559184 -0.161609 0.038261 2
5824 0.553997 -1.417418 -2.868272 0.848740 0.436089 1.799074 0.665291 -2.862720 0.678211 1.837529 ... -2.008403 -1.268380 -1.966829 1.542900 -4.556147 -2.065627 0.527535 1.727122 0.591156 1
5825 3.573800 0.159316 0.220005 0.894024 0.044911 -3.571741 3.995029 0.275360 0.193808 1.766974 ... 4.606328 -1.505350 0.545157 1.146069 3.102784 -0.556956 -3.288948 -0.527583 1.023172 3
5826 -1.013447 -2.321277 -3.017300 0.333081 0.290490 2.441209 -1.583040 -3.613429 0.914697 2.074566 ... -3.155182 -1.892768 -3.156555 0.146704 -4.097368 -1.584924 4.986059 1.345938 -0.165180 1
5827 0.306293 0.913355 0.920927 0.928401 0.362644 0.859068 0.562698 0.804431 0.292529 4.605179 ... 1.242005 2.398811 0.928890 1.253982 3.264975 0.055238 -2.171233 1.175908 0.311707 1
5828 -0.668103 1.948764 2.007476 0.443412 0.446517 0.639765 -0.976233 2.694725 0.759375 6.267733 ... -0.399241 2.747313 2.283765 -0.225974 2.812130 1.361302 -1.749146 0.979406 -0.334729 2
5829 0.724978 3.218822 3.754466 0.096578 0.045546 1.982709 0.008561 4.284980 0.166749 7.317244 ... 2.158178 4.596267 3.876167 0.332410 3.243396 3.449588 1.030720 0.915898 0.995796 2
5830 -0.970513 0.752263 0.682730 0.731552 0.800870 0.183587 -1.139633 1.287314 -0.407099 2.622624 ... -0.680387 1.002067 0.640098 -0.069868 -1.363895 -0.784909 -2.967880 1.188553 0.746109 1
5831 3.853778 0.323418 -0.636761 0.564543 0.051277 6.295425 4.050108 -0.912915 1.350268 3.435931 ... 3.513643 1.584078 0.415694 4.256205 -1.530000 1.339207 4.001286 2.938228 1.932350 1
5832 -0.096259 2.470160 3.236854 0.349927 0.656872 0.968289 -0.254539 2.636414 0.188441 4.112849 ... 0.768548 2.392730 2.509947 -0.069369 2.103849 1.872789 -0.844271 0.348204 -0.240732 1
5833 -0.340376 -0.600902 -2.255388 0.667252 0.301831 2.615126 -0.671852 -1.725816 0.908508 1.598279 ... -2.356687 -0.104468 -1.342173 0.259757 -1.120634 -1.024434 -0.054126 1.700223 0.008373 0
5834 -0.507955 4.235877 4.756860 0.879236 0.004114 4.523185 -0.751009 5.430181 0.453104 2.154023 ... 1.927748 6.751035 4.520947 0.792219 5.435910 5.531239 2.973025 1.533935 0.012274 2
5835 2.317845 -0.094592 -0.251396 0.524351 0.253310 -0.756686 3.756340 0.346190 0.976396 1.309117 ... 3.156594 -0.757255 0.138546 1.589919 -1.284629 1.153767 2.314617 -0.137656 0.994792 3
5836 -0.593844 -2.465929 -2.725095 0.662868 0.077390 -0.039329 -1.218243 -2.512400 0.880912 3.538369 ... -2.166060 -1.527828 -2.954220 0.477999 -3.874563 -1.447480 2.658658 1.070658 0.016114 1
5837 2.896888 -0.468643 -0.597451 -0.060037 0.605553 -2.372631 3.662627 -0.258713 0.301636 0.469918 ... 3.485849 -2.369108 -0.926800 1.759057 -1.818055 -0.612234 0.185349 0.119998 1.193040 2
5838 -0.973390 0.846689 1.501724 0.749495 0.430618 2.397338 -2.322811 0.863082 0.148679 1.922318 ... -3.389113 2.885768 0.906865 -0.245648 -0.103736 -0.134827 -4.735280 1.091735 0.286391 3
5839 -0.970243 -1.451962 -2.427624 0.451720 0.451164 0.728259 -0.952172 -2.435741 0.097194 0.905717 ... -2.353980 -1.560815 -2.883206 -0.016729 -4.260586 -1.848093 1.653681 0.807726 0.135076 1
5840 -1.081001 -0.290023 -0.472889 0.662982 0.584349 -1.689980 -1.338542 0.321087 0.425478 6.158132 ... -2.709867 -0.594374 -0.429635 -0.499436 -1.682714 -1.671447 -4.122221 0.319736 -0.195001 2
5841 0.742556 2.129944 1.785111 0.586710 0.636589 -1.007158 1.727429 2.054153 0.916939 3.965481 ... 1.940891 1.026992 1.698250 0.183743 5.725875 1.926919 -1.491753 -0.188857 0.656961 2
5842 2.862729 1.558536 1.367980 0.151518 0.457180 -2.143215 2.679898 1.907546 0.103726 3.200558 ... 3.821308 -0.535265 1.849478 0.905898 2.039208 1.480508 -0.336158 -0.409510 0.621124 2
5843 0.299162 -0.072308 -0.738608 0.917164 0.497839 0.316393 0.709187 -0.398455 0.773037 2.731865 ... -1.170987 -0.113737 -0.078206 0.350289 -0.865735 -1.029711 -3.015142 1.001908 1.101463 2
5844 3.180215 0.056742 -0.012276 0.152967 0.458279 -1.674190 3.883508 -0.400242 0.502771 4.632986 ... 3.746248 -2.272361 0.038005 0.689289 0.593933 -0.256932 1.852634 -0.285590 0.917205 3
5845 1.337500 -0.824857 -0.434262 0.047159 0.656366 -3.105874 1.110251 -0.623407 -0.016263 4.013435 ... -0.517778 -2.311745 -0.442607 0.192987 -0.542765 -1.897293 -3.731458 -0.866768 0.282215 2
5846 1.953950 -1.253213 -1.125297 1.012160 0.229267 2.976473 1.813165 -1.079942 1.188837 5.262199 ... 2.206069 0.799553 -0.607373 1.879514 -1.655481 0.024347 4.908465 1.709886 1.196523 3

5816 rows × 224 columns

In [31]:
df.quality.value_counts()
Out[31]:
2    2544
1    1919
3     986
0     191
4     176
Name: quality, dtype: int64
In [32]:
print("Final testing of accuracy")
get_accuracy_rate(df)
Final testing of accuracy
Accuracy: [ 0.55670103  0.57523646  0.58297506  0.57695615  0.58125537]
Out[32]:
0.57479427242615233
In [33]:
# Ну а теперь сохраняем датафрейм
df.to_csv('res.csv', sep=',', index=False, header=False)
In [34]:
df['quality'].unique()
Out[34]:
array([1, 2, 0, 3, 4], dtype=int64)
In [35]:
from sklearn.model_selection import train_test_split
In [36]:
targetColumn = 'quality'
    
FeatureColumns = df.columns.tolist()
FeatureColumns.remove(targetColumn)
    
    # Разделяем на X и y
X = df[FeatureColumns].values
y = df[targetColumn].values
In [37]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.4, random_state=42) # ;)
In [38]:
# Random Forests с 25 estimator'ами
rf_clf = RandomForestClassifier(n_estimators=25)
    # Задаем кросс-валидацию на 5 фолдов
kf = KFold(n_splits=5, shuffle=True)
    # Считаем верность (accuracy) для каждого из фолдов
print("Accuracy: {0}".format(cross_val_score(rf_clf, X_train, y_train, scoring='accuracy', cv=kf.split(X_train,y_train))))
np.mean(cross_val_score(rf_clf, X_train, y_train, scoring='accuracy', cv=kf.split(X_train,y_train)))

from sklearn.metrics import accuracy_score
rf_clf.fit(X_train, y_train)
print("Accuracy: {0}".format(accuracy_score(y_test, rf_clf.predict(X_test))))
Accuracy: [ 0.55873926  0.53295129  0.55014327  0.51002865  0.56241033]
Accuracy: 0.5457670820799312
In [39]:
import numpy as np
np.savetxt("x_train.csv", X_train, fmt='%10.8f',delimiter=";")
np.savetxt("x_test.csv", X_test, fmt='%10.8f', delimiter=";")

np.savetxt("y_train.csv", y_train, fmt='%d', delimiter=";")
np.savetxt("y_test.csv", y_test, fmt='%d', delimiter=";")
In [40]:
X_train.shape
Out[40]:
(3489, 223)
In [ ]: