# pip install --pre pycaret
# !pip install -U pycaret -q
Google Colab 사용자의 경우 다음의 코드를 실행합니다.
# Enable PyCaret's interactive/HTML displays inside Google Colab.
# NOTE(review): `pycaret.utils.enable_colab` exists only in PyCaret 2.x and
# was removed in PyCaret 3.x — confirm the installed version before running.
from pycaret.utils import enable_colab
enable_colab()
샘플 데이터셋 로드
# Load the built-in 'juice' sample dataset that ships with PyCaret
# (downloads on first use and displays the head of the DataFrame).
from pycaret.datasets import get_data
dataset = get_data('juice')
| 0 |
1 |
CH |
237 |
1 |
1.75 |
1.99 |
0.00 |
0.0 |
0 |
0 |
0.500000 |
1.99 |
1.75 |
0.24 |
No |
0.000000 |
0.000000 |
0.24 |
1 |
| 1 |
2 |
CH |
239 |
1 |
1.75 |
1.99 |
0.00 |
0.3 |
0 |
1 |
0.600000 |
1.69 |
1.75 |
-0.06 |
No |
0.150754 |
0.000000 |
0.24 |
1 |
| 2 |
3 |
CH |
245 |
1 |
1.86 |
2.09 |
0.17 |
0.0 |
0 |
0 |
0.680000 |
2.09 |
1.69 |
0.40 |
No |
0.000000 |
0.091398 |
0.23 |
1 |
| 3 |
4 |
MM |
227 |
1 |
1.69 |
1.69 |
0.00 |
0.0 |
0 |
0 |
0.400000 |
1.69 |
1.69 |
0.00 |
No |
0.000000 |
0.000000 |
0.00 |
1 |
| 4 |
5 |
CH |
228 |
7 |
1.69 |
1.69 |
0.00 |
0.0 |
0 |
0 |
0.956535 |
1.69 |
1.69 |
0.00 |
Yes |
0.000000 |
0.000000 |
0.00 |
0 |
# Class balance of the prospective target column 'STORE'
# (frequency of each store label, descending).
dataset.loc[:, "STORE"].value_counts()
0 356
2 222
3 196
1 157
4 139
Name: STORE, dtype: int64
import pandas as pd
# NOTE(review): '엑셀파일' ("excel file" in Korean) is a placeholder path —
# replace it with a real .xlsx path before running. As written this line
# raises FileNotFoundError; if it succeeds, it overwrites the 'juice'
# dataset loaded above, so the pipeline below runs on the Excel data.
dataset = pd.read_excel('엑셀파일')
머신러닝 파이프라인 진행
from pycaret.classification import *
# Initialise the classification experiment: 'STORE' is the target column
# and session_id pins the random seed so results are reproducible.
s = setup(data=dataset, target='STORE', session_id=123)
# Cross-validate every available classifier and keep the 5 best
# (returns a list because n_select > 1).
best = compare_models(n_select=5)
# Blend the top-5 models into a voting ensemble; rebinds `best` to the
# blended estimator (the original list is no longer referenced).
best = blend_models(best)
# Score the hold-out test split created by setup() with the blended model.
predictions = predict_model(best)
# Persist the full preprocessing pipeline + model as 'my-model.pkl'.
save_model(best, 'my-model')
| 0 |
Session id |
123 |
| 1 |
Target |
STORE |
| 2 |
Target type |
Multiclass |
| 3 |
Original data shape |
(1070, 19) |
| 4 |
Transformed data shape |
(1070, 19) |
| 5 |
Transformed train set shape |
(748, 19) |
| 6 |
Transformed test set shape |
(322, 19) |
| 7 |
Ordinal features |
2 |
| 8 |
Numeric features |
16 |
| 9 |
Categorical features |
2 |
| 10 |
Preprocess |
True |
| 11 |
Imputation type |
simple |
| 12 |
Numeric imputation |
mean |
| 13 |
Categorical imputation |
constant |
| 14 |
Maximum one-hot encoding |
5 |
| 15 |
Encoding method |
None |
| 16 |
Low variance threshold |
0 |
| 17 |
Fold Generator |
StratifiedKFold |
| 18 |
Fold Number |
10 |
| 19 |
CPU Jobs |
-1 |
| 20 |
Use GPU |
False |
| 21 |
Log Experiment |
False |
| 22 |
Experiment Name |
clf-default-name |
| 23 |
USI |
8a4d |
| nb |
Naive Bayes |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0160 |
| dt |
Decision Tree Classifier |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0170 |
| qda |
Quadratic Discriminant Analysis |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0170 |
| gbc |
Gradient Boosting Classifier |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0870 |
| lightgbm |
Light Gradient Boosting Machine |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.1000 |
| rf |
Random Forest Classifier |
0.9986 |
1.0000 |
0.9986 |
0.9987 |
0.9986 |
0.9983 |
0.9983 |
0.0480 |
| et |
Extra Trees Classifier |
0.9973 |
1.0000 |
0.9973 |
0.9975 |
0.9973 |
0.9966 |
0.9966 |
0.0430 |
| lr |
Logistic Regression |
0.9933 |
0.9999 |
0.9933 |
0.9940 |
0.9933 |
0.9914 |
0.9915 |
0.0450 |
| ridge |
Ridge Classifier |
0.8463 |
0.0000 |
0.8463 |
0.8650 |
0.8434 |
0.8027 |
0.8078 |
0.0140 |
| ada |
Ada Boost Classifier |
0.6872 |
0.8923 |
0.6872 |
0.5626 |
0.5981 |
0.5883 |
0.6574 |
0.0270 |
| knn |
K Neighbors Classifier |
0.6310 |
0.8453 |
0.6310 |
0.6437 |
0.6213 |
0.5166 |
0.5235 |
0.0310 |
| lda |
Linear Discriminant Analysis |
0.5828 |
0.8579 |
0.5828 |
0.5879 |
0.5774 |
0.4563 |
0.4598 |
0.0170 |
| svm |
SVM - Linear Kernel |
0.4089 |
0.0000 |
0.4089 |
0.3794 |
0.3187 |
0.2527 |
0.3159 |
0.0170 |
| dummy |
Dummy Classifier |
0.3329 |
0.5000 |
0.3329 |
0.1108 |
0.1663 |
0.0000 |
0.0000 |
0.0160 |
| Fold |
|
|
|
|
|
|
|
| 0 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 1 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 2 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 3 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 4 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 5 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 6 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 7 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 8 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 9 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| Mean |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| Std |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
| 0 |
Voting Classifier |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=Memory(location=C:\Users\TEDDY-~1\AppData\Local\Temp\joblib),
steps=[('numerical_imputer',
TransformerWrapper(exclude=None,
include=['Id', 'WeekofPurchase', 'StoreID',
'PriceCH', 'PriceMM', 'DiscCH',
'DiscMM', 'SpecialCH', 'SpecialMM',
'LoyalCH', 'SalePriceMM',
'SalePriceCH', 'PriceDiff',
'PctDiscMM', 'PctDiscCH',
'ListPriceDiff'],
transformer=SimpleI...
learning_rate=0.1,
max_depth=-1,
min_child_samples=20,
min_child_weight=0.001,
min_split_gain=0.0,
n_estimators=100,
n_jobs=-1,
num_leaves=31,
objective=None,
random_state=123,
reg_alpha=0.0,
reg_lambda=0.0,
silent='warn',
subsample=1.0,
subsample_for_bin=200000,
subsample_freq=0))],
flatten_transform=True, n_jobs=-1,
verbose=False, voting='soft',
weights=None)]],
verbose=False),
'my-model.pkl')
# Compare ground truth ('STORE') against the model's predicted class and
# its confidence on the hold-out rows.
# NOTE(review): 'Label'/'Score' are PyCaret 2.x column names; PyCaret 3.x
# renamed them to 'prediction_label'/'prediction_score' — confirm the
# installed version, since the setup summary above is 3.x-style output.
predictions[['STORE', 'Label', 'Score']]
| 748 |
3 |
3 |
1.0 |
| 749 |
3 |
3 |
1.0 |
| 750 |
1 |
1 |
1.0 |
| 751 |
0 |
0 |
1.0 |
| 752 |
4 |
4 |
1.0 |
| ... |
... |
... |
... |
| 1065 |
0 |
0 |
1.0 |
| 1066 |
3 |
3 |
1.0 |
| 1067 |
2 |
2 |
1.0 |
| 1068 |
1 |
1 |
1.0 |
| 1069 |
4 |
4 |
1.0 |
322 rows × 3 columns