# pip install --pre pycaret
# !pip install -U pycaret -q
Google Colab 사용자의 경우 다음의 코드를 실행합니다.
# Enable PyCaret's interactive/HTML displays inside Google Colab.
# NOTE(review): `pycaret.utils.enable_colab` exists only in PyCaret 2.x and
# was removed in PyCaret 3.x — confirm the installed version before running.
from pycaret.utils import enable_colab
enable_colab()
샘플 데이터셋 로드
# Load the built-in 'juice' sample dataset that ships with PyCaret
# (downloads on first use and displays the head of the DataFrame).
from pycaret.datasets import get_data
dataset = get_data('juice')
| 0 |
1 |
CH |
237 |
1 |
1.75 |
1.99 |
0.00 |
0.0 |
0 |
0 |
0.500000 |
1.99 |
1.75 |
0.24 |
No |
0.000000 |
0.000000 |
0.24 |
1 |
| 1 |
2 |
CH |
239 |
1 |
1.75 |
1.99 |
0.00 |
0.3 |
0 |
1 |
0.600000 |
1.69 |
1.75 |
-0.06 |
No |
0.150754 |
0.000000 |
0.24 |
1 |
| 2 |
3 |
CH |
245 |
1 |
1.86 |
2.09 |
0.17 |
0.0 |
0 |
0 |
0.680000 |
2.09 |
1.69 |
0.40 |
No |
0.000000 |
0.091398 |
0.23 |
1 |
| 3 |
4 |
MM |
227 |
1 |
1.69 |
1.69 |
0.00 |
0.0 |
0 |
0 |
0.400000 |
1.69 |
1.69 |
0.00 |
No |
0.000000 |
0.000000 |
0.00 |
1 |
| 4 |
5 |
CH |
228 |
7 |
1.69 |
1.69 |
0.00 |
0.0 |
0 |
0 |
0.956535 |
1.69 |
1.69 |
0.00 |
Yes |
0.000000 |
0.000000 |
0.00 |
0 |
# Class balance of the prospective target column 'STORE'
# (frequency of each store label, descending).
dataset.loc[:, "STORE"].value_counts()
0 356
2 222
3 196
1 157
4 139
Name: STORE, dtype: int64
import pandas as pd
# NOTE(review): '엑셀파일' ("excel file" in Korean) is a placeholder path —
# replace it with a real .xlsx path before running. As written this line
# raises FileNotFoundError; if it succeeds, it overwrites the 'juice'
# dataset loaded above, so the pipeline below runs on the Excel data.
dataset = pd.read_excel('엑셀파일')
머신러닝 파이프라인 진행
from pycaret.classification import *
# Initialise the classification experiment: 'STORE' is the target column
# and session_id pins the random seed so results are reproducible.
s = setup(data=dataset, target='STORE', session_id=123)
# Cross-validate every available classifier and keep the 5 best
# (returns a list because n_select > 1).
best = compare_models(n_select=5)
# Blend the top-5 models into a voting ensemble; rebinds `best` to the
# blended estimator (the original list is no longer referenced).
best = blend_models(best)
# Score the hold-out test split created by setup() with the blended model.
predictions = predict_model(best)
# Persist the full preprocessing pipeline + model as 'my-model.pkl'.
save_model(best, 'my-model')
| 0 |
Session id |
123 |
| 1 |
Target |
STORE |
| 2 |
Target type |
Multiclass |
| 3 |
Original data shape |
(1070, 19) |
| 4 |
Transformed data shape |
(1070, 19) |
| 5 |
Transformed train set shape |
(748, 19) |
| 6 |
Transformed test set shape |
(322, 19) |
| 7 |
Ordinal features |
2 |
| 8 |
Numeric features |
16 |
| 9 |
Categorical features |
2 |
| 10 |
Preprocess |
True |
| 11 |
Imputation type |
simple |
| 12 |
Numeric imputation |
mean |
| 13 |
Categorical imputation |
constant |
| 14 |
Maximum one-hot encoding |
5 |
| 15 |
Encoding method |
None |
| 16 |
Low variance threshold |
0 |
| 17 |
Fold Generator |
StratifiedKFold |
| 18 |
Fold Number |
10 |
| 19 |
CPU Jobs |
-1 |
| 20 |
Use GPU |
False |
| 21 |
Log Experiment |
False |
| 22 |
Experiment Name |
clf-default-name |
| 23 |
USI |
8a4d |
| nb |
Naive Bayes |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0160 |
| dt |
Decision Tree Classifier |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0170 |
| qda |
Quadratic Discriminant Analysis |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0170 |
| gbc |
Gradient Boosting Classifier |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.0870 |
| lightgbm |
Light Gradient Boosting Machine |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
0.1000 |
| rf |
Random Forest Classifier |
0.9986 |
1.0000 |
0.9986 |
0.9987 |
0.9986 |
0.9983 |
0.9983 |
0.0480 |
| et |
Extra Trees Classifier |
0.9973 |
1.0000 |
0.9973 |
0.9975 |
0.9973 |
0.9966 |
0.9966 |
0.0430 |
| lr |
Logistic Regression |
0.9933 |
0.9999 |
0.9933 |
0.9940 |
0.9933 |
0.9914 |
0.9915 |
0.0450 |
| ridge |
Ridge Classifier |
0.8463 |
0.0000 |
0.8463 |
0.8650 |
0.8434 |
0.8027 |
0.8078 |
0.0140 |
| ada |
Ada Boost Classifier |
0.6872 |
0.8923 |
0.6872 |
0.5626 |
0.5981 |
0.5883 |
0.6574 |
0.0270 |
| knn |
K Neighbors Classifier |
0.6310 |
0.8453 |
0.6310 |
0.6437 |
0.6213 |
0.5166 |
0.5235 |
0.0310 |
| lda |
Linear Discriminant Analysis |
0.5828 |
0.8579 |
0.5828 |
0.5879 |
0.5774 |
0.4563 |
0.4598 |
0.0170 |
| svm |
SVM - Linear Kernel |
0.4089 |
0.0000 |
0.4089 |
0.3794 |
0.3187 |
0.2527 |
0.3159 |
0.0170 |
| dummy |
Dummy Classifier |
0.3329 |
0.5000 |
0.3329 |
0.1108 |
0.1663 |
0.0000 |
0.0000 |
0.0160 |
| Fold |
|
|
|
|
|
|
|
| 0 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 1 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 2 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 3 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 4 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 5 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 6 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 7 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 8 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| 9 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| Mean |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
| Std |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
0.0000 |
| 0 |
Voting Classifier |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
1.0000 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=Memory(location=C:\Users\TEDDY-~1\AppData\Local\Temp\joblib),
steps=[('numerical_imputer',
TransformerWrapper(exclude=None,
include=['Id', 'WeekofPurchase', 'StoreID',
'PriceCH', 'PriceMM', 'DiscCH',
'DiscMM', 'SpecialCH', 'SpecialMM',
'LoyalCH', 'SalePriceMM',
'SalePriceCH', 'PriceDiff',
'PctDiscMM', 'PctDiscCH',
'ListPriceDiff'],
transformer=SimpleI...
learning_rate=0.1,
max_depth=-1,
min_child_samples=20,
min_child_weight=0.001,
min_split_gain=0.0,
n_estimators=100,
n_jobs=-1,
num_leaves=31,
objective=None,
random_state=123,
reg_alpha=0.0,
reg_lambda=0.0,
silent='warn',
subsample=1.0,
subsample_for_bin=200000,
subsample_freq=0))],
flatten_transform=True, n_jobs=-1,
verbose=False, voting='soft',
weights=None)]],
verbose=False),
'my-model.pkl')
# Compare ground truth ('STORE') against the model's predicted class and
# its confidence on the hold-out rows.
# NOTE(review): 'Label'/'Score' are PyCaret 2.x column names; PyCaret 3.x
# renamed them to 'prediction_label'/'prediction_score' — confirm the
# installed version, since the setup summary above is 3.x-style output.
predictions[['STORE', 'Label', 'Score']]
| 748 |
3 |
3 |
1.0 |
| 749 |
3 |
3 |
1.0 |
| 750 |
1 |
1 |
1.0 |
| 751 |
0 |
0 |
1.0 |
| 752 |
4 |
4 |
1.0 |
| ... |
... |
... |
... |
| 1065 |
0 |
0 |
1.0 |
| 1066 |
3 |
3 |
1.0 |
| 1067 |
2 |
2 |
1.0 |
| 1068 |
1 |
1 |
1.0 |
| 1069 |
4 |
4 |
1.0 |
322 rows × 3 columns