DataFrame Custom Dataset Class

By extending torchtext.legacy.data.Dataset, a DataFrame can be converted directly into a BucketIterator.

import urllib.request
import pandas as pd
from sklearn.model_selection import train_test_split

SEED = 123

# Download the bbc-text.csv dataset
url = 'https://storage.googleapis.com/download.tensorflow.org/data/bbc-text.csv'
urllib.request.urlretrieve(url, 'bbc-text.csv')

# Load the DataFrame.
df = pd.read_csv('bbc-text.csv')

# Rename the columns to text / label
df = df.rename(columns={'category': 'label'})
df
label text
0 tech tv future in the hands of viewers with home th...
1 business worldcom boss left books alone former worldc...
2 sport tigers wary of farrell gamble leicester say ...
3 sport yeading face newcastle in fa cup premiership s...
4 entertainment ocean s twelve raids box office ocean s twelve...
... ... ...
2220 business cars pull down us retail figures us retail sal...
2221 politics kilroy unveils immigration policy ex-chatshow ...
2222 entertainment rem announce new glasgow concert us band rem h...
2223 politics how political squabbles snowball it s become c...
2224 sport souness delight at euro progress boss graeme s...

2225 rows × 2 columns
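
Before splitting, it can help to check how the 5 categories are distributed. A minimal sketch using pandas (not part of the original run):

# Check the number of articles per label
print(df['label'].value_counts())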

# Split into train / validation sets.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=SEED)
# train DataFrame
train_df.head()
label text
1983 sport officials respond in court row australian tenn...
878 tech slow start to speedy net services faster broad...
94 politics amnesty chief laments war failure the lack of ...
1808 sport dal maso in to replace bergamasco david dal ma...
1742 tech technology gets the creative bug the hi-tech a...
# validation DataFrame
val_df.head()
label text
717 politics child access laws shake-up parents who refuse ...
798 entertainment fry set for role in hitchhiker s actor stephen...
1330 business palestinian economy in decline despite a short...
18 business japanese banking battle at an end japan s sumi...
1391 business manufacturing recovery slowing uk manufactur...
# Import the required modules
import torch
from torchtext.legacy import data
from torchtext.data.utils import get_tokenizer

# Set the device
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
print(device)
cuda:1

You can load a DataFrame by subclassing torchtext.legacy.data.Dataset.

class DataFrameDataset(data.Dataset):

    def __init__(self, df, fields, is_test=False, **kwargs):
        examples = []
        for i, row in df.iterrows():
            # change the text / label column names here if needed
            label = row['label'] if not is_test else None
            text = row['text'] 
            examples.append(data.Example.fromlist([text, label], fields))

        super().__init__(examples, fields, **kwargs)

    @staticmethod
    def sort_key(ex):
        return len(ex.text)

    @classmethod
    def splits(cls, fields, train_df, val_df=None, test_df=None, **kwargs):
        train_data, val_data, test_data = (None, None, None)
        data_field = fields

        if train_df is not None:
            train_data = cls(train_df.copy(), data_field, **kwargs)
        if val_df is not None:
            val_data = cls(val_df.copy(), data_field, **kwargs)
        if test_df is not None:
            test_data = cls(test_df.copy(), data_field, True, **kwargs)

        return tuple(d for d in (train_data, val_data, test_data) if d is not None)
# Define the tokenizer (another tokenizer can be substituted)
tokenizer = get_tokenizer('basic_english')
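
As a quick check, the basic_english tokenizer lowercases the text and splits punctuation into separate tokens. A small sketch with a made-up sentence:

# Tokenize a sample sentence (hypothetical input, not from the dataset)
print(tokenizer("The BBC reported record profits in 2004!"))
# -> roughly ['the', 'bbc', 'reported', 'record', 'profits', 'in', '2004', '!']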

Construct the Fields as before.

TEXT = data.Field(sequential=True,    # the data is sequential
                  tokenize=tokenizer, # tokenizer to use
                  fix_length=120,     # maximum length of a sentence
                  lower=True,         # lowercase the text
                  batch_first=True)   # put the batch dimension first


LABEL = data.Field(sequential=False)

# Put the fields into a list of (column name, Field) tuples
fields = [('text', TEXT), ('label', LABEL)]
# Split the datasets with DataFrameDataset.splits
train_ds, val_ds = DataFrameDataset.splits(fields, train_df=train_df, val_df=val_df)
# Build the vocabulary
TEXT.build_vocab(train_ds, 
                 max_size=1000,             # maximum vocab size (if omitted, every word is kept)
                 min_freq=5,                # minimum word frequency
                 vectors='glove.6B.100d')   # pretrained word-embedding vectors; pass None to skip

LABEL.build_vocab(train_ds)
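
Once the vocabularies are built, the index-to-string and string-to-index mappings can be inspected. A brief sketch:

# <unk> and <pad> occupy indices 0 and 1; frequent words follow
print(TEXT.vocab.itos[:10])
print(len(TEXT.vocab))     # 1002 = 1000 words + <unk> + <pad>
print(LABEL.vocab.stoi)    # label-to-index mapping (<unk> sits at index 0)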

Create the BucketIterator.

BATCH_SIZE = 32

train_iterator, test_iterator = data.BucketIterator.splits(
    (train_ds, val_ds), 
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=device)
# Pull out a single batch
sample_data = next(iter(train_iterator))
# text shape (batch_size, sequence_length)
sample_data.text.shape
torch.Size([32, 120])
# labels (batch)
sample_data.label
tensor([3, 3, 2, 1, 3, 3, 3, 3, 5, 3, 2, 3, 3, 2, 1, 3, 5, 3, 2, 5, 3, 4, 4, 3,
        3, 3, 1, 4, 4, 3, 3, 3], device='cuda:1')
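
The integer labels can be mapped back to the original category names through LABEL.vocab.itos; because <unk> occupies index 0, the label indices start at 1. A short sketch:

# Convert the first few label indices of the batch back to category names
print([LABEL.vocab.itos[idx] for idx in sample_data.label[:5].tolist()])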

Embedding Layer

x = sample_data.text
x.shape
torch.Size([32, 120])
# Print the vocabulary size
NUM_VOCABS = len(TEXT.vocab)
print(f'Number of Vocabs: {NUM_VOCABS}')
# 1000 words + <unk> + <pad>: 1002 in total

EMBEDDING_DIM = 25
print(f'Embedding Dimension: {EMBEDDING_DIM}')

SEQ_LENGTH = 120
print(f'Sequence Length: {SEQ_LENGTH}')

print(f'Batch Size: {BATCH_SIZE}')
Number of Vocabs: 1002
Embedding Dimension: 25
Sequence Length: 120
Batch Size: 32
x.shape
torch.Size([32, 120])
import torch.nn as nn

# Create the embedding layer from the vocabulary size and embedding dimension
embedding = nn.Embedding(num_embeddings=NUM_VOCABS, 
                         embedding_dim=EMBEDDING_DIM, 
                         padding_idx=1,   # index of <pad> in TEXT.vocab
                         device=device)
embedding
Embedding(1002, 25, padding_idx=1)
embedding_output = embedding(x)
embedding_output.shape
# batch_size, sequence_length, embedding_dim
torch.Size([32, 120, 25])
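
Note that build_vocab downloaded glove.6B.100d vectors, but the embedding layer above is trained from scratch with EMBEDDING_DIM=25. If you wanted to initialize the embedding from the pretrained vectors instead, the dimension would have to be GloVe's 100; a sketch (not used in the rest of this example):

# Hypothetical: build an embedding layer from the pretrained GloVe vectors
pretrained_embedding = nn.Embedding.from_pretrained(TEXT.vocab.vectors,  # shape: (1002, 100)
                                                    padding_idx=1,
                                                    freeze=False)        # allow fine-tuning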

LSTM Output Shape

When bidirectional=True, the last dimension of the output is 2 * hidden_size.

lstm = nn.LSTM(input_size=EMBEDDING_DIM, 
               hidden_size=64, 
               num_layers=2, 
               bidirectional=True,
               batch_first=False, 
               device=device
              )

lstm_output, (lstm_hidden, lstm_cell) = lstm(embedding_output)
lstm_output.shape
# output: sequence_length, batch_size, bidirectional(2)*hidden_size
torch.Size([32, 120, 128])

When bidirectional=False, the last dimension of the output is 1 * hidden_size.

lstm = nn.LSTM(input_size=EMBEDDING_DIM, 
               hidden_size=64, 
               num_layers=2, 
               bidirectional=False,
               batch_first=False, 
               device=device
              )

lstm_output, (lstm_hidden, lstm_cell) = lstm(embedding_output)
lstm_output.shape
# output: sequence_length, batch_size, NO bidirectional(1)*hidden_size
torch.Size([32, 120, 64])

When batch_first=True is set

  • The input and output tensors have shape (batch, seq, feature). When set to False, inputs and outputs use (seq, batch, feature) instead. Since the batch dimension usually comes first, batch_first=True is the common choice.
  • However, this does not apply to the hidden state and cell state.

When batch_first=False

lstm = nn.LSTM(input_size=EMBEDDING_DIM, 
               hidden_size=64, 
               num_layers=2, 
               bidirectional=True,
               batch_first=False, 
               device=device
              )

# embedding_output is (32, 120, 25); with batch_first=False the LSTM
# interprets it as (sequence_length, batch_size, input_size)

output, (hidden_state, cell_state) = lstm(embedding_output)
output.shape, hidden_state.shape, cell_state.shape
# output: sequence_length, batch_size, bidirectional(2)*hidden_size
# hidden_state: bidirectional(2)*num_layers, batch_size, hidden_size
# cell_state: bidirectional(2)*num_layers, batch_size, hidden_size
(torch.Size([32, 120, 128]),
 torch.Size([4, 120, 64]),
 torch.Size([4, 120, 64]))
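
Note that embedding_output here is still batch-first, which is why the hidden state above reports 120 as its batch dimension. For a batch_first=False LSTM the input would normally be permuted to (seq, batch, feature) first; a sketch:

# Permute the batch-first embedding output to (sequence_length, batch_size, embedding_dim)
seq_first = embedding_output.permute(1, 0, 2).contiguous()   # (120, 32, 25)
seq_output, (seq_h, seq_c) = lstm(seq_first)
print(seq_output.shape, seq_h.shape, seq_c.shape)
# expected: (120, 32, 128), (4, 32, 64), (4, 32, 64)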

When batch_first=True

lstm = nn.LSTM(input_size=EMBEDDING_DIM, 
               hidden_size=64, 
               num_layers=2, 
               bidirectional=True,
               batch_first=True, 
               device=device
              )

# (32, 120, 25)
# batch_size, sequence_length, input_size

output, (hidden_state, cell_state) = lstm(embedding_output)
output.shape, hidden_state.shape, cell_state.shape
# output: batch_size, sequence_length, bidirectional(2)*hidden_size
# hidden_state: bidirectional(2)*num_layers, batch_size, hidden_size
# cell_state: bidirectional(2)*num_layers, batch_size, hidden_size
(torch.Size([32, 120, 128]), torch.Size([4, 32, 64]), torch.Size([4, 32, 64]))
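
The hidden state packs layers and directions into its first dimension; per the PyTorch docs it can be reshaped with view to separate them. A short sketch for pulling out the last layer's forward and backward states:

# hidden_state: (num_layers * num_directions, batch_size, hidden_size)
h = hidden_state.view(2, 2, 32, 64)   # (num_layers, num_directions, batch_size, hidden_size)
last_forward = h[-1, 0]               # (32, 64): last layer, forward direction
last_backward = h[-1, 1]              # (32, 64): last layer, backward direction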

The textbook approach!!

Input: embedding_output

print(f'embedding_output.shape: {embedding_output.shape}')
# batch_size, sequence_length, embedding_dim
embedding_output.shape: torch.Size([32, 120, 25])
lstm = nn.LSTM(input_size=EMBEDDING_DIM, 
               hidden_size=64, 
               num_layers=2, 
               bidirectional=True,
               batch_first=True, 
               device=device
              )
# input shape
# hidden_state_input: bidirectional(2)*num_layers, batch_size, hidden_size
# cell_state_input: bidirectional(2)*num_layers, batch_size, hidden_size
h_0 = torch.zeros(2*2, BATCH_SIZE, 64).to(device)
c_0 = torch.zeros(2*2, BATCH_SIZE, 64).to(device)

# The following would raise an error (wrong dimension order)
# h_0 = torch.zeros(BATCH_SIZE, 2*2, 64).to(device)
# c_0 = torch.zeros(BATCH_SIZE, 2*2, 64).to(device)

output, (hidden_state, cell_state) = lstm(embedding_output, (h_0, c_0))
output.shape, hidden_state.shape, cell_state.shape
# output: batch_size, sequence_length, bidirectional(2)*hidden_size
# hidden_state: bidirectional(2)*num_layers, batch_size, hidden_size
# cell_state: bidirectional(2)*num_layers, batch_size, hidden_size
(torch.Size([32, 120, 128]), torch.Size([4, 32, 64]), torch.Size([4, 32, 64]))

Take the output of the last time step of the sequence.

output[:, -1, :].shape
torch.Size([32, 128])
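
With a bidirectional LSTM, output[:, -1, :] concatenates the forward direction's final step with the backward direction's output at that same (last) position, which for the backward pass is only its first step. A common alternative for classification is to concatenate the last layer's final hidden states from both directions; a sketch:

# Concatenate the final forward and backward hidden states of the last layer
last_hidden = torch.cat([hidden_state[-2], hidden_state[-1]], dim=1)
print(last_hidden.shape)   # torch.Size([32, 128])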

Building the Model

from tqdm import tqdm  # for the progress bar
import numpy as np
import torch.nn as nn
import torch.optim as optim


class TextClassificationModel(nn.Module):
    def __init__(self, num_classes, vocab_size, embedding_dim, hidden_size, num_layers, seq_length, drop_prob=0.15):
        super(TextClassificationModel, self).__init__()
        self.num_classes = num_classes 
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_length = seq_length
        
        self.embedding = nn.Embedding(num_embeddings=vocab_size, 
                                      embedding_dim=embedding_dim)
        
        self.lstm = nn.LSTM(input_size=embedding_dim, 
                            hidden_size=hidden_size, 
                            num_layers=num_layers, 
                            batch_first=True,
                            bidirectional=True,
                           )
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.relu = nn.ReLU()
        
        self.fc = nn.Linear(hidden_size*2, hidden_size)
        self.output = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x, hidden_and_cell):
        x = self.embedding(x)
        output, (h, c) = self.lstm(x, hidden_and_cell)
        h = output[:, -1, :]
        o = self.dropout(h)
        o = self.relu(self.fc(o))
        o = self.dropout(o)
        return self.output(o)
config = {
    'num_classes': 5, 
    'vocab_size': NUM_VOCABS,
    'embedding_dim': 30, 
    'hidden_size': 64, 
    'num_layers': 2, 
    'seq_length': 120, 
}

model = TextClassificationModel(**config)
model.to(device)
TextClassificationModel(
  (embedding): Embedding(1002, 30)
  (lstm): LSTM(30, 64, num_layers=2, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.15, inplace=False)
  (relu): ReLU()
  (fc): Linear(in_features=128, out_features=64, bias=True)
  (output): Linear(in_features=64, out_features=5, bias=True)
)
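
As a quick sanity check before training, a dummy batch can be pushed through the model to confirm the output shape is (batch_size, num_classes). A sketch with made-up inputs:

# Hypothetical dummy batch of random token indices, shape (4, seq_length)
dummy_text = torch.randint(0, config['vocab_size'], (4, config['seq_length'])).to(device)
h0 = torch.zeros(2 * config['num_layers'], 4, config['hidden_size']).to(device)
c0 = torch.zeros(2 * config['num_layers'], 4, config['hidden_size']).to(device)
print(model(dummy_text, (h0, c0)).shape)   # expected: torch.Size([4, 5])
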
# Define the loss function: CrossEntropyLoss
loss_fn = nn.CrossEntropyLoss()

# Define the optimizer: model.parameters() and the learning rate
optimizer = optim.Adam(model.parameters(), lr=1e-3)
def model_train(model, data_loader, loss_fn, optimizer, config, device):
    # Put the model in training mode. Gradients are only updated in training mode, so calling train() is required.
    model.train()
    
    # Temporary variables for computing loss and accuracy; initialized to 0.
    running_loss = 0
    corr = 0
    counts = 0
    total_counts = 0
    
    # Wrap the data loader with tqdm to show a nice progress bar while monitoring training.
    progress_bar = tqdm(data_loader, unit='batch', total=len(data_loader), mininterval=1)
    
    # Start mini-batch training.
    for idx, data in enumerate(progress_bar):
        # Move the text and label tensors to the device (cuda or cpu).
        text = data.text.to(device)
        label = data.label.to(device)
        # Shift labels from 1..5 to 0..4 (index 0 in LABEL.vocab is <unk>).
        label.sub_(1)
        
        # Reset the accumulated gradients.
        optimizer.zero_grad()
        
        initial_hidden = torch.zeros(2*config['num_layers'], len(text), config['hidden_size']).to(device)
        initial_cell = torch.zeros(2*config['num_layers'], len(text), config['hidden_size']).to(device)

        # Run forward propagation to get the model output.
        output = model(text, (initial_hidden, initial_cell))
        
        # Compute the loss by passing the output and label to the loss function.
        loss = loss_fn(output, label)
        
        # Run back propagation to compute the gradients.
        loss.backward()
        
        # Update the weights with the computed gradients.
        optimizer.step()
        
        # output.max(dim=1) returns the max probability and its index.
        # Ignore the probability; store the index in pred and compare it against label to measure accuracy.
        _, pred = output.max(dim=1)
        
        # pred.eq(label).sum() counts the correctly predicted labels; item() extracts the value from the tensor.
        # The count is accumulated in corr.
        corr += pred.eq(label).sum().item()
        counts += label.size(0)
        
        # loss is the mean loss of one batch; label.size(0) is the batch size.
        # Multiplying loss by label.size(0) gives the total loss for the batch.
        # Accumulate it, then divide by the dataset size at the end of the epoch to get the mean loss.
        running_loss += (loss.item() * label.size(0))
        
        total_counts += label.size(0)
        
        # Update the progress bar with the training status
        progress_bar.set_description(f"training loss: {running_loss/total_counts:.5f}, training accuracy: {corr / counts:.5f}")
        
    # Divide the accumulated number of correct predictions by the total count to get the accuracy.
    acc = corr / total_counts
    
    # Return the mean loss and the accuracy.
    # train_loss, train_acc
    return running_loss / total_counts, acc
def model_evaluate(model, data_loader, loss_fn, config, device):
    # model.eval() switches the model to evaluation mode.
    # This is required so that layers such as dropout behave correctly during evaluation.
    model.eval()
    
    # Required so that gradients are not updated during evaluation.
    with torch.no_grad():
        # Temporary variables for computing loss and accuracy; initialized to 0.
        corr = 0
        running_loss = 0
        total_counts = 0
        
        # Run evaluation batch by batch.
        for data in data_loader:
            # Move the text and label tensors to the device (cuda or cpu).
            text = data.text.to(device)
            label = data.label.to(device)
            # Shift labels from 1..5 to 0..4, as in the training loop.
            label.data.sub_(1)
            
            initial_hidden = torch.zeros(2*config['num_layers'], len(text), config['hidden_size']).to(device)
            initial_cell = torch.zeros(2*config['num_layers'], len(text), config['hidden_size']).to(device)
            
            # Run forward propagation to get the model output.
            output = model(text, (initial_hidden, initial_cell))
            
            # output.max(dim=1) returns the max probability and its index.
            # Ignore the probability; store the index in pred and compare it against label to measure accuracy.
            _, pred = output.max(dim=1)
            
            # pred.eq(label).sum() counts the correctly predicted labels; item() extracts the value from the tensor.
            # The count is accumulated in corr.
            corr += torch.sum(pred.eq(label)).item()
            
            # loss is the mean loss of one batch; label.size(0) is the batch size.
            # Multiplying loss by label.size(0) gives the total loss for the batch.
            # Accumulate it, then divide by the dataset size at the end to get the mean loss.
            running_loss += loss_fn(output, label).item() * label.size(0)
            
            total_counts += label.size(0)
        
        # Compute the validation accuracy.
        # Divide the accumulated number of correct predictions by the dataset size to get the final accuracy.
        acc = corr / total_counts
        
        # Return the results.
        # val_loss, val_acc
        return running_loss / total_counts, acc
# Set the maximum number of epochs.
num_epochs = 50

# Define the model name used for the checkpoint.
model_name = 'LSTM-Text-Classification'

min_loss = np.inf

# Run training and validation for each epoch.
for epoch in range(num_epochs):
    # Model Training
    # Get the training loss and accuracy.
    train_loss, train_acc = model_train(model, train_iterator, loss_fn, optimizer, config, device)

    # Get the validation loss and accuracy.
    val_loss, val_acc = model_evaluate(model, test_iterator, loss_fn, config, device)   
    
    # If val_loss improved, update min_loss and save the model weights.
    if val_loss < min_loss:
        print(f'[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!')
        min_loss = val_loss
        torch.save(model.state_dict(), f'{model_name}.pth')
    
    # Print the results for this epoch.
    print(f'epoch {epoch+1:02d}, loss: {train_loss:.5f}, acc: {train_acc:.5f}, val_loss: {val_loss:.5f}, val_accuracy: {val_acc:.5f}')
training loss: 1.61319, training accuracy: 0.18371: 100% 56/56 [00:01<00:00, 50.33batch/s]
[INFO] val_loss has been improved from inf to 1.60184. Saving Model!
epoch 01, loss: 1.61319, acc: 0.18371, val_loss: 1.60184, val_accuracy: 0.24270
training loss: 1.60056, training accuracy: 0.25056: 100% 56/56 [00:01<00:00, 50.71batch/s]
[INFO] val_loss has been improved from 1.60184 to 1.58833. Saving Model!
epoch 02, loss: 1.60056, acc: 0.25056, val_loss: 1.58833, val_accuracy: 0.25393
training loss: 1.56228, training accuracy: 0.30618: 100% 56/56 [00:01<00:00, 51.45batch/s]
[INFO] val_loss has been improved from 1.58833 to 1.56553. Saving Model!
epoch 03, loss: 1.56228, acc: 0.30618, val_loss: 1.56553, val_accuracy: 0.30112
training loss: 1.48211, training accuracy: 0.35506: 100% 56/56 [00:01<00:00, 51.08batch/s]
[INFO] val_loss has been improved from 1.56553 to 1.46125. Saving Model!
epoch 04, loss: 1.48211, acc: 0.35506, val_loss: 1.46125, val_accuracy: 0.39551
training loss: 1.36006, training accuracy: 0.40506: 100% 56/56 [00:01<00:00, 51.70batch/s]
[INFO] val_loss has been improved from 1.46125 to 1.35223. Saving Model!
epoch 05, loss: 1.36006, acc: 0.40506, val_loss: 1.35223, val_accuracy: 0.42022
training loss: 1.24720, training accuracy: 0.46629: 100% 56/56 [00:01<00:00, 51.67batch/s]
[INFO] val_loss has been improved from 1.35223 to 1.34175. Saving Model!
epoch 06, loss: 1.24720, acc: 0.46629, val_loss: 1.34175, val_accuracy: 0.41798
training loss: 1.17133, training accuracy: 0.50393: 100% 56/56 [00:01<00:00, 51.09batch/s]
[INFO] val_loss has been improved from 1.34175 to 1.24888. Saving Model!
epoch 07, loss: 1.17133, acc: 0.50393, val_loss: 1.24888, val_accuracy: 0.48539
training loss: 1.06987, training accuracy: 0.53539: 100% 56/56 [00:01<00:00, 50.98batch/s]
[INFO] val_loss has been improved from 1.24888 to 1.23465. Saving Model!
epoch 08, loss: 1.06987, acc: 0.53539, val_loss: 1.23465, val_accuracy: 0.48315
training loss: 1.03832, training accuracy: 0.55730: 100% 56/56 [00:01<00:00, 50.75batch/s]
epoch 09, loss: 1.03832, acc: 0.55730, val_loss: 1.44860, val_accuracy: 0.37079
training loss: 0.97365, training accuracy: 0.60056: 100% 56/56 [00:01<00:00, 51.32batch/s]
[INFO] val_loss has been improved from 1.23465 to 1.17362. Saving Model!
epoch 10, loss: 0.97365, acc: 0.60056, val_loss: 1.17362, val_accuracy: 0.52809
training loss: 0.96916, training accuracy: 0.60169: 100% 56/56 [00:01<00:00, 51.38batch/s]
epoch 11, loss: 0.96916, acc: 0.60169, val_loss: 1.26211, val_accuracy: 0.51236
training loss: 0.85329, training accuracy: 0.64719: 100% 56/56 [00:01<00:00, 50.25batch/s]
epoch 12, loss: 0.85329, acc: 0.64719, val_loss: 1.20658, val_accuracy: 0.54157
training loss: 0.79733, training accuracy: 0.67416: 100% 56/56 [00:01<00:00, 50.07batch/s]
epoch 13, loss: 0.79733, acc: 0.67416, val_loss: 1.20200, val_accuracy: 0.55281
training loss: 0.73991, training accuracy: 0.71180: 100% 56/56 [00:01<00:00, 50.20batch/s]
[INFO] val_loss has been improved from 1.17362 to 1.11846. Saving Model!
epoch 14, loss: 0.73991, acc: 0.71180, val_loss: 1.11846, val_accuracy: 0.58876
training loss: 0.69715, training accuracy: 0.74438: 100% 56/56 [00:01<00:00, 50.19batch/s]
epoch 15, loss: 0.69715, acc: 0.74438, val_loss: 1.24671, val_accuracy: 0.53483
training loss: 0.72440, training accuracy: 0.72303: 100% 56/56 [00:01<00:00, 49.97batch/s]
epoch 16, loss: 0.72440, acc: 0.72303, val_loss: 1.56377, val_accuracy: 0.50112
training loss: 0.72304, training accuracy: 0.72640: 100% 56/56 [00:01<00:00, 50.78batch/s]
epoch 17, loss: 0.72304, acc: 0.72640, val_loss: 1.13378, val_accuracy: 0.62022
training loss: 0.54999, training accuracy: 0.79607: 100% 56/56 [00:01<00:00, 50.09batch/s]
[INFO] val_loss has been improved from 1.11846 to 1.10839. Saving Model!
epoch 18, loss: 0.54999, acc: 0.79607, val_loss: 1.10839, val_accuracy: 0.62697
training loss: 0.68430, training accuracy: 0.77247: 100% 56/56 [00:01<00:00, 50.52batch/s]
epoch 19, loss: 0.68430, acc: 0.77247, val_loss: 1.69284, val_accuracy: 0.42697
training loss: 0.99228, training accuracy: 0.59494: 100% 56/56 [00:01<00:00, 50.39batch/s]
[INFO] val_loss has been improved from 1.10839 to 1.03827. Saving Model!
epoch 20, loss: 0.99228, acc: 0.59494, val_loss: 1.03827, val_accuracy: 0.58202
training loss: 0.62338, training accuracy: 0.77416: 100% 56/56 [00:01<00:00, 51.12batch/s]
[INFO] val_loss has been improved from 1.03827 to 1.01389. Saving Model!
epoch 21, loss: 0.62338, acc: 0.77416, val_loss: 1.01389, val_accuracy: 0.63820
training loss: 0.55111, training accuracy: 0.79382: 100% 56/56 [00:01<00:00, 50.02batch/s]
epoch 22, loss: 0.55111, acc: 0.79382, val_loss: 1.15105, val_accuracy: 0.63146
training loss: 0.48066, training accuracy: 0.83202: 100% 56/56 [00:01<00:00, 50.34batch/s]
epoch 23, loss: 0.48066, acc: 0.83202, val_loss: 1.05983, val_accuracy: 0.68989
training loss: 0.41633, training accuracy: 0.85955: 100% 56/56 [00:01<00:00, 50.61batch/s]
[INFO] val_loss has been improved from 1.01389 to 0.96205. Saving Model!
epoch 24, loss: 0.41633, acc: 0.85955, val_loss: 0.96205, val_accuracy: 0.71236
training loss: 0.34057, training accuracy: 0.88427: 100% 56/56 [00:01<00:00, 49.97batch/s]
epoch 25, loss: 0.34057, acc: 0.88427, val_loss: 1.00524, val_accuracy: 0.71461
training loss: 0.35567, training accuracy: 0.88034: 100% 56/56 [00:01<00:00, 50.45batch/s]
epoch 26, loss: 0.35567, acc: 0.88034, val_loss: 1.01842, val_accuracy: 0.72809
training loss: 0.29582, training accuracy: 0.91067: 100% 56/56 [00:01<00:00, 50.79batch/s]
epoch 27, loss: 0.29582, acc: 0.91067, val_loss: 0.98013, val_accuracy: 0.72809
training loss: 0.26330, training accuracy: 0.91742: 100% 56/56 [00:01<00:00, 51.33batch/s]
epoch 28, loss: 0.26330, acc: 0.91742, val_loss: 1.09240, val_accuracy: 0.72584
training loss: 0.23466, training accuracy: 0.93539: 100% 56/56 [00:01<00:00, 51.02batch/s]
epoch 29, loss: 0.23466, acc: 0.93539, val_loss: 1.10061, val_accuracy: 0.72360
training loss: 0.21358, training accuracy: 0.93427: 100% 56/56 [00:01<00:00, 50.20batch/s]
epoch 30, loss: 0.21358, acc: 0.93427, val_loss: 1.05212, val_accuracy: 0.74831
training loss: 0.22328, training accuracy: 0.92584: 100% 56/56 [00:01<00:00, 51.47batch/s]
epoch 31, loss: 0.22328, acc: 0.92584, val_loss: 1.16936, val_accuracy: 0.71236
training loss: 0.18148, training accuracy: 0.94494: 100% 56/56 [00:01<00:00, 49.86batch/s]
epoch 32, loss: 0.18148, acc: 0.94494, val_loss: 1.06799, val_accuracy: 0.73034
training loss: 0.13620, training accuracy: 0.95955: 100% 56/56 [00:01<00:00, 50.26batch/s]
epoch 33, loss: 0.13620, acc: 0.95955, val_loss: 1.20454, val_accuracy: 0.74831
training loss: 0.11609, training accuracy: 0.96348: 100% 56/56 [00:01<00:00, 50.50batch/s]
epoch 34, loss: 0.11609, acc: 0.96348, val_loss: 1.21092, val_accuracy: 0.72809
training loss: 0.23600, training accuracy: 0.91910: 100% 56/56 [00:01<00:00, 50.67batch/s]
epoch 35, loss: 0.23600, acc: 0.91910, val_loss: 1.13882, val_accuracy: 0.69663
training loss: 0.15419, training accuracy: 0.95506: 100% 56/56 [00:01<00:00, 51.74batch/s]
epoch 36, loss: 0.15419, acc: 0.95506, val_loss: 1.05352, val_accuracy: 0.75955
training loss: 0.10157, training accuracy: 0.96910: 100% 56/56 [00:01<00:00, 51.81batch/s]
epoch 37, loss: 0.10157, acc: 0.96910, val_loss: 1.28235, val_accuracy: 0.73034
training loss: 0.07899, training accuracy: 0.97865: 100% 56/56 [00:01<00:00, 50.07batch/s]
epoch 38, loss: 0.07899, acc: 0.97865, val_loss: 1.27113, val_accuracy: 0.73933
training loss: 0.06964, training accuracy: 0.98034: 100% 56/56 [00:01<00:00, 50.03batch/s]
epoch 39, loss: 0.06964, acc: 0.98034, val_loss: 1.31469, val_accuracy: 0.75056
training loss: 0.11386, training accuracy: 0.96011: 100% 56/56 [00:01<00:00, 50.54batch/s]
epoch 40, loss: 0.11386, acc: 0.96011, val_loss: 1.29488, val_accuracy: 0.75056
training loss: 0.07366, training accuracy: 0.98034: 100% 56/56 [00:01<00:00, 50.50batch/s]
epoch 41, loss: 0.07366, acc: 0.98034, val_loss: 1.29582, val_accuracy: 0.74831
training loss: 0.04986, training accuracy: 0.98876: 100% 56/56 [00:01<00:00, 50.77batch/s]
epoch 42, loss: 0.04986, acc: 0.98876, val_loss: 1.31590, val_accuracy: 0.75506
training loss: 0.04120, training accuracy: 0.98989: 100% 56/56 [00:01<00:00, 50.43batch/s]
epoch 43, loss: 0.04120, acc: 0.98989, val_loss: 1.37592, val_accuracy: 0.74382
training loss: 0.04997, training accuracy: 0.98708: 100% 56/56 [00:01<00:00, 50.23batch/s]
epoch 44, loss: 0.04997, acc: 0.98708, val_loss: 1.37052, val_accuracy: 0.74157
training loss: 0.03830, training accuracy: 0.98820: 100% 56/56 [00:01<00:00, 50.11batch/s]
epoch 45, loss: 0.03830, acc: 0.98820, val_loss: 1.41376, val_accuracy: 0.76404
training loss: 0.15161, training accuracy: 0.95506: 100% 56/56 [00:01<00:00, 51.33batch/s]
epoch 46, loss: 0.15161, acc: 0.95506, val_loss: 1.40687, val_accuracy: 0.73258
training loss: 0.12108, training accuracy: 0.96517: 100% 56/56 [00:01<00:00, 50.77batch/s]
epoch 47, loss: 0.12108, acc: 0.96517, val_loss: 1.44284, val_accuracy: 0.72809
training loss: 0.10142, training accuracy: 0.97360: 100% 56/56 [00:01<00:00, 50.15batch/s]
epoch 48, loss: 0.10142, acc: 0.97360, val_loss: 1.38564, val_accuracy: 0.72360
training loss: 0.26720, training accuracy: 0.93315: 100% 56/56 [00:01<00:00, 50.09batch/s]
epoch 49, loss: 0.26720, acc: 0.93315, val_loss: 1.26490, val_accuracy: 0.69888
training loss: 0.15398, training accuracy: 0.95225: 100% 56/56 [00:01<00:00, 50.28batch/s]
epoch 50, loss: 0.15398, acc: 0.95225, val_loss: 1.19034, val_accuracy: 0.72360
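
After training, the weights saved at the best val_loss can be restored and evaluated once more. A minimal sketch:

# Restore the checkpoint with the lowest validation loss and re-evaluate
model.load_state_dict(torch.load(f'{model_name}.pth'))
final_loss, final_acc = model_evaluate(model, test_iterator, loss_fn, config, device)
print(f'final val_loss: {final_loss:.5f}, final val_accuracy: {final_acc:.5f}')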