Chapter 7: 현미경 영상을 위한 딥러닝

이 장에서는 현미경으로 촬영한 이미지를 분석하는 데 사용되는 딥러닝 기술, 특히 세포 계수(Cell Counting)와 세포 분할(Segmentation)에 대해 다룹니다.

세포 계수 (Cell Counting)

현미경 이미지 내의 세포 수를 세는 것은 생물학에서 기본적인 작업입니다. 본 예제에서는 딥러닝 기술을 활용하여 이 과정을 자동화합니다.

import deepchem as dc
import tensorflow as tf
import tensorflow.keras.layers as layers
import numpy as np
import os
import re

RETRAIN = False

# Load the datasets.
# BBBC005 filenames encode the ground-truth cell count in the "_C<count>_"
# segment (e.g. "SIMCEPImages_A01_C18_F1_s01_w1.TIF" -> 18).  Compile the
# pattern once (raw string) instead of re-parsing it for every file.
CELL_COUNT_PATTERN = re.compile(r"_C(.*?)_")

image_dir = "BBBC005_v1_images"
files = []
labels = []
for f in os.listdir(image_dir):
    if f.endswith(".TIF"):
        files.append(os.path.join(image_dir, f))
        # A filename without the "_C<count>_" marker raises IndexError here,
        # which is preferable to silently mislabeling an image.
        labels.append(int(CELL_COUNT_PATTERN.findall(f)[0]))
dataset = dc.data.ImageDataset(files, np.array(labels))
splitter = dc.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
    dataset, seed=123
)

# Create the model: five strided 5x5 convolutions progressively downsample
# the 520x696 grayscale input, and a single dense unit regresses the count.
features = tf.keras.Input(shape=(520, 696, 1))
hidden = features
for width in (16, 32, 64, 128, 256):
    conv = layers.Conv2D(width, kernel_size=5, strides=2, activation=tf.nn.relu)
    hidden = conv(hidden)
flattened = layers.Flatten()(hidden)
output = layers.Dense(1)(flattened)
keras_model = tf.keras.Model(inputs=features, outputs=output)
# Decay the learning rate by 10% every 250 steps, starting from 1e-3.
learning_rate = dc.models.optimizers.ExponentialDecay(0.001, 0.9, 250)
model = dc.models.KerasModel(
    keras_model,
    loss=dc.models.losses.L2Loss(),
    learning_rate=learning_rate,
    model_dir="models/model",
)

# Make sure the checkpoint directory exists.  os.makedirs with exist_ok=True
# creates intermediate directories in one call and replaces the racy
# exists()/mkdir() pattern (the directory could appear between the two calls).
os.makedirs("models/model", exist_ok=True)

if not RETRAIN:
    # Reload the most recent checkpoint instead of training from scratch.
    model.restore()

# Train it and evaluate performance on the test set.
if RETRAIN:
    print("About to fit model for 50 epochs")
    model.fit(train_dataset, nb_epoch=50)
y_pred = model.predict(test_dataset).flatten()
# Report RMSE between predicted and true cell counts.
print(np.sqrt(np.mean((y_pred - test_dataset.y) ** 2)))

세포 분할 (Cell Segmentation)

세포 분할은 주어진 이미지 내에서 각 세포의 경계를 식별하고 추출하는 과정을 의미합니다.

import deepchem as dc
import tensorflow as tf
import tensorflow.keras.layers as layers
import numpy as np
import os

# import re

RETRAIN = False

# Load the datasets.  Every ground-truth mask corresponds to 16 input images,
# one per (plate row, blur level) pair.  Mask filenames all use row "A" and
# focus "F1", so each matching image name is derived by substituting a row
# letter and a blur value into the mask's name.
image_dir = "BBBC005_v1_images"
label_dir = "BBBC005_v1_ground_truth"
rows = ("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P")
blurs = (1, 4, 7, 10, 14, 17, 20, 23, 26, 29, 32, 35, 39, 42, 45, 48)
files = []
labels = []

for mask_name in os.listdir(label_dir):
    if not mask_name.endswith(".TIF"):
        continue
    mask_path = os.path.join(label_dir, mask_name)
    for row, blur in zip(rows, blurs):
        image_name = mask_name.replace("_F1", "_F%d" % blur)
        image_name = image_name.replace("_A", "_%s" % row)
        files.append(os.path.join(image_dir, image_name))
        labels.append(mask_path)

dataset = dc.data.ImageDataset(files, labels)
splitter = dc.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
    dataset, seed=123
)

# Create the model: a small U-Net-style network.  Three strided convolutions
# downsample the input, a 1x1 convolution mixes channels at the bottleneck,
# and three transposed convolutions upsample back to full resolution.  Skip
# connections concatenate each encoder output onto the matching decoder input.
features = tf.keras.Input(shape=(520, 696, 1))


def _downsample(tensor, channels):
    # Halve the spatial resolution with a strided 5x5 convolution.
    return layers.Conv2D(
        channels, kernel_size=5, strides=2, activation=tf.nn.relu, padding="same"
    )(tensor)


def _upsample(tensor, channels):
    # Double the spatial resolution with a transposed 5x5 convolution.
    return layers.Conv2DTranspose(
        channels, kernel_size=5, strides=2, activation=tf.nn.relu, padding="same"
    )(tensor)


# Downsample three times (pixel values rescaled from [0, 255] to [0, 1]).
conv1 = _downsample(features / 255.0, 16)
conv2 = _downsample(conv1, 32)
conv3 = _downsample(conv2, 64)

# Do a 1x1 convolution at the bottleneck.
conv4 = layers.Conv2D(64, kernel_size=1, strides=1)(conv3)

# Upsample three times, concatenating skip connections along the channel axis.
deconv1 = _upsample(layers.Concatenate(axis=3)([conv3, conv4]), 32)
deconv2 = _upsample(layers.Concatenate(axis=3)([conv2, deconv1]), 16)
deconv3 = _upsample(layers.Concatenate(axis=3)([conv1, deconv2]), 1)

# Compute the final output: per-pixel logits and the sigmoid probability map.
concat4 = layers.Concatenate(axis=3)([features, deconv3])
logits = layers.Conv2D(1, kernel_size=5, strides=1, padding="same")(concat4)
output = layers.Activation(tf.math.sigmoid)(logits)
keras_model = tf.keras.Model(inputs=features, outputs=[output, logits])
# Decay the learning rate by 10% every 250 steps, starting from 1e-2.
learning_rate = dc.models.optimizers.ExponentialDecay(0.01, 0.9, 250)
model = dc.models.KerasModel(
    keras_model,
    loss=dc.models.losses.SigmoidCrossEntropy(),
    # The sigmoid map is the prediction; the raw logits feed the loss.
    output_types=["prediction", "loss"],
    learning_rate=learning_rate,
    model_dir="models/segmentation",
)

# Make sure the checkpoint directory exists.  os.makedirs with exist_ok=True
# creates intermediate directories in one call and replaces the racy
# exists()/mkdir() pattern (the directory could appear between the two calls).
os.makedirs("models/segmentation", exist_ok=True)

if not RETRAIN:
    # Reload the most recent checkpoint instead of training from scratch.
    model.restore()

# Train it and evaluate performance on the test set.
if RETRAIN:
    print("About to fit model for 50 epochs")
    model.fit(train_dataset, nb_epoch=50, checkpoint_interval=100)

# Score each test sample by pixel accuracy: the fraction of pixels where the
# thresholded prediction (> 0.5) agrees with the binarized ground truth (> 0).
scores = []
for x, y, w, sample_id in test_dataset.itersamples():  # sample_id: don't shadow builtin id
    y_pred = model.predict_on_batch([x]).squeeze()
    scores.append(np.mean((y > 0) == (y_pred > 0.5)))
print(np.mean(scores))