Hi,
i hope you can help me out. I’m trying to understand how Deep Learning works and I’m into adapting the MNIST-Tutorial from the Tensorflow website: https://www.tensorflow.org/tutorials/layers
I want to use my own images which consists of 5000 images of dog and another 5000 of cats. 4000 of each one is for training purposes. I scaled each one to a size of 100x100 pixels. The only difference between the Tensorflow tutorial and my task is that I use images that are not grayscale, they are bigger and i want to predict only between 2 classes and not 10. What do i have to change?
If i start the training procedure, I receive a NanLossDuringTrainingError. I tried to adjust the learning rate and the batch size, but without success. I’ll paste the code of my 3 python files below. Thanks for you help
input.py
import glob
import ntpath
from enum import Enum
from random import shuffle
import cv2
class Label(Enum):
DOG = 1
CAT = 2
class ImageSet:
def __init__(self, image_height, image_width, color_depth=3):
self.__files = []
self.__IMAGE_WIDTH = image_width
self.__IMAGE_HEIGHT = image_height
self.__IMAGE_COLOR_DEPTH = color_depth
self.__label_array = []
def read_files(self, path, label):
files = get_filepaths(path)
self.__label_array.clear()
for file in files:
print("decoding image {0}".format(file))
self.__files.append(ImageFile(file, label, not self.get_image_color_depth() > 0))
self.__label_array.append(label.value - 1)
def get_label_array(self):
return self.__label_array
def get_files(self):
return self.__files
def get_image_width(self):
return self.__IMAGE_WIDTH
def get_image_height(self):
return self.__IMAGE_HEIGHT
def get_image_color_depth(self):
return self.__IMAGE_COLOR_DEPTH
def shuffle_set(self):
if len(self.__files) > 0:
shuffle(self.__files)
self.__label_array.clear()
for file in self.__files:
self.__label_array.append(file.get_label().value - 1)
def get_image_set(self):
images = []
for file in self.__files:
images.append(file.get_decoded_image())
return images
class ImageFile:
def __init__(self, filename, label=Label(1), grayscale=False):
self.__filename = filename
self.__label = label
self.__decoded_image = cv2.imread(self.__filename, 0 if grayscale else 1) #np.fromfile(self.__filename, dtype=np.int32, count = -1) #tf.image.decode_jpeg(self.__filename)
def get_full_filename(self):
return self.__filename
def get_name(self):
return get_filename(self.__filename)
def get_label(self):
return self.__label
def get_decoded_image(self):
return self.__decoded_image
def get_filename(path):
head, tail = ntpath.split(path)
return tail or ntpath.basename(head)
def get_filepaths(path):
return glob.glob(path + "/*.*")
layers.py
from enum import Enum
import tensorflow as tf
class LayerType(Enum):
CONVOLUTIONAL = 1
INPUT = 2
POOLING = 3
DENSE = 4
LOGITS = 5
"""Input Layer"""
class Input:
def __init__(self, pixel_width, pixel_height, channels, features):
self.__pixel_width = pixel_width
self.__pixel_height = pixel_height
self.__channels = channels
self.__features = features
def CreateLayer(self):
input_layer = tf.reshape(self.__features["x"], [-1, self.__pixel_width, self.__pixel_height, self.__channels])
return input_layer
"""Convolutional Layer"""
class Convolutional():
def __init__(self, layer, layer_type = LayerType.POOLING):
# wird nur gebraucht, wenn ein zweites pooling erforderlich ist. Beim ersten pooling ist self.__pool gleich None = Null
self.__layer_type = LayerType.INPUT if layer_type == LayerType.INPUT else LayerType.POOLING
self.__layer = layer
# return convolutional layer
def CreateLayer(self):
if self.__layer_type == LayerType.INPUT:
conv = tf.layers.conv2d(inputs=self.__layer, filters=16, kernel_size=[5, 5],
padding="same", activation=tf.nn.relu)
elif self.__layer_type == LayerType.POOLING:
conv = tf.layers.conv2d(inputs=self.__layer, filters=48, kernel_size=[5, 5],
padding="same", activation=tf.nn.relu)
else:
raise Exception("Can only pass LayerType.INPUT or LayerType.POOLING to Convolutional layer!")
return conv
"""Pooling Layer"""
class Pooling:
def __init__(self, conv):
self.__conv = conv
# return pooling layer
def CreateLayer(self):
pool = tf.layers.max_pooling2d(inputs=self.__conv, pool_size=[2, 2], strides=2)
return pool
"""Dense Layer"""
# Dense Layer mit 1024 neuronen und 1024 ReLu activation function
class Dense:
def __init__(self, pool, mode = True):
self.__pool = pool
self.__mode = mode
def GetMode (self):
return self.__mode
def CreateLayer(self):
# multipliziert mit 64 channels aus pooling
pool_flat = tf.reshape(self.__pool, [-1, 25 * 25 * 48])
dense = tf.layers.dense(inputs=pool_flat, units=1024, activation=tf.nn.relu)
# rate=0.4: 40% der Ausgabedaten werden zufällig aus den Trainingsdaten raus gehauen
return tf.layers.dropout(inputs=dense, rate=0.4, training=self.__mode == tf.estimator.ModeKeys.TRAIN)
"""Logits Layer"""
class Logits:
def __init__(self, dense):
self.dense = dense
def CreateLayer(self):
logits = tf.layers.dense(inputs=self.dense, units=2)
return logits
####################################################################################################
def cnn_model_fn(features, labels, mode):
mode = tf.estimator.ModeKeys.TRAIN
"""Model function for CNN."""
# Input Layer
network = Input(100, 100, 3, features)
# Convolutional Layer #1
network = Convolutional(network.CreateLayer(), LayerType.INPUT) # Convultional 1
# Pooling Layer #1
network = Pooling(network.CreateLayer()) # Pooling 1
# Convolutional Layer #2 and Pooling Layer #2
network = Convolutional(network.CreateLayer()) # Convultional 2
network = Pooling(network.CreateLayer()) # Pooling 2
# Dense Layer
network = Dense(network.CreateLayer())
# Logits Layer
network = Logits(network.CreateLayer()).CreateLayer()
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=network, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(network, name="softmax_tensor")
}
labels = tf.to_int32(labels)
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=network)
# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode)
eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
cnn_test.py
import input
import layers
import tensorflow as tf
import numpy as np
def main(unused_argv):
training_set = input.ImageSet(100, 100, 3)
training_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/training_set/cats/", input.Label.CAT)
training_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/training_set/dogs/", input.Label.DOG)
training_set.shuffle_set()
test_set = input.ImageSet(100, 100, 3)
test_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/test_set/cats/", input.Label.CAT)
test_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/test_set/dogs/", input.Label.DOG)
test_set.shuffle_set()
print("{0} classes:".format(len(input.Label)))
for label in input.Label:
print(label)
print()
print("Training set -> {0} Images ({1}%)".format(
len(training_set.get_files()),
100 * len(training_set.get_files()) / (len(training_set.get_files()) + len(test_set.get_files()))))
print("Test set -> {0} Images ({1}%)".format(
len(test_set.get_files()),
100 * len(test_set.get_files()) / (len(training_set.get_files()) + len(test_set.get_files()))))
batch_size = 128
train_data = np.asarray(training_set.get_image_set(), dtype=np.float32)
train_labels = np.asarray(training_set.get_label_array(), dtype=np.float32)
eval_data = np.asarray(test_set.get_image_set(), dtype=np.float32)
eval_labels = np.asarray(test_set.get_label_array(), dtype=np.float32)
estimator = tf.estimator.Estimator(model_fn=layers.cnn_model_fn, model_dir="/tmp/cnn")
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": train_data},
y=train_labels,
batch_size=batch_size,
num_epochs=None,
shuffle=True)
estimator.train(
input_fn=train_input_fn,
steps=20000,
hooks=[logging_hook])
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": eval_data},
y=eval_labels,
num_epochs=1,
shuffle=False)
eval_results = estimator.evaluate(input_fn=eval_input_fn)
print(eval_results)
if __name__ == "__main__":
tf.app.run()