Tensorflow NanLossDuringTrainingError

Neralem · March 19, 2018, 5:50pm

Hi,

i hope you can help me out. I’m trying to understand how Deep Learning works and I’m into adapting the MNIST-Tutorial from the Tensorflow website: https://www.tensorflow.org/tutorials/layers

I want to use my own images which consists of 5000 images of dog and another 5000 of cats. 4000 of each one is for training purposes. I scaled each one to a size of 100x100 pixels. The only difference between the Tensorflow tutorial and my task is that I use images that are not grayscale, they are bigger and i want to predict only between 2 classes and not 10. What do i have to change?
If i start the training procedure, I receive a NanLossDuringTrainingError. I tried to adjust the learning rate and the batch size, but without success. I’ll paste the code of my 3 python files below. Thanks for you help

input.py

import glob
import ntpath
from enum import Enum
from random import shuffle
import cv2

class Label(Enum):
    DOG = 1
    CAT = 2


class ImageSet:
    def __init__(self, image_height, image_width, color_depth=3):
        self.__files = []
        self.__IMAGE_WIDTH = image_width
        self.__IMAGE_HEIGHT = image_height
        self.__IMAGE_COLOR_DEPTH = color_depth
        self.__label_array = []

    def read_files(self, path, label):
        files = get_filepaths(path)
        self.__label_array.clear()
        for file in files:
            print("decoding image {0}".format(file))
            self.__files.append(ImageFile(file, label, not self.get_image_color_depth() > 0))
            self.__label_array.append(label.value - 1)

    def get_label_array(self):
        return self.__label_array

    def get_files(self):
        return self.__files

    def get_image_width(self):
        return self.__IMAGE_WIDTH

    def get_image_height(self):
        return self.__IMAGE_HEIGHT

    def get_image_color_depth(self):
        return self.__IMAGE_COLOR_DEPTH

    def shuffle_set(self):
        if len(self.__files) > 0:
            shuffle(self.__files)
            self.__label_array.clear()
            for file in self.__files:
                self.__label_array.append(file.get_label().value - 1)

    def get_image_set(self):
        images = []
        for file in self.__files:
            images.append(file.get_decoded_image())
        return images


class ImageFile:
    def __init__(self, filename, label=Label(1), grayscale=False):
        self.__filename = filename
        self.__label = label
        self.__decoded_image = cv2.imread(self.__filename, 0 if grayscale else 1) #np.fromfile(self.__filename, dtype=np.int32, count = -1) #tf.image.decode_jpeg(self.__filename)

    def get_full_filename(self):
        return self.__filename

    def get_name(self):
        return get_filename(self.__filename)

    def get_label(self):
        return self.__label

    def get_decoded_image(self):
        return self.__decoded_image


def get_filename(path):
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)


def get_filepaths(path):
    return glob.glob(path + "/*.*")

layers.py

from enum import Enum
import tensorflow as tf

class LayerType(Enum):
    CONVOLUTIONAL = 1
    INPUT = 2
    POOLING = 3
    DENSE = 4
    LOGITS = 5

"""Input Layer"""
class Input:

    def __init__(self, pixel_width, pixel_height, channels, features):
        self.__pixel_width = pixel_width
        self.__pixel_height = pixel_height
        self.__channels = channels
        self.__features = features

    def CreateLayer(self):
      input_layer = tf.reshape(self.__features["x"], [-1, self.__pixel_width, self.__pixel_height, self.__channels])

      return input_layer


"""Convolutional Layer"""
class Convolutional():

    def __init__(self, layer, layer_type = LayerType.POOLING):
        # wird nur gebraucht, wenn ein zweites pooling erforderlich ist. Beim ersten pooling ist self.__pool gleich None = Null
        self.__layer_type = LayerType.INPUT if layer_type == LayerType.INPUT else LayerType.POOLING
        self.__layer = layer

    # return convolutional layer
    def CreateLayer(self):
        if self.__layer_type == LayerType.INPUT:
            conv = tf.layers.conv2d(inputs=self.__layer, filters=16, kernel_size=[5, 5],
                                    padding="same", activation=tf.nn.relu)
        elif self.__layer_type == LayerType.POOLING:
            conv = tf.layers.conv2d(inputs=self.__layer, filters=48, kernel_size=[5, 5],
                                    padding="same", activation=tf.nn.relu)
        else:
            raise Exception("Can only pass LayerType.INPUT or LayerType.POOLING to Convolutional layer!")

        return conv


"""Pooling Layer"""
class Pooling:

    def __init__(self, conv):
        self.__conv = conv

    # return pooling layer
    def CreateLayer(self):
        pool = tf.layers.max_pooling2d(inputs=self.__conv, pool_size=[2, 2], strides=2)

        return pool


"""Dense Layer"""
# Dense Layer mit 1024 neuronen und 1024 ReLu activation function
class Dense:

    def __init__(self, pool, mode = True):
        self.__pool = pool
        self.__mode = mode

    def GetMode (self):
        return self.__mode

    def CreateLayer(self):
        # multipliziert mit 64 channels aus pooling
        pool_flat = tf.reshape(self.__pool, [-1, 25 * 25 * 48])
        dense = tf.layers.dense(inputs=pool_flat, units=1024, activation=tf.nn.relu)

        # rate=0.4: 40% der Ausgabedaten werden zufällig aus den Trainingsdaten raus gehauen
        return tf.layers.dropout(inputs=dense, rate=0.4, training=self.__mode == tf.estimator.ModeKeys.TRAIN)

"""Logits Layer"""
class Logits:
    def __init__(self, dense):
        self.dense = dense

    def CreateLayer(self):
        logits = tf.layers.dense(inputs=self.dense, units=2)

        return logits




####################################################################################################

def cnn_model_fn(features, labels, mode):
    mode = tf.estimator.ModeKeys.TRAIN
    """Model function for CNN."""
    # Input Layer
    network = Input(100, 100, 3, features)

    # Convolutional Layer #1
    network = Convolutional(network.CreateLayer(), LayerType.INPUT)  # Convultional 1

    # Pooling Layer #1
    network = Pooling(network.CreateLayer())  # Pooling 1

    # Convolutional Layer #2 and Pooling Layer #2
    network = Convolutional(network.CreateLayer())  # Convultional 2
    network = Pooling(network.CreateLayer())  # Pooling 2

    # Dense Layer
    network = Dense(network.CreateLayer())

    # Logits Layer
    network = Logits(network.CreateLayer()).CreateLayer()

    predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "classes": tf.argmax(input=network, axis=1),
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      # `logging_hook`.
      "probabilities": tf.nn.softmax(network, name="softmax_tensor")
    }

    labels = tf.to_int32(labels)

    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=network)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

cnn_test.py

import input
import layers
import tensorflow as tf
import numpy as np


def main(unused_argv):
    training_set = input.ImageSet(100, 100, 3)
    training_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/training_set/cats/", input.Label.CAT)
    training_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/training_set/dogs/", input.Label.DOG)
    training_set.shuffle_set()

    test_set = input.ImageSet(100, 100, 3)
    test_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/test_set/cats/", input.Label.CAT)
    test_set.read_files("/home/snsnbier/Schreibtisch/Cats_dogs/Dataset_norm_100x100/test_set/dogs/", input.Label.DOG)
    test_set.shuffle_set()

    print("{0} classes:".format(len(input.Label)))
    for label in input.Label:
        print(label)
    print()

    print("Training set -> {0} Images ({1}%)".format(
          len(training_set.get_files()),
          100 * len(training_set.get_files()) / (len(training_set.get_files()) + len(test_set.get_files()))))
    print("Test set -> {0} Images ({1}%)".format(
        len(test_set.get_files()),
          100 * len(test_set.get_files()) / (len(training_set.get_files()) + len(test_set.get_files()))))

    batch_size = 128

    train_data = np.asarray(training_set.get_image_set(), dtype=np.float32)
    train_labels = np.asarray(training_set.get_label_array(), dtype=np.float32)
    eval_data = np.asarray(test_set.get_image_set(), dtype=np.float32)
    eval_labels = np.asarray(test_set.get_label_array(), dtype=np.float32)

    estimator = tf.estimator.Estimator(model_fn=layers.cnn_model_fn, model_dir="/tmp/cnn")

    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)

    estimator.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = estimator.evaluate(input_fn=eval_input_fn)
    print(eval_results)


if __name__ == "__main__":
    tf.app.run()

Topic		Replies	Views
Tensorflow NaN loss during training: trying to reshape logits and labels	0	4269	July 31, 2018
Nan in summary histogram for: gradient	2	4456	June 15, 2017
NaNs in Tensor for model, decreasing learning rate doesn't help	0	640	May 5, 2018
Loss are NaN when using KLqp or Bayesian by Backpropagation	2	1464	May 5, 2018
Multinomial on convolutionnal model returns nans (equivalent code with Pyro)	1	948	June 6, 2018

Tensorflow NanLossDuringTrainingError

Related topics