python – Tried to update a GAN made for graphs to use images, what am I doing wrong?

I’ve been trying to make a GAN in Python based on an article I read a couple of years ago. In the article they made a GAN that was 2D (from: https://blog.paperspace.com/implementing-gans-in-tensorflow/). For the last year or so I’ve been trying to get it to work for 3D (image) data and have not been able to do so. I’m currently feeding the same image to the discriminator as the real sample every time, on the assumption that the generator will, given time, learn to copy that image exactly. But after a while it only makes images of a solid color (at first it’s all random noise).

I’m sure I’m not getting something, but I can’t figure it out. I feel like the problem is with the loss functions and have tried to change them, but that has not made it better.

The input is as follows:

iterations is how many times it should train itself,
prints after how many training sessions it should save a file of the result to the system,
output after how many training sessions it should output the generator and discriminator loss,
image is the file currently in use, in the future this will become a folder with images I want the generator to be able to imitate, but just to see if it works it’s now always the same file.

The file I’m currently feeding it is a square image with half of it black and the other half white.

I condensed the code to two files for this post –

main.py:

import gc
import ganimage

if __name__ == '__main__':
    # Run one garbage-collection pass, then start training on the test image.
    gc.collect()
    ganimage.run_image_gan(
        iterations=1000000000,
        prints=100,
        output=10,
        image="D:/myname/Pictures/testfile5.png",
    )

ganimage.py:

import tensorflow as tf
import trainingData
import os
import datetime
import time
from PIL import Image as pil
import numpy as np

# Scale factor between stored pixel values and the floats fed to the network:
# image_to_np() divides by it and np_to_image() multiplies by it again before
# casting to uint8.
maxvalues = 255
# NOTE(review): not referenced anywhere in the code shown; the name suggests
# per-channel weights for an RGB-to-grey conversion -- confirm or remove.
grey_conversion = [.3, .6, .1]


def run_image_gan(iterations, prints, output, image):
    """Train the generator/discriminator pair on one image and save samples.

    Args:
        iterations: Last iteration index; the loop runs ``iterations + 1``
            times (0 through ``iterations`` inclusive).
        prints: A generated image is written to disk whenever the iteration
            index is a multiple of this value.
        output: Both losses are printed whenever the iteration index is a
            multiple of this value.
        image: Path of the image file fed to the discriminator as the real
            sample.
    """
    # The training image never changes, so decode it once up front instead
    # of re-reading the file on every iteration.
    x_batch = image_to_np(image)
    sample_shape = [x_batch.shape[0], x_batch.shape[1], x_batch.shape[2]]

    x = tf.placeholder(tf.float32, sample_shape)
    z = tf.placeholder(tf.float32, sample_shape)

    g_sample = generator(z)

    r_logits, _ = discriminator(x)
    f_logits, _ = discriminator(g_sample, reuse=True)
    # NOTE(review): minimising this MSE over the discriminator variables
    # rewards the discriminator for scoring real and generated input the
    # same, so nothing pushes it to tell them apart.  The usual GAN
    # discriminator loss is cross-entropy of r_logits against ones plus
    # cross-entropy of f_logits against zeros.  Left as posted -- confirm
    # before changing.
    disc_loss = tf.reduce_mean(tf.losses.mean_squared_error(r_logits, f_logits))
    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=f_logits, labels=tf.ones_like(f_logits)))

    gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Generator")
    disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Discriminator")

    # Each optimizer only updates its own network's variables.
    gen_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        gen_loss, var_list=gen_vars)
    disc_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        disc_loss, var_list=disc_vars)

    # One output folder per run, named after the start time.
    now = datetime.datetime.now()
    folder = "D:/myname/Pictures/generated/" + now.strftime("%Y %B %d %Hh%Mm%Ss")
    os.makedirs(folder, exist_ok=True)

    # The context manager releases the session's resources even if training
    # is interrupted by an exception.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        start_time = round(time.time(), 1)

        for i in range(iterations + 1):
            z_batch = trainingData.random_data_img(x_batch)
            _, dloss = sess.run([disc_step, disc_loss], feed_dict={x: x_batch, z: z_batch})
            _, gloss = sess.run([gen_step, gen_loss], feed_dict={z: z_batch})

            if i % output == 0:
                print('Iterations: %d\t Discriminator loss: %.20f\t Generator loss: %.20f'
                      % (i, dloss, gloss))

            if i % prints == 0:
                # Only run the generator forward pass when its output is
                # actually going to be saved.
                g_image = sess.run(g_sample, feed_dict={z: z_batch})
                elapsed = round(time.time() - start_time, 0)
                np_to_image(g_image).save(
                    os.path.join(folder, 'iteration_' + str(i) + '_' + str(elapsed) + 's.png'))


def discriminator(x, hsize=(1024, 1024), reuse=False):
    """Build the discriminator layers under the "GAN/Discriminator" scope.

    Args:
        x: Input tensor; ``x.shape[2]`` sets the width of the last two layers.
        hsize: Unit counts of the first two hidden layers.  The default is a
            tuple so the default object cannot be mutated between calls; any
            indexable pair (including a list) is still accepted.
        reuse: Forwarded to ``tf.variable_scope``; pass True to share the
            variables created by an earlier call.

    Returns:
        ``(out, h3)``: the output of the final linear layer and the
        activation of the layer before it.
    """
    # NOTE(review): tf.layers.dense only transforms the last axis, so with a
    # (height, width, channels) input every pixel appears to be scored
    # independently of its neighbours -- confirm this is intended.
    with tf.variable_scope("GAN/Discriminator", reuse=reuse):
        h1 = tf.layers.dense(x, hsize[0], activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1, hsize[1], activation=tf.nn.leaky_relu)
        h3 = tf.layers.dense(h2, x.shape[2], activation=tf.nn.leaky_relu)
        out = tf.layers.dense(h3, x.shape[2])

    return out, h3


def generator(z, hsize=(1024, 1024), reuse=False):
    """Build the generator layers under the "GAN/Generator" scope.

    Args:
        z: Noise tensor; ``z.shape[2]`` sets the width of the last two layers.
        hsize: Unit counts of the first two hidden layers.  The default is a
            tuple so the default object cannot be mutated between calls; any
            indexable pair (including a list) is still accepted.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Output of the final linear layer (no activation is applied, so the
        values are not bounded to any range).
    """
    # NOTE(review): tf.layers.dense only transforms the last axis, so with a
    # (height, width, channels) input every output pixel appears to depend
    # only on the noise at the same position -- confirm this is intended.
    with tf.variable_scope("GAN/Generator", reuse=reuse):
        h1 = tf.layers.dense(z, hsize[0], activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1, hsize[1], activation=tf.nn.leaky_relu)
        h3 = tf.layers.dense(h2, z.shape[2], activation=tf.nn.leaky_relu)
        out = tf.layers.dense(h3, z.shape[2])
    return out


def image_to_np(filename):
    """Open the image at `filename` and return its pixel array divided by `maxvalues`."""
    raw_pixels = np.array(pil.open(filename))
    return raw_pixels / maxvalues


def np_to_image(array):
    """Convert an array of values scaled by ``1 / maxvalues`` into a PIL image.

    Values are multiplied by ``maxvalues``, rounded, and clipped to
    ``[0, maxvalues]`` before the uint8 cast.  Without the clip, inputs
    outside [0, 1] (the generator's final layer has no bounding activation)
    do not fit in uint8 and the cast yields wrapped or arbitrary pixels.
    """
    scaled = np.round(np.multiply(array, maxvalues), 0)
    pixels = np.clip(scaled, 0, maxvalues).astype('uint8')
    return pil.fromarray(pixels)

EDIT: As requested a sample output of a run;

A run of 100 iterations with output and prints at 10 gives this output:

C:\Users\myname\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
C:\Users\myname\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
C:\Users\myname\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
C:\Users\myname\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
C:\Users\myname\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
C:\Users\myname\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
2022-04-17 21:33:32.454448: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: 
name: GeForce GTX 980 major: 5 minor: 2 memoryClockRate(GHz): 1.291
pciBusID: 0000:01:00.0
totalMemory: 4.00GiB freeMemory: 3.30GiB
2022-04-17 21:33:32.454927: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2022-04-17 21:33:33.802497: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-04-17 21:33:33.802763: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 
2022-04-17 21:33:33.802929: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0:   N 
2022-04-17 21:33:33.803255: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3013 MB memory) -> physical GPU (device: 0, name: GeForce GTX 980, pci bus id: 0000:01:00.0, compute capability: 5.2)
Iterations: 0    Discriminator loss: 0.00040598728810437024  Generator loss: 0.69306391477584838867
Iterations: 10   Discriminator loss: 0.00037987064570188522  Generator loss: 0.69306457042694091797
Iterations: 20   Discriminator loss: 0.00034027246874757111  Generator loss: 0.69306617975234985352
Iterations: 30   Discriminator loss: 0.00028256041696295142  Generator loss: 0.69307404756546020508
Iterations: 40   Discriminator loss: 0.00020526489242911339  Generator loss: 0.69308030605316162109
Iterations: 50   Discriminator loss: 0.00012012914521619678  Generator loss: 0.69308072328567504883
Iterations: 60   Discriminator loss: 0.00004914442615699954  Generator loss: 0.69309645891189575195
Iterations: 70   Discriminator loss: 0.00002701636549318209  Generator loss: 0.69307559728622436523
Iterations: 80   Discriminator loss: 0.00001953361970663536  Generator loss: 0.69310081005096435547
Iterations: 90   Discriminator loss: 0.00001449948649678845  Generator loss: 0.69317066669464111328
Iterations: 100  Discriminator loss: 0.00000890944556886097  Generator loss: 0.69325947761535644531

Process finished with exit code 0

And the following images:

iteration_0_1.0s.png

iteration_20_1.0s.png

iteration_40_1.0s.png

iteration_60_2.0s.png

iteration_80_2.0s.png

iteration_100_2.0s.png

The 2D version would work well, I tried several lines and it would make the same line. For this 3D one I tried letting it run 48 hours once thinking it maybe needed more time, but I always end up with the square completely filled in with the same color.

EDIT2: This should be copy paste runnable I think;

import tensorflow as tf
import os
import datetime
import time
from PIL import Image as pil
import numpy as np

# Scale factor between stored pixel values and the floats fed to the network:
# image_to_np() divides by it and np_to_image() multiplies by it again before
# casting to uint8.
maxvalues = 255
# NOTE(review): not referenced anywhere in the code shown; the name suggests
# per-channel weights for an RGB-to-grey conversion -- confirm or remove.
grey_conversion = [.3, .6, .1]


def run_image_gan(iterations, prints, output, image):
    """Train the generator/discriminator pair on one image and save samples.

    Besides the periodic generator output, this variant prints and saves the
    real image and the noise input on every iteration for debugging.

    Args:
        iterations: Last iteration index; the loop runs ``iterations + 1``
            times (0 through ``iterations`` inclusive).
        prints: A generated image is written to disk whenever the iteration
            index is a multiple of this value.
        output: Both losses are printed whenever the iteration index is a
            multiple of this value.
        image: Path of the image file fed to the discriminator as the real
            sample.
    """
    # The training image never changes, so decode it once up front instead
    # of re-reading the file on every iteration.
    x_batch = image_to_np(image)
    sample_shape = [x_batch.shape[0], x_batch.shape[1], x_batch.shape[2]]

    x = tf.placeholder(tf.float32, sample_shape)
    z = tf.placeholder(tf.float32, sample_shape)

    g_sample = generator(z)

    r_logits, _ = discriminator(x)
    f_logits, _ = discriminator(g_sample, reuse=True)
    # NOTE(review): minimising this MSE over the discriminator variables
    # rewards the discriminator for scoring real and generated input the
    # same, so nothing pushes it to tell them apart.  The usual GAN
    # discriminator loss is cross-entropy of r_logits against ones plus
    # cross-entropy of f_logits against zeros.  Left as posted -- confirm
    # before changing.
    disc_loss = tf.reduce_mean(tf.losses.mean_squared_error(r_logits, f_logits))
    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=f_logits, labels=tf.ones_like(f_logits)))

    gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Generator")
    disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Discriminator")

    # Each optimizer only updates its own network's variables.
    gen_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        gen_loss, var_list=gen_vars)
    disc_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        disc_loss, var_list=disc_vars)

    # One output folder per run, named after the start time.
    now = datetime.datetime.now()
    folder = "D:/myname/Pictures/generated/" + now.strftime("%Y %B %d %Hh%Mm%Ss")
    os.makedirs(folder, exist_ok=True)

    # The context manager releases the session's resources even if training
    # is interrupted by an exception.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        start_time = round(time.time(), 1)

        def sample_path(step, suffix):
            # File name carries the iteration and the seconds elapsed so far.
            elapsed = round(time.time() - start_time, 0)
            return os.path.join(
                folder, 'iteration_' + str(step) + '_' + str(elapsed) + suffix)

        for i in range(iterations + 1):
            print('x_batch: ', x_batch)
            np_to_image(x_batch).save(sample_path(i, 's_x_batch.png'))
            z_batch = random_data_img(x_batch)
            print('z_batch: ', z_batch)
            np_to_image(z_batch).save(sample_path(i, 's_z_batch.png'))
            _, dloss = sess.run([disc_step, disc_loss], feed_dict={x: x_batch, z: z_batch})
            _, gloss = sess.run([gen_step, gen_loss], feed_dict={z: z_batch})

            if i % output == 0:
                print('Iterations: %d\t Discriminator loss: %.20f\t Generator loss: %.20f'
                      % (i, dloss, gloss))

            if i % prints == 0:
                # Only run the generator forward pass when its output is
                # actually going to be saved.
                g_image = sess.run(g_sample, feed_dict={z: z_batch})
                np_to_image(g_image).save(sample_path(i, 's.png'))


def discriminator(x, hsize=(1024, 1024), reuse=False):
    """Build the discriminator layers under the "GAN/Discriminator" scope.

    Args:
        x: Input tensor; ``x.shape[2]`` sets the width of the last two layers.
        hsize: Unit counts of the first two hidden layers.  The default is a
            tuple so the default object cannot be mutated between calls; any
            indexable pair (including a list) is still accepted.
        reuse: Forwarded to ``tf.variable_scope``; pass True to share the
            variables created by an earlier call.

    Returns:
        ``(out, h3)``: the output of the final linear layer and the
        activation of the layer before it.
    """
    # NOTE(review): tf.layers.dense only transforms the last axis, so with a
    # (height, width, channels) input every pixel appears to be scored
    # independently of its neighbours -- confirm this is intended.
    with tf.variable_scope("GAN/Discriminator", reuse=reuse):
        h1 = tf.layers.dense(x, hsize[0], activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1, hsize[1], activation=tf.nn.leaky_relu)
        h3 = tf.layers.dense(h2, x.shape[2], activation=tf.nn.leaky_relu)
        out = tf.layers.dense(h3, x.shape[2])

    return out, h3


def generator(z, hsize=(1024, 1024), reuse=False):
    """Build the generator layers under the "GAN/Generator" scope.

    Args:
        z: Noise tensor; ``z.shape[2]`` sets the width of the last two layers.
        hsize: Unit counts of the first two hidden layers.  The default is a
            tuple so the default object cannot be mutated between calls; any
            indexable pair (including a list) is still accepted.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Output of the final linear layer (no activation is applied, so the
        values are not bounded to any range).
    """
    # NOTE(review): tf.layers.dense only transforms the last axis, so with a
    # (height, width, channels) input every output pixel appears to depend
    # only on the noise at the same position -- confirm this is intended.
    with tf.variable_scope("GAN/Generator", reuse=reuse):
        h1 = tf.layers.dense(z, hsize[0], activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1, hsize[1], activation=tf.nn.leaky_relu)
        h3 = tf.layers.dense(h2, z.shape[2], activation=tf.nn.leaky_relu)
        out = tf.layers.dense(h3, z.shape[2])
    return out


def image_to_np(filename):
    """Open the image at `filename` and return its pixel array divided by `maxvalues`."""
    raw_pixels = np.array(pil.open(filename))
    return raw_pixels / maxvalues


def np_to_image(array):
    """Convert an array of values scaled by ``1 / maxvalues`` into a PIL image.

    Values are multiplied by ``maxvalues``, rounded, and clipped to
    ``[0, maxvalues]`` before the uint8 cast.  Without the clip, inputs
    outside [0, 1] (the generator's final layer has no bounding activation)
    do not fit in uint8 and the cast yields wrapped or arbitrary pixels.
    """
    scaled = np.round(np.multiply(array, maxvalues), 0)
    pixels = np.clip(scaled, 0, maxvalues).astype('uint8')
    return pil.fromarray(pixels)

def random_data_img(image):
    """Return uniform noise in [0, 1) sized like the first three axes of `image`."""
    rows, cols, channels = image.shape[0], image.shape[1], image.shape[2]
    noise = np.random.random_sample((rows, cols, channels))
    return noise.copy()

EDIT3: @Karan, you are a saint for helping as you are doing! Sadly, still can’t get it to fully work. This is where I’m at;

main:

import gc
import ganimage

if __name__ == '__main__':
    # Run one garbage-collection pass, then start batched training on the
    # test image.
    gc.collect()
    ganimage.run_image_gan(
        iterations=1000000,
        prints=10000,
        output=1000,
        batch_size=100,
        image="D:/myname/Pictures/testfile5.png",
    )

ganimage:

import tensorflow as tf
import os
import datetime
import time
from PIL import Image as pil
import numpy as np

# Scale factor between stored pixel values and the floats fed to the network:
# image_to_np() divides by it and np_to_image() multiplies by it again before
# casting to uint8.
maxvalues = 255
# NOTE(review): not referenced anywhere in the code shown; the name suggests
# per-channel weights for an RGB-to-grey conversion -- confirm or remove.
grey_conversion = [.3, .6, .1]


def run_image_gan(iterations, prints, output, batch_size, image):
    """Train the convolutional GAN on batches made from one image.

    Args:
        iterations: Last iteration index; the loop runs ``iterations + 1``
            times (0 through ``iterations`` inclusive).
        prints: Every generated sample of the batch is written to disk
            whenever the iteration index is a multiple of this value.
        output: Both losses are printed whenever the iteration index is a
            multiple of this value.
        batch_size: Number of samples per batch; fixes the leading dimension
            of both placeholders.
        image: Path of the image file; every real sample in a batch is this
            same image.
    """
    # Decode the image once.  Previously the file was re-read
    # 2 * batch_size times on every iteration.
    real_image = image_to_np(image)
    batch_shape = [batch_size,
                   real_image.shape[0], real_image.shape[1], real_image.shape[2]]

    x = tf.placeholder(tf.float32, batch_shape)
    z = tf.placeholder(tf.float32, batch_shape)

    g_sample = generator(z)

    r_logits, _ = discriminator(x)
    f_logits, _ = discriminator(g_sample, reuse=True)
    # NOTE(review): minimising this MSE over the discriminator variables
    # rewards the discriminator for scoring real and generated input the
    # same, so nothing pushes it to tell them apart.  The usual GAN
    # discriminator loss is cross-entropy of r_logits against ones plus
    # cross-entropy of f_logits against zeros.  Left as posted -- confirm
    # before changing.
    disc_loss = tf.reduce_mean(tf.losses.mean_squared_error(r_logits, f_logits))
    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=f_logits, labels=tf.ones_like(f_logits)))

    gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Generator")
    disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Discriminator")

    # Each optimizer only updates its own network's variables.
    gen_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        gen_loss, var_list=gen_vars)
    disc_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        disc_loss, var_list=disc_vars)

    # One output folder per run, named after the start time.
    now = datetime.datetime.now()
    folder = "D:/myname/Pictures/generated/" + now.strftime("%Y %B %d %Hh%Mm%Ss")
    os.makedirs(folder, exist_ok=True)

    # The real batch is the same image repeated, so build it once.
    x_batch = [real_image] * batch_size

    # The context manager releases the session's resources even if training
    # is interrupted by an exception.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        start_time = round(time.time(), 1)

        for i in range(iterations + 1):
            # Fresh noise for every sample of every batch.
            z_batch = [random_data_img(real_image) for _ in range(batch_size)]
            _, dloss = sess.run([disc_step, disc_loss], feed_dict={x: x_batch, z: z_batch})
            _, gloss = sess.run([gen_step, gen_loss], feed_dict={z: z_batch})

            if i % output == 0:
                print('Iterations: %d\t Discriminator loss: %.20f\t Generator loss: %.20f'
                      % (i, dloss, gloss))

            if i % prints == 0:
                # Only run the generator forward pass when its output is
                # actually going to be saved.
                g_image = sess.run(g_sample, feed_dict={z: z_batch})
                for j, sample in enumerate(g_image, start=1):
                    elapsed = round(time.time() - start_time, 0)
                    np_to_image(sample).save(os.path.join(
                        folder,
                        'iteration_' + str(i) + '_nr_' + str(j) + '_' + str(elapsed) + 's.png'))


def discriminator(x, reuse=False):
    """Build the convolutional discriminator under the "GAN/Discriminator" scope.

    Two 3x3 convolutions with 64 filters each, a mean over axis 1 applied
    twice, then a single-unit dense layer.

    Returns:
        ``(out, h3)``: the dense layer's output and the averaged features
        it was computed from.
    """
    with tf.variable_scope("GAN/Discriminator", reuse=reuse):
        features = x
        for _ in range(2):
            features = tf.layers.conv2d(
                features, filters=64, kernel_size=[3, 3], activation=tf.nn.leaky_relu)
        # Average over axis 1 twice; after the first reduction the next
        # remaining axis has moved into position 1.
        pooled = tf.math.reduce_mean(features, 1)
        pooled = tf.math.reduce_mean(pooled, 1)
        logits = tf.layers.dense(pooled, 1)

    return logits, pooled


def generator(z, reuse=False):
    """Build the convolutional generator under the "GAN/Generator" scope.

    Two 3x3 convolutions with 64 filters each, followed by a dense layer
    whose width equals the size of the last axis of `z`.
    """
    with tf.variable_scope("GAN/Generator", reuse=reuse):
        features = z
        for _ in range(2):
            features = tf.layers.conv2d(
                features, filters=64, kernel_size=[3, 3], activation=tf.nn.leaky_relu)
        generated = tf.layers.dense(features, z.shape[-1])
    return generated


def image_to_np(filename):
    """Open the image at `filename` and return its pixel array divided by `maxvalues`."""
    raw_pixels = np.array(pil.open(filename))
    return raw_pixels / maxvalues


def np_to_image(array):
    """Convert an array of values scaled by ``1 / maxvalues`` into a PIL image.

    Values are multiplied by ``maxvalues``, rounded, and clipped to
    ``[0, maxvalues]`` before the uint8 cast.  Without the clip, inputs
    outside [0, 1] (the generator's final layer has no bounding activation)
    do not fit in uint8 and the cast yields wrapped or arbitrary pixels.
    """
    scaled = np.round(np.multiply(array, maxvalues), 0)
    pixels = np.clip(scaled, 0, maxvalues).astype('uint8')
    return pil.fromarray(pixels)


def random_data_img(image):
    """Return uniform noise in [0, 1) sized like the first three axes of `image`."""
    rows, cols, channels = image.shape[0], image.shape[1], image.shape[2]
    noise = np.random.random_sample((rows, cols, channels))
    return noise.copy()

Leave a Comment