Commit 2967d54d authored by Eva Lina Fesefeldt

MNIST LambdaMax Plot

parent 5f0faef9

# Train the network with the given optimizer and save the weights at the minimum
import argparse

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scipy.sparse import diags
from scipy.linalg import eigvalsh

from helper import vector_flat_shape_to_trainable_shape
from hessian_lanczos import lanczos_sym_knn
parser = argparse.ArgumentParser()
parser.add_argument("opt", type=str)  # "sgd_with_decay", "sgd", "adam", "adagrad"
parser.add_argument("--batch_size", type=int, dest="batch_size")
parser.add_argument("--LR", type=float, dest="LR")
parser.add_argument("--epochs", type=int, dest="epochs")
args = parser.parse_args()

optimizer_str = args.opt

if args.batch_size is not None:
    batch_size = args.batch_size
else:
    batch_size = 32
    print("Setting batch size to default (32)")

if args.LR is not None:
    LR = args.LR
else:
    LR = 0.01
    print("Setting learning rate to default (0.01)")

if args.epochs is not None:
    epochs = args.epochs
else:
    epochs = 10000
    print("Setting epochs to default (10000)")
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# One slot per hidden-layer size; the loop below runs size_hidden_layer = 50..250
# and stores at index size_hidden_layer-1, so the arrays need 250 entries
loss_array = np.zeros((250,))
lambdamax_array = np.zeros((250,))

filename_loss = "results/MSE_loss_" + optimizer_str + "_batch" + str(batch_size) + "_LR" + str(LR) + "_epochs" + str(epochs)
filename_lambdamax = "results/lambdamax_" + optimizer_str + "_batch" + str(batch_size) + "_LR" + str(LR) + "_epochs" + str(epochs)
# Load MNIST
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize
x_train, x_test = x_train / 255.0, x_test / 255.0

# Flatten the images and one-hot encode the labels
input_dim = 28*28
x_train = tf.reshape(x_train, (60000, 28*28))
y_train = tf.one_hot(y_train, depth=10)
for size_hidden_layer in range(50, 251):
    number_of_parameters = input_dim*size_hidden_layer + size_hidden_layer + 10*size_hidden_layer + 10

    model = Sequential()
    model.add(Dense(size_hidden_layer, input_dim=input_dim, activation='sigmoid'))
    model.add(Dense(10, activation='sigmoid'))

    # Initialize weights and biases from saved files, so runs stay comparable
    filename_W1 = "initializers/W_1_n" + str(size_hidden_layer) + ".npy"
    filename_b1 = "initializers/b_1_n" + str(size_hidden_layer) + ".npy"
    filename_W2 = "initializers/W_2_n" + str(size_hidden_layer) + ".npy"
    filename_b2 = "initializers/b_2_n" + str(size_hidden_layer) + ".npy"

    W_1 = np.load(filename_W1)
    b_1 = np.load(filename_b1)
    W_2 = np.load(filename_W2)
    b_2 = np.load(filename_b2)

    list_of_weights_and_biases = [W_1, b_1, W_2, b_2]
    model.set_weights(list_of_weights_and_biases)

    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

    # Train the classifier
    loss_fn = tf.keras.losses.MeanSquaredError()

    if optimizer_str == "sgd":
        opt = tf.keras.optimizers.SGD(learning_rate=LR)
    if optimizer_str == "adam":
        opt = tf.keras.optimizers.Adam(learning_rate=LR)
    if optimizer_str == "sgd_with_decay":
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=LR, decay_steps=10000, decay_rate=0.9)
        opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
    if optimizer_str == "adagrad":
        opt = tf.keras.optimizers.Adagrad(learning_rate=LR)

    model.compile(optimizer=opt, loss=loss_fn)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)

    # Record the loss at the minimum
    weights_and_bias = model.get_weights()
    predictions = model.predict(x_train)
    loss_array[size_hidden_layer-1] = loss_fn(y_train, predictions).numpy()

    # Lanczos iteration for the network Hessian
    # NOTE: vector_flat_shape_to_trainable_shape is still hard-coded for the
    # 9-n-3 tic-tac-toe network (see the TODO in helper.py); for this MNIST
    # model it would need the 784-n-10 layout.
    m = 30
    r_numpy = np.reshape(np.random.rand(number_of_parameters), (number_of_parameters,))
    r_flat_shape = tf.constant(r_numpy, dtype='float32')
    r = vector_flat_shape_to_trainable_shape(r_flat_shape)
    S = np.eye(m+1, m+1)
    a1, b1, Q = lanczos_sym_knn(model, x_train, y_train, r, loss_fn, S, m)

    # Assemble the tridiagonal Lanczos matrix T; its largest eigenvalue in
    # magnitude estimates the extremal eigenvalue of the Hessian
    T = diags([b1, a1, b1], [-1, 0, 1], shape=(m+1, m), format='csr').toarray()
    eig_T = eigvalsh(T[:m, :m])
    lambda_max = max(abs(eig_T))
    lambdamax_array[size_hidden_layer-1] = lambda_max

    np.save(filename_loss, loss_array)
    np.save(filename_lambdamax, lambdamax_array)
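# Why the tridiagonal trick above yields lambda_max, as a self-contained sketch:
# m Lanczos steps on a symmetric matrix A produce a tridiagonal T whose extreme
# eigenvalues (Ritz values) approximate the extreme eigenvalues of A. This
# illustration uses a random dense matrix, not the lanczos_sym_knn API, which
# applies the same scheme matrix-free to the network Hessian.
import numpy as np
from scipy.linalg import eigvalsh

def lanczos_tridiag(A, m, seed=0):
    rng = np.random.default_rng(seed)
    n = A.shape[0]
    q = rng.standard_normal(n)
    q /= np.linalg.norm(q)
    q_prev = np.zeros(n)
    alpha, beta = np.zeros(m), np.zeros(m-1)
    for j in range(m):
        w = A @ q                        # the only access to A: matrix-vector products
        alpha[j] = q @ w
        w -= alpha[j]*q
        if j > 0:
            w -= beta[j-1]*q_prev
        if j < m-1:
            beta[j] = np.linalg.norm(w)  # assumes no breakdown (beta > 0)
            q_prev, q = q, w/beta[j]
    return np.diag(alpha) + np.diag(beta, 1) + np.diag(beta, -1)

A = np.random.rand(200, 200)
A = (A + A.T)/2                          # symmetrize
T = lanczos_tridiag(A, 30)
print(max(abs(eigvalsh(T))), max(abs(eigvalsh(A))))  # extreme Ritz value vs. true value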
@@ -2,11 +2,12 @@ import numpy as np
 from numpy.linalg import norm
 import tensorflow as tf
 import matplotlib.pyplot as plt
 from tensorflow import keras
 from generate_dataset import generate_tictactoe
 from helper import matrix_trainable_shape_to_flat_shape

-for size_hidden_layer in range(1,100):
+for size_hidden_layer in range(1,30):
     # Create the data
     train_set, train_labels = generate_tictactoe()
     norms = norm(train_labels, ord=1, axis=0)
@@ -14,8 +15,8 @@ for size_hidden_layer in range(1,100):
     number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3*size_hidden_layer + 3

     # Build the neural network
-    from tf.keras.models import Sequential
-    from tf.keras.layers import Dense
+    from keras.models import Sequential
+    from keras.layers import Dense
     model = Sequential()
     model.add(Dense(size_hidden_layer, input_dim=9, activation='sigmoid'))
...
import numpy as np
from scipy.special import comb
import matplotlib.pyplot as plt
import math

# Find all vectors x in {0,1}^n with exactly k entries equal to 1 (white);
# here k = 5. Returns a matrix of size (N, n) whose rows are those vectors.
def find_combinations(n, k):
    # Base cases of the recursion
    if k == 0:
        return np.zeros(n)
    if n == 1 and k == 1:
        return np.array([1])
    if n == 1 and k == 0:
        return np.array([0])

    # Number of possible combinations: choose k out of n; allocate the matrix
    N = int(comb(n, k))
    X = np.zeros((N, n))

    # Set the first entry to 1 (white) and recurse on the subproblem
    number_of_combinations_problem_1 = int(comb(n-1, k-1))
    X[0:number_of_combinations_problem_1, 0] = 1
    X[0:number_of_combinations_problem_1, 1:n] = find_combinations(n-1, k-1)
    if number_of_combinations_problem_1 == N:
        return X

    # Leave the first entry at 0 (black) and recurse on the subproblem
    X[number_of_combinations_problem_1:, 1:n] = find_combinations(n-1, k)
    return X
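# For example, find_combinations(3, 2) returns
#   [[1., 1., 0.],
#    [1., 0., 1.],
#    [0., 1., 1.]]
# i.e. all 3-bit vectors with exactly two ones.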
# Outcome of one line as a one-hot vector: (white wins, black wins, nobody wins)
def winner_one_line(x1, x2, x3):
    if x1 != x2 or x2 != x3:
        return np.array([0, 0, 1])
    if x1 == 1:
        return np.array([1, 0, 0])
    return np.array([0, 1, 0])
def one_tictactoe_label(x):
    strikes = np.zeros((3, 8))

    # All ways to win
    strikes[:, 0] = winner_one_line(x[0], x[4], x[8])  # diagonal
    strikes[:, 1] = winner_one_line(x[2], x[4], x[6])  # antidiagonal
    strikes[:, 2] = winner_one_line(x[0], x[1], x[2])  # row 1
    strikes[:, 3] = winner_one_line(x[3], x[4], x[5])  # row 2
    strikes[:, 4] = winner_one_line(x[6], x[7], x[8])  # row 3
    strikes[:, 5] = winner_one_line(x[0], x[3], x[6])  # column 1
    strikes[:, 6] = winner_one_line(x[1], x[4], x[7])  # column 2
    strikes[:, 7] = winner_one_line(x[2], x[5], x[8])  # column 3

    # A color wins if it has at least one strike and the other color has none
    strikes_white = np.sum(strikes[0, :])
    strikes_black = np.sum(strikes[1, :])

    # White wins
    if strikes_black == 0 and strikes_white > 0:
        return np.array([1, 0, 0])
    # Black wins
    if strikes_white == 0 and strikes_black > 0:
        return np.array([0, 1, 0])
    # Nobody wins
    return np.array([0, 0, 1])
def tictactoe_labels(X):
    N, n = X.shape
    labels = np.zeros((N, 3))
    for i in range(N):
        labels[i, :] = one_tictactoe_label(X[i, :])
    return labels.astype(float)
def generate_tictactoe():
    # All boards with 5 white (1) and 4 black (0) fields: N = C(9,5) = 126 samples
    n = 9
    k = 5
    X = find_combinations(n, k).astype('float32')
    labels = tictactoe_labels(X).astype('float32')
    return X, labels
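# Usage:
#   X, labels = generate_tictactoe()
#   X.shape       -> (126, 9)
#   labels.shape  -> (126, 3)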
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Draw 1000 fresh random initializations of the 9-20-3 network and save them
for i in range(1000):
    size_hidden_layer = 20

    model = Sequential()
    model.add(Dense(size_hidden_layer, input_dim=9, activation='sigmoid'))
    model.add(Dense(3, activation='sigmoid'))

    list_weights_bias = model.get_weights()

    filename_W_1 = "initializersn_20/W_1_n20_" + str(i) + ".npy"
    filename_b_1 = "initializersn_20/b_1_n20_" + str(i) + ".npy"
    filename_W_2 = "initializersn_20/W_2_n20_" + str(i) + ".npy"
    filename_b_2 = "initializersn_20/b_2_n20_" + str(i) + ".npy"

    np.save(filename_W_1, list_weights_bias[0])
    np.save(filename_b_1, list_weights_bias[1])
    np.save(filename_W_2, list_weights_bias[2])
    np.save(filename_b_2, list_weights_bias[3])
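# A saved initialization can be restored onto a fresh model of the same shape with
#   model.set_weights([np.load(f) for f in (filename_W_1, filename_b_1, filename_W_2, filename_b_2)])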
import tensorflow as tf
import numpy as np

# Real inner product of two vectors stored in trainable shape (lists of arrays)
def reelles_skalarprodukt_trainable_shape(v_1, v_2):
    total = 0
    for i in range(len(v_1)):
        total += np.sum(v_1[i] * v_2[i])
    return total
# TODO: rewrite for a general model
# Reshape a flat parameter vector into the trainable shapes of the 9-n-3 network.
# The parameter count is p = 9n + n + 3n + 3 = 13n + 3, hence n = (p-3)/13.
def vector_flat_shape_to_trainable_shape(v):
    try:
        p, m = v.shape
    except ValueError:
        p = v.shape[0]

    n = int((p-3)/13)  # size of the hidden layer
    slice1 = 9*n
    slice2 = 9*n + n
    slice3 = 9*n + n + n*3

    v1 = tf.reshape(v[:slice1], (9, n))
    v2 = tf.reshape(v[slice1:slice2], (n,))
    v3 = tf.reshape(v[slice2:slice3], (n, 3))
    v4 = tf.reshape(v[slice3:], (3,))

    unit_vector_trainable_shape = [v1, v2, v3, v4]
    return unit_vector_trainable_shape
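# For the n = 20 network (p = 13*20 + 3 = 263) the returned shapes are
#   [(9, 20), (20,), (20, 3), (3,)]
# matching model.get_weights() of the 9-20-3 Sequential model above.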
# TODO: rewrite for general n and a general model
def vector_trainable_shape_to_flat_shape(tensor_list):
    # Number of parameters p
    p = 0
    for i in range(len(tensor_list)):
        try:
            n, m = tensor_list[i].shape
        except ValueError:
            n = tensor_list[i].shape[0]
            m = 1
        finally:
            p += n*m

    n = int((p-3)/13)  # size of the hidden layer

    v1 = tensor_list[0]
    v2 = tensor_list[1]
    v3 = tensor_list[2]
    v4 = tensor_list[3]

    slice1 = 9*n
    slice2 = 9*n + n
    slice3 = 9*n + n + n*3

    v = np.zeros((p,))
    v[:slice1] = np.reshape(v1, (9*n,))
    v[slice1:slice2] = np.reshape(v2, (n,))
    v[slice2:slice3] = np.reshape(v3, (3*n,))
    v[slice3:] = np.reshape(v4, (3,))
    return v
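# Consistency sketch (assumes eager TF): flat -> trainable -> flat reproduces v
#   v = tf.constant(np.random.rand(263), dtype='float32')   # n = 20
#   w = vector_trainable_shape_to_flat_shape(vector_flat_shape_to_trainable_shape(v))
#   np.allclose(v.numpy(), w)  -> True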
# Reshape the blockwise second derivatives h (one block per trainable variable)
# into one flat Hessian matrix of size (n_params, n_params)
def matrix_trainable_shape_to_flat_shape(model, h):
    layer1 = model.layers[0]
    layer2 = model.layers[1]
    n_params = tf.reduce_prod(layer1.kernel.shape) + tf.reduce_prod(layer2.kernel.shape) + tf.reduce_prod(layer1.bias.shape) + tf.reduce_prod(layer2.bias.shape)

    # h[0] is the derivative of the gradient with respect to the layer-1 weights
    n_params_D_weights_1 = tf.reduce_prod(layer1.kernel.shape)
    H_weights_1 = tf.reshape(h[0], [n_params, n_params_D_weights_1])

    # h[1] is the derivative of the gradient with respect to the layer-1 biases
    n_params_D_bias_1 = tf.reduce_prod(layer1.bias.shape)
    H_bias_1 = tf.reshape(h[1], [n_params, n_params_D_bias_1])

    # h[2] is the derivative of the gradient with respect to the layer-2 weights
    n_params_D_weights_2 = tf.reduce_prod(layer2.kernel.shape)
    H_weights_2 = tf.reshape(h[2], [n_params, n_params_D_weights_2])

    # h[3] is the derivative of the gradient with respect to the layer-2 biases
    n_params_D_bias_2 = tf.reduce_prod(layer2.bias.shape)
    H_bias_2 = tf.reshape(h[3], [n_params, n_params_D_bias_2])

    # Assemble the Hessian matrix (TODO: preallocate instead of concatenating)
    h_mat = tf.concat([H_weights_1, H_bias_1, H_weights_2, H_bias_2], axis=1)
    return h_mat
def matrix_flat_shape_to_trainable_shape(model, A):
    # Left unfinished in this commit; the body below is only a sketch of the
    # intended inverse of matrix_trainable_shape_to_flat_shape: split the
    # columns of A into one block per trainable variable and restore each
    # block's shape.
    n_params, m = A.shape
    A_trainable = []
    offset = 0
    for var in model.trainable_variables:
        block = int(tf.reduce_prod(var.shape))
        A_trainable.append(tf.reshape(A[:, offset:offset+block], [n_params] + var.shape.as_list()))
        offset += block
    return A_trainable