Commit 2866fca5 authored by Eva Lina Fesefeldt

Minimal example and convergent version of IN-CG

parent 648d9205
import tensorflow as tf
import numpy as np

def reelles_skalarprodukt_trainable_shape(v_1, v_2):
    sum = 0
    for i in range(len(v_1)):
        sum += np.sum(v_1[i]*v_2[i])
    return sum
@@ -15,6 +15,7 @@ from helper import reelles_skalarprodukt_trainable_shape

def norm_trainable_shape(x):
    return sqrt(reelles_skalarprodukt_trainable_shape(x, x))  # assumes sqrt is imported (e.g. from math) in the collapsed lines above
# Simple CG method
def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=None):
@@ -67,6 +68,7 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=None):
    return x
# Recreate the hessianlearn CG method
def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
@@ -86,7 +88,7 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
    rho_minus = reelles_skalarprodukt_trainable_shape(r, r)
-    # Tolerance bestimmen
+    # Tolerance
    r_tol2 = rho_minus * rel_tol * rel_tol
    a_tol2 = abs_tol * abs_tol
@@ -113,7 +115,7 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
    for i in range(len(weights_and_biases_list)):
        p[i] = r[i] + rho/rho_minus * p[i]
-    # Auf negative Krümmung testen
+    # Test for negative curvature
    Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
    pAp = reelles_skalarprodukt_trainable_shape(p, Ap)
@@ -125,11 +127,11 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
    return x
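The r_tol2/a_tol2 lines above implement hessianlearn's squared-norm stopping test: with rho_0 = <r_0, r_0>, the iteration stops once <r, r> < max(rho_0 * rel_tol**2, abs_tol**2), i.e. once ||r|| falls below max(||r_0|| * rel_tol, abs_tol). A minimal sketch of that test (the function name is illustrative, not repository code):

def cg_converged(rho, rho_0, rel_tol=1e-09, abs_tol=1e-12):
    # Compare squared norms to avoid a sqrt per iteration
    return rho < max(rho_0 * rel_tol**2, abs_tol**2)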
-# Erstelle Daten
+# Generate Data
train_set, train_labels = generate_tictactoe()
dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels))
-# Modell erzeugen
+# Generate the model
size_hidden_layer = 1
number_of_epochs = 10000
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3*size_hidden_layer + 3  # 13n + 3 parameters for hidden size n
@@ -138,10 +140,10 @@ from keras.layers import Dense
from keras import backend as k
model = Sequential()
-model.add(Dense(size_hidden_layer, input_dim=9, activation='sigmoid'))
-model.add(Dense(3, input_dim=size_hidden_layer, activation='sigmoid'))
+model.add(Dense(size_hidden_layer, input_dim=9, activation='relu'))
+model.add(Dense(3, input_dim=size_hidden_layer, activation='relu'))
-# Gewichte und Biase initialisieren, um nachher Vergleichbarkeit zu haben
+# Initialize kernel and bias with pre-generated ones (for comparability across runs)
filename_W1 = "initializers/W_1_n" + str(size_hidden_layer) + ".npy"
filename_b1 = "initializers/b_1_n" + str(size_hidden_layer) + ".npy"
filename_W2 = "initializers/W_2_n" + str(size_hidden_layer) + ".npy"
@@ -158,26 +160,25 @@ for i in range(len(list_of_weights_and_biases)):
model.set_weights(list_of_weights_and_biases)
# Add the loss and make a numpy array to save it
loss_fn = tf.keras.losses.MeanSquaredError()
model.compile(optimizer='adam', loss=loss_fn)
# Pre-training (disabled)
#model.fit(train_set, train_labels, batch_size=32, epochs=1000, verbose=0)
loss = loss_fn(model.predict(train_set), train_labels)
loss_numpy = np.zeros(number_of_epochs)
filename = "results/MSE_loss_NCG_backtracking_epochs" + str(number_of_epochs) + ".npy"
# Print the loss for the untrained model
print("Weights and biases: ", list_of_weights_and_biases)
print("Initial loss: ", loss.numpy())
# Training loop
for epoch in range(number_of_epochs):
    x = model.get_weights()
-    # Gradienten ausrechnen
+    # Calculate the gradient with GradientTape and save -g as the right hand side for CG
    layer1 = model.layers[0]
    layer2 = model.layers[1]
@@ -193,20 +194,26 @@ for epoch in range(number_of_epochs):
    for i in range(len(g)):
        minus_g.append(-g[i])
-    # Approximativ für den Newton-Schritt lösen mittels CG
+    # Solve for the Newton step by CG method
    m = 200
    descent_dir = []
    descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, abs_tol=1e-12, rel_tol=1e-09, m=m)
    Hp = _back_over_back_hvp(model, train_set, train_labels, descent_dir, loss_fn)
    #print(minus_g)
    #print(Hp)
    loss = loss_fn(model.predict(train_set), train_labels).numpy()
    # Step width selection by backtracking (Armijo condition)
    alpha = 1
    rho = 0.5
    c = 1e-4
    test_step = []
-    cond = loss + c*alpha*reelles_skalarprodukt_trainable_shape(g, descent_dir)
-    iter_max = 10000
+    cond = max(loss + c*alpha*reelles_skalarprodukt_trainable_shape(g, descent_dir), 1e-24)
+    iter_max = 100
    for k in range(iter_max):
        test_step = []
        for i in range(len(list_of_weights_and_biases)):
@@ -217,11 +224,11 @@ for epoch in range(number_of_epochs):
            break
        alpha = rho*alpha
    # Save the loss after step width selection
    loss = loss_fn(model.predict(train_set), train_labels)
    loss_numpy[epoch] = loss.numpy()
    np.save(filename, loss_numpy)
-    if epoch % 10 == 0:
-        print("Epoch ", epoch, " Loss: ", loss.numpy())
+    #if epoch % 10 == 0:
+    print("Epoch ", epoch, " Loss: ", loss.numpy())
import math
import copy
import numpy as np

def CG_solve_hessianlearn(model, train_set, train_labels, loss_fn, b, x_0=None):
    iter = 0
    converged = False
    reason_id = 0
    list_of_weights_and_biases = model.get_weights()
    x_0 = []
    for i in range(len(list_of_weights_and_biases)):
        x_0.append(np.zeros_like(list_of_weights_and_biases[i]))
    x = copy.deepcopy(x_0)
    Ax_0 = _back_over_back_hvp(model, train_set, train_labels, x_0, loss_fn)
    # Calculate initial residual r = b - Ax_0
    r = []
    for i in range(len(list_of_weights_and_biases)):
        r.append(b[i] - Ax_0[i])
    z = copy.deepcopy(r)
    # Calculate p (copy array)
    p = copy.deepcopy(z)
    # Calculate tolerance for Eisenstat-Walker conditions
    rz_0 = reelles_skalarprodukt_trainable_shape(r, z)
    rtol2 = rz_0 * 1e-09 * 1e-09
    atol2 = 1e-12 * 1e-12
    tol = max(rtol2, atol2)
    # Check convergence and initialize for solve:
    converged = (rz_0 < tol)
    if converged:
        final_norm = math.sqrt(rz_0)
        print("Converged in ", iter, " iterations with final norm ", final_norm)
        return x, False
    # Check if the direction is negative before taking a step.
    Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
    pAp = reelles_skalarprodukt_trainable_shape(p, Ap)
    negative_direction = (pAp <= 0.0)
    if negative_direction:
        converged = True
        for i in range(len(list_of_weights_and_biases)):
            x[i] += p[i]
        return x, False
    # Loop until convergence
    iter = 1
    # NOTE: the loop below is still the original hessianlearn implementation
    # (TF1, session-based); the self.* / feed_dict calls are not yet ported.
    while True:
        # Calculate alpha
        alpha = rz_0/pAp
        # Update x
        on_boundary, x = self.update_x(x, alpha, p)
        # Update r
        r -= alpha*Ap
        # Apply preconditioner z = M^{-1}r
        feed_dict[self.Minv.x] = r
        z = self.sess.run(self.Minv(), feed_dict=feed_dict)
        # Calculate rz
        rz = np.dot(r, z)
        # print(self.iter, rz)
        # Check convergence
        converged = (rz < tol)
        if converged:
            self.converged = True
            self.reason_id = 1
            self.final_norm = math.sqrt(rz)
            if self.parameters["print_level"] >= 0:
                print(self.reason[self.reason_id])
                print("Converged in ", self.iter, " iterations with final norm ", self.final_norm)
            break
        self.iter += 1
        if self.iter > self.parameters["max_iter"]:
            self.converged = False
            self.reason_id = 0
            self.final_norm = math.sqrt(rz)
            if self.parameters["print_level"] >= 0:
                print(self.reason[self.reason_id])
                print("Not Converged. Final residual norm ", self.final_norm)
            break
        beta = rz / rz_0
        p = z + beta*p
        # Check if the direction is negative, and prepare for next iteration.
        feed_dict[self.problem.dw] = p
        Ap = self.sess.run(self.Aop, feed_dict=feed_dict)
        pAp = np.dot(p, Ap)
        negative_direction = (pAp <= 0.0)
        if negative_direction:
            self.converged = True
            self.reason_id = 2
            self.final_norm = math.sqrt(rz)
            if self.parameters["print_level"] >= 0:
                print(self.reason[self.reason_id])
                print("Converged in ", self.iter, " iterations with final norm ", self.final_norm)
            break
        rz_0 = rz
    return x, on_boundary
\ No newline at end of file
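For reference, the algorithm this port implements, sketched on a single flat numpy vector and without the preconditioning step (the repository instead works on lists of per-layer arrays; names and the test system below are illustrative, not repository code):

import numpy as np

def cg_sketch(A_mv, b, abs_tol=1e-12, rel_tol=1e-09, max_iter=200):
    x = np.zeros_like(b)
    r = b - A_mv(x)  # initial residual r = b - A x_0
    p = r.copy()
    rho = r @ r
    tol = max(rho * rel_tol**2, abs_tol**2)
    for _ in range(max_iter):
        if rho < tol:  # converged
            break
        Ap = A_mv(p)
        pAp = p @ Ap
        if pAp <= 0.0:  # negative curvature: accept p and stop, as above
            return x + p
        alpha = rho / pAp
        x += alpha * p
        r -= alpha * Ap
        rho_new = r @ r
        p = r + (rho_new / rho) * p
        rho = rho_new
    return x

# Quick check on a small SPD system (hypothetical data):
A = np.array([[4.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 2.0])
x = cg_sketch(lambda v: A @ v, b)
print(np.allclose(A @ x, b))  # True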
import numpy as np
from scipy.special import comb
import matplotlib.pyplot as plt
import math

# Find all vectors x in {0,1}^n with exactly k entries equal to 1 (white)
# Returns a matrix of size (N, n) whose rows are the desired vectors
def find_combinations(n, k):
    # Base cases of the recursion
    if k == 0:
        return np.zeros(n)
    if n == 1 and k == 1:
        return np.array([1])
    if n == 1 and k == 0:
        return np.array([0])
    # Number of possible combinations (choose k out of n); allocate the matrix
    N = int(comb(n, k))
    X = np.zeros((N, n))
    # Set the first entry to 1 (white) and solve the subproblem
    number_of_combinations_problem_1 = int(comb(n-1, k-1))
    X[0:number_of_combinations_problem_1, 0] = 1
    X[0:number_of_combinations_problem_1, 1:n] = find_combinations(n-1, k-1)
    if number_of_combinations_problem_1 == N:
        return X
    # Leave the first entry at 0 (black) and solve the subproblem
    X[number_of_combinations_problem_1:, 1:n] = find_combinations(n-1, k)
    return X
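A quick check of the recursion for n = 3, k = 2 (all three ways to place two 1s; output verified by hand):

print(find_combinations(3, 2))
# [[1. 1. 0.]
#  [1. 0. 1.]
#  [0. 1. 1.]]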
# (white wins, black wins, nobody wins)
def winner_one_line(x1, x2, x3):
    if x1 != x2 or x2 != x3:
        return np.array([0, 0, 1]).T
    if x1 == 1:
        return np.array([1, 0, 0]).T
    return np.array([0, 1, 0]).T

def one_tictactoe_label(x):
    strikes = np.zeros((3, 8))
    # All ways to win
    strikes[:, 0] = winner_one_line(x[0], x[4], x[8])  # Diagonal
    strikes[:, 1] = winner_one_line(x[2], x[4], x[6])  # Anti-diagonal
    strikes[:, 2] = winner_one_line(x[0], x[1], x[2])  # Horizontal 1
    strikes[:, 3] = winner_one_line(x[3], x[4], x[5])  # Horizontal 2
    strikes[:, 4] = winner_one_line(x[6], x[7], x[8])  # Horizontal 3
    strikes[:, 5] = winner_one_line(x[0], x[3], x[6])  # Vertical 1
    strikes[:, 6] = winner_one_line(x[1], x[4], x[7])  # Vertical 2
    strikes[:, 7] = winner_one_line(x[2], x[5], x[8])  # Vertical 3
    # A color wins if it has at least one strike and the other color has none
    strikes_white = np.sum(strikes[0, :])
    strikes_black = np.sum(strikes[1, :])
    # White wins
    if strikes_black == 0 and strikes_white > 0:
        return np.array([1, 0, 0])
    # Black wins
    if strikes_white == 0 and strikes_black > 0:
        return np.array([0, 1, 0])
    return np.array([0, 0, 1])
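For example, a board where white (1) completes the top row while black (0) has no full line is labeled as a white win (board chosen for illustration):

x = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0])  # 5 white stones, top row complete
print(one_tictactoe_label(x))  # [1 0 0] -> white wins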
def tictactoe_labels(X):
    N, n = X.shape
    labels = np.zeros((N, 3))
    for i in range(N):
        labels[i, :] = one_tictactoe_label(X[i, :])
    return labels.astype(float)

def generate_tictactoe():
    n = 9
    k = 5
    N = int(comb(n, k))
    X = np.zeros((N, n))
    X = find_combinations(n, k).astype(float)
    labels = tictactoe_labels(X)
    return X, labels
import tensorflow as tf
import numpy as np

def reelles_skalarprodukt_trainable_shape(v_1, v_2):
    sum = 0
    for i in range(len(v_1)):
        sum += np.sum(v_1[i]*v_2[i])
    return sum
# TODO: rewrite for a general model
def vector_flat_shape_to_trainable_shape(v):
    try:
        p, m = v.shape
    except:
        p = v.shape[0]
    n = int((p-3)/13)  # size of the hidden layer
    slice1 = 9*n
    slice2 = 9*n + n
    slice3 = 9*n + n + n*3
    v1 = tf.reshape(v[:slice1], (9, n))
    v2 = tf.reshape(v[slice1:slice2], (n,))
    v3 = tf.reshape(v[slice2:slice3], (n, 3))
    v4 = tf.reshape(v[slice3:], (3,))
    unit_vector_trainable_shape = [v1, v2, v3, v4]
    return unit_vector_trainable_shape
# TODO: rewrite for general n and a general model
def vector_trainable_shape_to_flat_shape(list):
    # Number of parameters p
    p = 0
    for i in range(len(list)):
        try:
            n, m = list[i].shape
        except:
            n = list[i].shape[0]
            m = 1
        finally:
            p += n*m
    n = int((p-3)/13)  # size of the hidden layer
    v1 = list[0]
    v2 = list[1]
    v3 = list[2]
    v4 = list[3]
    slice1 = 9*n
    slice2 = 9*n + n
    slice3 = 9*n + n + n*3
    v = np.zeros((p,))
    v[:slice1] = np.reshape(v1, (9*n,))
    v[slice1:slice2] = np.reshape(v2, (n,))
    v[slice2:slice3] = np.reshape(v3, (3*n,))
    v[slice3:] = np.reshape(v4, (3,))
    return v
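For a flat vector of length p = 13n + 3 the two conversions invert each other; a quick round-trip check for n = 1 (illustrative, not repository code):

v = np.arange(16, dtype=np.float64)  # p = 13*1 + 3 = 16
w = vector_trainable_shape_to_flat_shape(vector_flat_shape_to_trainable_shape(v))
print(np.allclose(v, w))  # True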
def matrix_trainable_shape_to_flat_shape(model, h):
    layer1 = model.layers[0]
    layer2 = model.layers[1]
    n_params = tf.reduce_prod(layer1.kernel.shape) + tf.reduce_prod(layer2.kernel.shape) + tf.reduce_prod(layer1.bias.shape) + tf.reduce_prod(layer2.bias.shape)
    # h[0] is the derivative of the gradient w.r.t. the layer-1 weights
    n_params_D_weights_1 = tf.reduce_prod(layer1.kernel.shape)
    H_weights_1 = tf.reshape(h[0], [n_params, n_params_D_weights_1])
    # h[1] is the derivative of the gradient w.r.t. the layer-1 biases
    n_params_D_bias_1 = tf.reduce_prod(layer1.bias.shape)
    H_bias_1 = tf.reshape(h[1], [n_params, n_params_D_bias_1])
    # h[2] is the derivative of the gradient w.r.t. the layer-2 weights
    n_params_D_weights_2 = tf.reduce_prod(layer2.kernel.shape)
    H_weights_2 = tf.reshape(h[2], [n_params, n_params_D_weights_2])
    # h[3] is the derivative of the gradient w.r.t. the layer-2 biases
    n_params_D_bias_2 = tf.reduce_prod(layer2.bias.shape)
    H_bias_2 = tf.reshape(h[3], [n_params, n_params_D_bias_2])
    # Assemble the Hessian matrix (TODO: preallocate)
    h_mat = tf.concat([H_weights_1, H_bias_1, H_weights_2, H_bias_2], axis=1)
    return h_mat
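The assembled matrix is square: each block has n_params rows, and the column counts 9n, n, 3n, 3 sum back to n_params = 13n + 3, so for size_hidden_layer = 1 the result is 16 x 16. A hypothetical shape check, assuming the n = 1 model and a matching h from above:

# h_mat = matrix_trainable_shape_to_flat_shape(model, h)
# print(h_mat.shape)  # (16, 16), since 9 + 1 + 3 + 3 = 16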
def matrix_flat_shape_to_trainable_shape(model, A):
    layer1 = model.layers[0]
    layer2 = model.layers[1]
    n_params, m = A.shape
    A_trainable = []
    #A_trainable.append(tf.reshape())
\ No newline at end of file