Commit 8a83923e authored by Eva Lina Fesefeldt

Regularization on the minimal example

parent 2866fca5
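For context: this change threads a Tikhonov (L2) regularization weight gamma through the hand-rolled Newton-CG solver. Every Hessian-vector product H*p becomes H*p + gamma*p, and the right-hand side of the Newton system changes from -g to -(g + gamma*x), so the solver now targets (H + gamma*I)d = -(g + gamma*x). A minimal NumPy sketch of such a shifted CG solve, with the same negative-curvature safeguard as in the patch (all names below are illustrative, not taken from the repository):

import numpy as np

def cg_shifted(hvp, b, gamma, x0, m=50, tol=1e-12):
    # Conjugate gradient for (H + gamma*I) d = b, where hvp(v) returns H @ v.
    x = x0.copy()
    r = b - (hvp(x) + gamma * x)
    p = r.copy()
    rho = r @ r
    for _ in range(m):
        Ap = hvp(p) + gamma * p        # regularized Hessian-vector product
        pAp = p @ Ap
        if pAp <= 0:                   # negative curvature: take the step and stop
            return x + p
        alpha = rho / pAp
        x = x + alpha * p
        r = r - alpha * Ap
        rho_new = r @ r
        if rho_new < tol:
            break
        p = r + (rho_new / rho) * p
        rho = rho_new
    return x

# Toy usage with a symmetric positive definite H:
H = np.array([[2.0, 0.3], [0.3, 1.0]])
d = cg_shifted(lambda v: H @ v, np.array([1.0, -1.0]), gamma=0.5, x0=np.zeros(2))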
@@ -69,10 +69,11 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=No
return x
# Recreate the hessianlearn CG method
def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, gamma, abs_tol=1e-12, rel_tol=1e-09, m=None):
n = model.count_params()
if m is None:
m = n
@@ -80,22 +81,38 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
x = x_0.copy()
weights_and_biases_list = model.get_weights()
for i in range(len(weights_and_biases_list)):
b[i] = b[i] - 1
Ax = _back_over_back_hvp(model, train_set, train_labels, x, loss_fn)
r = []
for i in range(len(weights_and_biases_list)):
r.append(b[i] - Ax[i])
p = r.copy()
rho_minus = reelles_skalarprodukt_trainable_shape(r,r)
rho_minus = reelles_skalarprodukt_trainable_shape(r,r).astype('float32')
# Tolerance
# Determine the tolerance
r_tol2 = rho_minus * rel_tol * rel_tol
a_tol2 = abs_tol * abs_tol
tol = max(r_tol2, a_tol2)
Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
for i in range(len(weights_and_biases_list)):
Ap[i] = Ap[i] + gamma*p[i]
pAp = reelles_skalarprodukt_trainable_shape(p,Ap)
if pAp <= 0:
for i in range(len(weights_and_biases_list)):
x[i] += p[i]
return x
for k in range(m):
v = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
for i in range(len(weights_and_biases_list)):
v[i] = v[i] + gamma*p[i]
alpha = (rho_minus / reelles_skalarprodukt_trainable_shape(p,v))
@@ -115,11 +132,15 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
for i in range(len(weights_and_biases_list)):
p[i] = r[i] + rho/rho_minus * p[i]
# Test for negative curvature
Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
for i in range(len(weights_and_biases_list)):
Ap[i] = Ap[i] + gamma*p[i]
pAp = reelles_skalarprodukt_trainable_shape(p,Ap)
if pAp <= 0:
for i in range(len(weights_and_biases_list)):
x[i] += p[i]
break
rho_minus = rho
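The helper _back_over_back_hvp is called throughout but its definition is not part of this diff. A plausible sketch of such a back-over-back Hessian-vector product in TensorFlow, with the signature assumed from the call sites rather than confirmed by the repository:

import tensorflow as tf

def _back_over_back_hvp_sketch(model, x_batch, y_batch, v, loss_fn):
    # Hessian-vector product via two reverse-mode sweeps ("back over back").
    with tf.GradientTape() as outer:
        with tf.GradientTape() as inner:
            loss = loss_fn(y_batch, model(x_batch))
        grads = inner.gradient(loss, model.trainable_variables)
        # Inner product <grad, v>, summed over all weight/bias tensors.
        gv = tf.add_n([tf.reduce_sum(g * tf.constant(vi)) for g, vi in zip(grads, v)])
    return outer.gradient(gv, model.trainable_variables)  # equals H @ v, blockwise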
@@ -132,8 +153,8 @@ train_set, train_labels = generate_tictactoe()
dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels))
# Generate the model
size_hidden_layer = 1
number_of_epochs = 10000
size_hidden_layer = 30
number_of_epochs = 100
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3
from keras.models import Sequential
from keras.layers import Dense
@@ -170,8 +191,10 @@ loss_numpy = np.zeros(number_of_epochs)
filename = "results/MSE_loss_NCG_backtracking_epochs" + str(number_of_epochs) + ".npy"
# Print the loss for the untrained model
print("Gewichte und Biase: ", list_of_weights_and_biases)
print("Initialer Loss: ", loss.numpy())
#print("Gewichte und Biase: ", list_of_weights_and_biases)
#print("Initialer Loss: ", loss.numpy())
gamma=1e-4
# Training loop
for epoch in range(number_of_epochs):
@@ -198,7 +221,7 @@ for epoch in range(number_of_epochs):
m = 200
descent_dir = []
descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, abs_tol=1e-12, rel_tol=1e-09, m=m)
descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, gamma, abs_tol=1e-12, rel_tol=1e-09, m=m)
Hp = _back_over_back_hvp(model, train_set, train_labels, descent_dir, loss_fn)
@@ -4,7 +4,7 @@ import numpy as np
def reelles_skalarprodukt_trainable_shape(v_1, v_2):
sum = 0
sum = np.array([0]).astype('float32')
for i in range(len(v_1)):
sum += np.sum(v_1[i]*v_2[i])
return sum
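The helper treats a list of weight/bias arrays as a single flat vector. A quick sanity check with illustrative shapes:

import numpy as np

v = [np.ones((2, 2), dtype='float32'), np.ones(2, dtype='float32')]
w = [2 * np.ones((2, 2), dtype='float32'), 3 * np.ones(2, dtype='float32')]
print(reelles_skalarprodukt_trainable_shape(v, w))  # 4*(1*2) + 2*(1*3) -> 14.0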
@@ -76,7 +76,7 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=No
def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, gamma, abs_tol=1e-12, rel_tol=1e-09, m=None):
n = model.count_params()
@@ -86,6 +86,7 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
x = x_0.copy()
weights_and_biases_list = model.get_weights()
Ax = _back_over_back_hvp(model, train_set, train_labels, x, loss_fn)
r = []
for i in range(len(weights_and_biases_list)):
@@ -101,7 +102,12 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
tol = max(r_tol2, a_tol2)
Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
for i in range(len(weights_and_biases_list)):
Ap[i] = Ap[i] + gamma*p[i]
pAp = reelles_skalarprodukt_trainable_shape(p,Ap)
# Force float32 types for the iterates
alpha = np.array([0]).astype('float32')
if pAp <= 0:
for i in range(len(weights_and_biases_list)):
@@ -110,6 +116,8 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
for k in range(m):
v = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
for i in range(len(weights_and_biases_list)):
v[i] = v[i] + gamma*p[i]
alpha = (rho_minus / reelles_skalarprodukt_trainable_shape(p,v))
@@ -131,6 +139,8 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
# Test for negative curvature
Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
for i in range(len(weights_and_biases_list)):
Ap[i] = Ap[i] + gamma*p[i]
pAp = reelles_skalarprodukt_trainable_shape(p,Ap)
if pAp <= 0:
@@ -147,13 +157,14 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x
number_of_epochs = 100
train_set = 2*np.ones((1,2)).astype('float32')
train_labels = np.ones((2,1)).astype('float32')
train_labels = np.ones((1,1)).astype('float32')
model = Sequential()
model.add(Dense(2,input_dim=2,activation='linear'))
list_of_weights_and_biases = [np.ones((2,2)), np.ones(2)]
model.add(Dense(1,input_dim=2,activation='linear'))
#list_of_weights_and_biases = [np.ones((2,1)), np.ones(1)]
list_of_weights_and_biases = [np.array([[1], [2]], dtype='float32'), np.array([3], dtype='float32')]
#list_of_weights_and_biases = [np.reshape(np.array([0.21052623, 0.21052623]), (2,1)), np.array([0.10526317])]
x_0 = []
for i in range(len(list_of_weights_and_biases)):
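With these starting weights the minimal example can be verified by hand: for the input [2, 2] the model computes y = 1*2 + 2*2 + 3 = 9, so the initial mean-squared-error loss against the label 1 is (9 - 1)^2 = 64.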
@@ -171,6 +182,8 @@ loss = loss_fn(model.predict(train_set), train_labels)
print("Gewichte und Biase: ", list_of_weights_and_biases)
print("Initialer Loss: ", loss.numpy())
gamma = np.array([0.5]).astype('float32')
for epoch in range(number_of_epochs):
x = model.get_weights()
@@ -180,27 +193,36 @@ for epoch in range(number_of_epochs):
#layer2 = model.layers[1]
watch = train_set
with tf.GradientTape() as t1:
watch = layer1(watch)
#watch = layer2(watch)
loss = loss_fn(train_labels, watch)
# Compute the Hessian matrix
with tf.GradientTape() as t2:
with tf.GradientTape() as t1:
watch = layer1(watch)
#watch = layer2(watch)
loss = loss_fn(train_labels, watch)
#g = t1.gradient(loss, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
g = t1.gradient(loss, [layer1.kernel, layer1.bias])
gradient_flat = tf.concat([tf.reshape(g[0], [2,1]), tf.reshape(g[1], [1,1])], axis=0)
#hessian = t2.jacobian(gradient_flat, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
hessian = t2.jacobian(gradient_flat, [layer1.kernel, layer1.bias])
#g = t1.gradient(loss, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
g = t1.gradient(loss, [layer1.kernel, layer1.bias])
if(reelles_skalarprodukt_trainable_shape(g,g) < 1e-24):
print("Norm von g ist zu klein")
break
minus_g = []
b = []
for i in range(len(g)):
minus_g.append(-g[i])
b.append(-(gamma*x[i] + g[i]))
# Solve approximately for the Newton step using CG
m = 200
descent_dir = []
descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, abs_tol=1e-12, rel_tol=1e-09, m=m)
descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, gamma, abs_tol=1e-12, rel_tol=1e-09, m=m)
#print(descent_dir)
@@ -210,9 +232,10 @@ for epoch in range(number_of_epochs):
alpha = 1
rho = 0.5
c = 1e-4
w_dir_inner_g = -reelles_skalarprodukt_trainable_shape(b,descent_dir)
test_step = []
cond = max(loss + c*alpha*reelles_skalarprodukt_trainable_shape(g,descent_dir), 1e-24)
iter_max = 100
cond = max(loss + c*alpha*w_dir_inner_g, 1e-24)
iter_max = 10
for k in range(iter_max):
test_step = []
for i in range(len(list_of_weights_and_biases)):
@@ -223,10 +246,6 @@ for epoch in range(number_of_epochs):
break
alpha = rho*alpha
if(k==iter_max-1):
print("Line search terminated without result")
break
#print(test_step)
loss = loss_fn(model.predict(train_set), train_labels)
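The loop above is a standard Armijo backtracking line search; note that w_dir_inner_g = -<b, descent_dir> is the directional derivative of the regularized objective, since b = -(g + gamma*x). A compact standalone version for a flat parameter vector (f, grad, and all names here are stand-ins, not repository code):

import numpy as np

def armijo_backtracking(f, grad, w, d, alpha=1.0, rho=0.5, c=1e-4, iter_max=10):
    # Shrink alpha until f(w + alpha*d) <= f(w) + c*alpha*<grad(w), d>.
    f0 = f(w)
    slope = grad(w) @ d   # negative for a descent direction
    for _ in range(iter_max):
        if f(w + alpha * d) <= f0 + c * alpha * slope:
            return alpha
        alpha *= rho
    return alpha          # smallest alpha tried if no sufficient decrease was found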
@@ -16,17 +16,17 @@ from generate_dataset import generate_tictactoe
x_train = np.reshape(np.array([2, 2]).astype('float32'), (1,2))
y_train = np.reshape(np.array([1, 1]).astype('float32'), (1,2))
y_train = np.reshape(np.array([1]).astype('float32'), (1,1))
model = Sequential()
model.add(Dense(2,input_dim=2,activation='linear'))
list_of_weights_and_biases = [np.reshape(np.array([1, 1, 1, 1]), (2,2)), np.array([1, 1])]
model.add(Dense(1,input_dim=2,activation='linear'))
#list_of_weights_and_biases = [np.reshape(np.array([0.21052623, 0.21052623]), (2,1)), np.array([0.10526317])]
list_of_weights_and_biases = [np.reshape(np.array([1, 2]), (2,1)), np.array([3])]
problem = ClassificationProblem(model, loss_type='least_squares', dtype=tf.float32)
regularization = L2Regularization(problem, gamma = 0)
regularization = L2Regularization(problem, gamma = 0.5)
# Instantiate the data object
train_data = {problem.x:x_train}
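Note that gamma = 0.5 passed to L2Regularization matches the gamma used with the hand-rolled CG above, so both solvers should be targeting the same regularized system (H + 0.5*I)d = -(g + 0.5*w).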