Commit a072f6a1 authored by Eva Lina Fesefeldt

Loss comparison with hessianlearn

parent f1904284
@@ -67,12 +67,70 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=None):
    return x
def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None):
    n = model.count_params()
    if m is None:
        m = n
    x = x_0.copy()
    weights_and_biases_list = model.get_weights()
    Ax = _back_over_back_hvp(model, train_set, train_labels, x, loss_fn)
    r = []
    for i in range(len(weights_and_biases_list)):
        r.append(b[i] - Ax[i])
    p = r.copy()
    rho_minus = reelles_skalarprodukt_trainable_shape(r, r)
    # Determine the stopping tolerance from the relative and absolute parts (in the spirit of Eisenstat-Walker)
    r_tol2 = rho_minus * rel_tol * rel_tol
    a_tol2 = abs_tol * abs_tol
    tol = max(r_tol2, a_tol2)
    for k in range(m):
        v = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
        alpha = rho_minus / reelles_skalarprodukt_trainable_shape(p, v)
        for i in range(len(weights_and_biases_list)):
            r[i] = r[i] - alpha * v[i]
        for i in range(len(weights_and_biases_list)):
            x[i] = x[i] + alpha * p[i]
        if norm_trainable_shape(r)**2 < tol:
            break
        rho = reelles_skalarprodukt_trainable_shape(r, r)
        for i in range(len(weights_and_biases_list)):
            p[i] = r[i] + rho / rho_minus * p[i]
        # Test for negative curvature
        Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
        pAp = reelles_skalarprodukt_trainable_shape(p, Ap)
        if pAp <= 0:
            break
        rho_minus = rho
    return x
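For orientation, a minimal sketch (not part of the commit) of how this solver is invoked in the training loop further below: the right-hand side is the negative gradient, so the returned iterate approximates the Newton direction.

# Sketch only: solve H d = -g for the Newton direction d; g is the list of
# gradient tensors and x_0 a matching list of zero arrays, as set up below.
minus_g = [-g_i for g_i in g]
d = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, m=200)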
# Generate the data
train_set, train_labels = generate_tictactoe()
dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels))
# Create the model
-size_hidden_layer = 30
+size_hidden_layer = 1
number_of_epochs = 10000
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3
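# 9*n weights + n biases in the hidden layer, plus n*3 weights + 3 biases in the output layer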
from keras.models import Sequential
@@ -94,6 +152,10 @@ W_2 = np.load(filename_W2)
b_2 = np.load(filename_b2)
list_of_weights_and_biases = [W_1, b_1, W_2, b_2]
x_0 = []
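# Zero initial guess for the CG iteration, one array per weight/bias tensor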
for i in range(len(list_of_weights_and_biases)):
    x_0.append(np.zeros_like(list_of_weights_and_biases[i]))
model.set_weights(list_of_weights_and_biases)
loss_fn = tf.keras.losses.MeanSquaredError()
@@ -104,11 +166,13 @@ model.compile(optimizer='adam', loss=loss_fn)
#model.fit(train_set, train_labels, batch_size=32, epochs = 1000, verbose=0)
loss = loss_fn(train_labels, model.predict(train_set))
print("Initial loss: ", loss.numpy())
loss_numpy = np.zeros(number_of_epochs)
filename = "results/MSE_loss_NCG_backtracking_epochs" + str(number_of_epochs) + ".npy"
b = [0,0,0,0]
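# Placeholder for the right-hand side of the Newton system, one entry per trainable tensor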
print("Gewichte und Biase: ", list_of_weights_and_biases)
print("Initialer Loss: ", loss)
for epoch in range(number_of_epochs):
@@ -126,22 +190,16 @@ for epoch in range(number_of_epochs):
    g = t1.gradient(loss, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
    # Assemble the right-hand side
-    Hx = _back_over_back_hvp(model, train_set, train_labels, x, loss_fn)
+    minus_g = []
    for i in range(len(g)):
-        b[i] = Hx[i] - g[i]
+        minus_g.append(-g[i])
    # Solve approximately for the Newton step using CG
    m = 200
-    x_new = CG_trainable(model, train_set, train_labels, loss_fn, b, x, m=m, tau=1e-3)
    # Line search
-    descent_dir = []
-    for i in range(len(list_of_weights_and_biases)):
-        descent_dir.append(x_new[i] - x[i])
+    descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, abs_tol=1e-12, rel_tol=1e-09, m=m)
    loss = loss_fn(train_labels, model.predict(train_set)).numpy()
    alpha = 1
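The rest of the loop body is elided here. Given the results filename "MSE_loss_NCG_backtracking_epochs...", the step length alpha is presumably found by backtracking; a minimal sketch under that assumption (the Armijo constant c and the halving factor are not from the commit):

# Hypothetical backtracking (Armijo) line search continuing from alpha = 1;
# c = 1e-4 and the halving factor 0.5 are assumptions, not the commit's values.
gTd = -reelles_skalarprodukt_trainable_shape(minus_g, descent_dir)  # <g, d>, negative for a descent direction
c = 1e-4
base_weights = model.get_weights()
while alpha > 1e-10:
    model.set_weights([w + alpha * d for w, d in zip(base_weights, descent_dir)])
    if loss_fn(train_labels, model.predict(train_set)).numpy() <= loss + c * alpha * gTd:
        break
    alpha *= 0.5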
......
@@ -83,5 +83,5 @@ def generate_tictactoe():
    X = np.zeros((N,n))
    X = find_combinations(n,k).astype(float)
    labels = tictactoe_labels(X)
-    return X, labels
+    return X.astype('float32'), labels.astype('float32')
@@ -21,7 +21,7 @@ x_train, y_train = generate_tictactoe()
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Create the model
-size_hidden_layer = 30
+size_hidden_layer = 1
number_of_epochs = 10000
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3
@@ -53,15 +53,14 @@ data = Data({problem.x:x_train, problem.y_true:y_train}, batch_size=126, validat
HLModelSettings = HessianlearnModelSettings()
HLModelSettings['optimizer'] = 'incg'
-HLModelSettings['fixed_step'] = True
-HLModelSettings['max_sweeps'] = 10000
+HLModelSettings['fixed_step'] = False
+HLModelSettings['max_sweeps'] = 100
HLModelSettings['alpha'] = 5e-2
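# fixed_step = False apparently lets the optimizer choose the step length itself, with alpha as the initial step (reading of the setting names, not verified against the hessianlearn docs)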
HLModelSettings['printing_sweep_frequency'] = 10
HLModel = HessianlearnModel(problem,regularization,data, settings=HLModelSettings)
-HLModel.fit()
+HLModel.fit(w_0 = list_of_weights_and_biases)
-loss_fn = tf.keras.losses.MeanSquaredError()
+loss_fn = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.SUM)
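# The SUM reduction is presumably chosen so the Keras loss is comparable with the summed loss reported by hessianlearn (the comparison this commit is about)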
predictions = model.predict(x_train)
loss = loss_fn(y_train, predictions)
-print()
\ No newline at end of file
+sum_of_squares = tf.math.reduce_sum((predictions - y_train.astype('float32'))**2, axis=1)
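# Per-sample sums of squared errors, apparently for a manual cross-check of the loss value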
@@ -13,7 +13,7 @@ def dict_to_numpy(dictionary):
        A = np.append(A, dictionary[i])
    return A
-infile = open('_logging/-2021-08-06-incg-dW=393-alpha=0.05.pkl','rb')
+infile = open('_logging/-2021-08-19-incg-dW=393.pkl','rb')
meta = pickle.load(infile)
infile.close()
......
@@ -8,8 +8,7 @@ import time
# Import hessianlearn repository
sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
from hessianlearn import *
print("This is the main_INCG python file")
from generate_dataset import generate_tictactoe
settings = {}
settings['batch_size'] = 32
@@ -17,6 +16,34 @@ settings['hess_batch_size'] = 8
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Generate the data
train_set, train_labels = generate_tictactoe()
dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels))
# Create the model
size_hidden_layer = 30
number_of_epochs = 10000
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3
from keras.models import Sequential
from keras.layers import Dense
from keras import backend as k
model = Sequential()
model.add(Dense(size_hidden_layer, input_dim = 9,activation='sigmoid'))
model.add(Dense(3, input_dim=size_hidden_layer, activation='sigmoid'))
# Initialize the weights and biases so that the results are comparable later on
filename_W1 = "initializers/W_1_n" + str(size_hidden_layer) + ".npy"
filename_b1 = "initializers/b_1_n" + str(size_hidden_layer) + ".npy"
filename_W2 = "initializers/W_2_n" + str(size_hidden_layer) + ".npy"
filename_b2 = "initializers/b_2_n" + str(size_hidden_layer) + ".npy"
W_1 = np.load(filename_W1)
b_1 = np.load(filename_b1)
W_2 = np.load(filename_W2)
b_2 = np.load(filename_b2)
list_of_weights_and_biases = [W_1, b_1, W_2, b_2]
model.set_weights(list_of_weights_and_biases)
# Normalize the data
x_train = x_train.astype('float32') / 255.
@@ -45,7 +72,7 @@ data = Data(train_data,settings['batch_size'],\
HLModelSettings = HessianlearnModelSettings()
HLModelSettings['optimizer'] = 'incg'
-HLModelSettings['fixed_step'] = True
+HLModelSettings['fixed_step'] = False
HLModelSettings['max_sweeps'] = 50
HLModelSettings['alpha'] = 5e-2
HLModelSettings['printing_sweep_frequency'] = 10
......