Commit 8a83923e by Eva Lina Fesefeldt

### Regularisierung auf Minimalbeispiel

parent 2866fca5
 ... ... @@ -69,10 +69,11 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=No return x # Recreate the hessianlearn CG method def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, abs_tol=1e-12, rel_tol=1e-09, m=None): def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x_0, gamma, abs_tol=1e-12, rel_tol=1e-09, m=None): n = model.count_params() if m == None: m = n ... ... @@ -80,22 +81,38 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x x = x_0.copy() weights_and_biases_list = model.get_weights() for i in range(len(weights_and_biases_list)): b[i] = b[i] - 1 Ax = _back_over_back_hvp(model, train_set, train_labels, x, loss_fn) r = [] for i in range(len(weights_and_biases_list)): r.append(b[i] - Ax[i]) p = r.copy() rho_minus = reelles_skalarprodukt_trainable_shape(r,r) rho_minus = reelles_skalarprodukt_trainable_shape(r,r).astype('float32') # Tolerance # Tolerance bestimmen r_tol2 = rho_minus * rel_tol * rel_tol a_tol2 = abs_tol * abs_tol tol = max(r_tol2, a_tol2) Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn) for i in range(len(weights_and_biases_list)): Ap[i] = Ap[i] + gamma*p[i] pAp = reelles_skalarprodukt_trainable_shape(p,Ap) if pAp <= 0: for i in range(len(weights_and_biases_list)): x[i] += p[i] return x for k in range(m): v = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn) for i in range(len(weights_and_biases_list)): v[i] = v[i] + gamma*p[i] alpha = (rho_minus / reelles_skalarprodukt_trainable_shape(p,v)) ... ... @@ -115,11 +132,15 @@ def CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, b, x for i in range(len(weights_and_biases_list)): p[i] = r[i] + rho/rho_minus * p[i] # Test for negative curvature # Auf negative Krümmung testen Ap = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn) for i in range(len(weights_and_biases_list)): Ap[i] = Ap[i] + gamma*p[i] pAp = reelles_skalarprodukt_trainable_shape(p,Ap) if pAp <= 0: for i in range(len(weights_and_biases_list)): x[i] += p[i] break rho_minus = rho ... ... @@ -132,8 +153,8 @@ train_set, train_labels = generate_tictactoe() dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels)) # Generate the model size_hidden_layer = 1 number_of_epochs = 10000 size_hidden_layer = 30 number_of_epochs = 100 number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3 from keras.models import Sequential from keras.layers import Dense ... ... @@ -170,8 +191,10 @@ loss_numpy = np.zeros(number_of_epochs) filename = "results/MSE_loss_NCG_backtracking_epochs" + str(number_of_epochs) + ".npy" # Print the loss for the untrained model print("Gewichte und Biase: ", list_of_weights_and_biases) print("Initialer Loss: ", loss.numpy()) #print("Gewichte und Biase: ", list_of_weights_and_biases) #print("Initialer Loss: ", loss.numpy()) gamma=1e-4 # Training loop for epoch in range(number_of_epochs): ... ... @@ -198,7 +221,7 @@ for epoch in range(number_of_epochs): m = 200 descent_dir = [] descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, abs_tol=1e-12, rel_tol=1e-09, m=m) descent_dir = CG_trainable_eisenstadt_walker(model, train_set, train_labels, loss_fn, minus_g, x_0, gamma, abs_tol=1e-12, rel_tol=1e-09, m=m) Hp = _back_over_back_hvp(model, train_set, train_labels, descent_dir, loss_fn) ... ...