Commit 1f2a670e authored by Eva Lina Fesefeldt
Browse files

Überprüfung Ableitung gefixt, Lanczos angefangen

parent f501091c
......@@ -8,7 +8,6 @@ import matplotlib.pyplot as plt
import numpy as np
from generate_dataset import generate_tictactoe
from generate_model import dense_one_hidden_layer
# Aktivierungsfunktionen und Ableitungen
......@@ -38,11 +37,11 @@ def d_W_1_d_W_1(k, i, train_set, z_1, theta_11, theta_12, theta_13, theta_14, th
ddthetak_ddthetai_f_x_j = np.zeros((126,3)) # Zeilen: Datenpunkte x_j, Spalten: drei Komponenten für den lokalen Loss
x_i = np.reshape(train_set[:,i], (126,1))
x_k = np.reshape(train_set[:,k], (126,1))
temp = theta_11*x_i*(x_k*ddx2_sigma(z_1)*ddx_tau(sigma(z_1)*theta_11+theta_14)+ddx_sigma(z_1)*theta_11*x_k*ddx2_tau(sigma(z_1)*theta_11+theta_14))
temp = theta_11*x_i*(x_k*ddx2_sigma(z_1)*ddx_tau(sigma(z_1)*theta_11+theta_14)+(ddx_sigma(z_1))**2*theta_11*x_k*ddx2_tau(sigma(z_1)*theta_11+theta_14))
ddthetak_ddthetai_f_x_j[:,0] = np.reshape(temp, (126,))
temp = theta_12*x_i*(x_k*ddx2_sigma(z_1)*ddx_tau(sigma(z_1)*theta_12+theta_15)+ddx_sigma(z_1)*theta_12*x_k*ddx2_tau(sigma(z_1)*theta_12+theta_15))
temp = theta_12*x_i*(x_k*ddx2_sigma(z_1)*ddx_tau(sigma(z_1)*theta_12+theta_15)+(ddx_sigma(z_1))**2*theta_12*x_k*ddx2_tau(sigma(z_1)*theta_12+theta_15))
ddthetak_ddthetai_f_x_j[:,1] = np.reshape(temp, (126,))
temp = theta_13*x_i*(x_k*ddx2_sigma(z_1)*ddx_tau(sigma(z_1)*theta_13+theta_16)+ddx_sigma(z_1)*theta_13*x_k*ddx2_tau(sigma(z_1)*theta_13+theta_16) )
temp = theta_13*x_i*(x_k*ddx2_sigma(z_1)*ddx_tau(sigma(z_1)*theta_13+theta_16)+ddx_sigma(z_1)**2*theta_13*x_k*ddx2_tau(sigma(z_1)*theta_13+theta_16) )
ddthetak_ddthetai_f_x_j[:,2] = np.reshape(temp, (126,))
return ddthetak_ddthetai_f_x_j
......@@ -572,9 +571,10 @@ if __name__ == "__main__":
from keras.models import Sequential
from keras.layers import Dense
tf.keras.backend.set_floatx('float64')
model = Sequential()
model.add(Dense(size_hidden_layer, input_dim = 9,activation='sigmoid', dtype='float32'))
model.add(Dense(3, input_dim=size_hidden_layer, activation='sigmoid', dtype='float32'))
model.add(Dense(size_hidden_layer, input_dim = 9,activation='sigmoid'))
model.add(Dense(3, input_dim=size_hidden_layer, activation='sigmoid'))
weights_and_biases_list = model.get_weights()
W_1 = weights_and_biases_list[0]
b_1 = weights_and_biases_list[1]
......@@ -627,4 +627,10 @@ if __name__ == "__main__":
gradient_hand, hessian_hand = grad_and_hesse_matrix(model, train_set, train_labels)
imshow_zero_center(hessian_hand - h_mat_keras.numpy(), "")
\ No newline at end of file
print("Eintrag (4,4):", hessian_hand[3,3], h_mat_keras.numpy()[3,3])
print("Differenz: ", hessian_hand[3,3] - h_mat_keras.numpy()[3,3])
print("Relativer Fehler: ", (hessian_hand[3,3] - h_mat_keras.numpy()[3,3])/hessian_hand[3,3])
imshow_zero_center(hessian_hand - h_mat_keras.numpy(), "Absoluter Fehler Hesse-Matrix")
imshow_zero_center((hessian_hand - h_mat_keras.numpy())/h_mat_keras.numpy(), "Relativer Fehler Hesse-Matrix")
\ No newline at end of file
This diff is collapsed.
import numpy as np
from numpy.linalg import norm
import tensorflow as tf
from generate_dataset import generate_tictactoe
from plots import *
from analytical_derivative_n1 import grad_and_hesse_matrix
from helper import matrix_trainable_shape_to_flat_shape
from jvp import _back_over_forward_hvp, _tf_gradients_forward_over_back_hvp, _back_over_back_hvp
from helper import vector_flat_shape_to_trainable_shape
from helper import vector_trainable_shape_to_flat_shape
# Compare several ways of computing the Hessian (and Hessian-vector products)
# of the MSE loss of a small dense network on the tic-tac-toe data set against
# the analytically derived Hessian.

# Generate the data
train_set, train_labels = generate_tictactoe()
norms = norm(train_labels, ord=1, axis=0)

size_hidden_layer = 1
# Parameter count of the two Dense layers built below:
# kernel 9 x n, bias n, kernel n x 3, bias 3 (n = size_hidden_layer).
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3

# Build the neural network
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(size_hidden_layer, input_dim = 9,activation='sigmoid'))
model.add(Dense(3, input_dim=size_hidden_layer, activation='sigmoid'))
model.summary()

dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels))

# Train the classifier.
# The fit call is disabled, so everything below is evaluated at the weights
# the model was initialised with.
loss_fn = tf.keras.losses.MeanSquaredError()
model.compile(optimizer='adam', loss=loss_fn)
#model.fit(train_set, train_labels, batch_size=32, epochs=10000, verbose=0)

weights_and_bias = model.get_weights()
predictions = model.predict(train_set)
print("Loss: ", loss_fn(train_labels, predictions).numpy())

# Compute the Hessian with nested gradient tapes
layer1 = model.layers[0]
layer2 = model.layers[1]

x = train_set
with tf.GradientTape() as t2:
    with tf.GradientTape() as t1:
        x = layer1(x)
        x = layer2(x)
        loss = loss_fn(train_labels, x)

    # The first-order gradient is taken inside t2's context so that t2
    # records these operations and can differentiate them a second time.
    g = t1.gradient(loss, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
    # Stack the per-variable gradients into one column vector.
    grad = tf.concat([tf.reshape(g[0], [9*size_hidden_layer,1]), tf.reshape(g[1], [size_hidden_layer,1]), tf.reshape(g[2], [size_hidden_layer*3, 1]), tf.reshape(g[3], [3,1])], axis=0)

h = t2.jacobian(grad, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
# Presumably assembles the per-variable Jacobian blocks into one square
# matrix -- confirm against helper.matrix_trainable_shape_to_flat_shape.
hessian_nested_tapes = matrix_trainable_shape_to_flat_shape(model, h)

# Compute gradient and Hessian analytically as the reference
gradient_hand, hessian_hand = grad_and_hesse_matrix(model, train_set, train_labels)

# First column of the identity matrix, i.e. the first unit vector, so H @ v
# is the first column of the Hessian.
unit_vector = tf.eye(number_of_parameters, 1)
unit_vector_trainable_shape = vector_flat_shape_to_trainable_shape(unit_vector)

# Hessian-vector products from the jvp module (by their names:
# back-over-forward and back-over-back -- see jvp.py for the definitions).
Hv_BOF = _back_over_forward_hvp(model, train_set, train_labels, unit_vector_trainable_shape, loss_fn)
#Hv_FOB = _tf_gradients_forward_over_back_hvp(model, train_set, train_labels, unit_vector_trainable_shape, loss_fn)
Hv_BOB = _back_over_back_hvp(model, train_set, train_labels, unit_vector_trainable_shape, loss_fn)

Hv_BOF_vec = vector_trainable_shape_to_flat_shape(Hv_BOF)
Hv_BOB_vec = vector_trainable_shape_to_flat_shape(Hv_BOB)

# Use number_of_parameters instead of a literal 16 so the comparison keeps
# working when size_hidden_layer is changed above.
Hv = np.reshape(hessian_hand@unit_vector, (number_of_parameters,))
Hv_nested_grad = np.reshape(hessian_nested_tapes@unit_vector, (number_of_parameters,))

# Plot the deviation of each method from the analytic Hessian-vector product
vecshow_zero_center(np.reshape(Hv_nested_grad-Hv, (number_of_parameters,1)), "Differenz Nested Gradient zu Hv analytisch")
vecshow_zero_center(np.reshape(Hv_BOF_vec-Hv, (number_of_parameters,1)), "Differenz BOF zu Hv analytisch")
vecshow_zero_center(np.reshape(Hv_BOB_vec-Hv, (number_of_parameters,1)), "Differenz BOB zu Hv analytisch")
\ No newline at end of file
% This file was created by tikzplotlib v0.9.8.
\begin{tikzpicture}
\begin{groupplot}[group style={group size=3 by 3}]
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-000.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-001.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-002.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-003.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-004.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-005.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-006.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-007.png};
\nextgroupplot[
tick align=outside,
tick pos=left,
title={unentschieden},
x grid style={white!69.0196078431373!black},
xmin=-0.5, xmax=2.5,
xtick style={color=black},
y dir=reverse,
y grid style={white!69.0196078431373!black},
ymin=-0.5, ymax=2.5,
ytick style={color=black}
]
\addplot graphics [includegraphics cmd=\pgfimage,xmin=-0.5, xmax=2.5, ymin=2.5, ymax=-0.5] {data-008.png};
\end{groupplot}
\end{tikzpicture}
% This file was created by tikzplotlib v0.9.8.
\begin{tikzpicture}
\begin{axis}[
log basis x={10},
log basis y={10},
tick align=outside,
tick pos=left,
title={Eigenwerte der Hesse-Matrix für n = 1},
x grid style={white!69.0196078431373!black},
xlabel={Eigenwertindex},
xmin=1.75155167408259e-10, xmax=49.7387093403197,
xmode=log,
xtick style={color=black},
y grid style={white!69.0196078431373!black},
ylabel={Betrag des Eigenwertes},
ymin=2.31684888003225e-10, ymax=0.139870040879275,
ymode=log,
ytick style={color=black}
]
\addplot [semithick, red, opacity=0.3, mark=*, mark size=3, mark options={solid}, only marks]
table {%
-0.039544727653265 0
-0.00246070558205247 0
-0.000682254030834883 0
-0.000563587760552764 0
-0.000435220310464501 0
-0.000432378728874028 0
-0.000344131112797186 0
-0.000283828965621069 0
-0.000103800775832497 0
5.80799464078297e-10 0
0.00124311831314117 0
0.00317561579868197 0
0.013360233977437 0
0.0382034070789814 0
0.0544451847672462 0
0.0557951182126999 0
};
\addplot [semithick, blue, mark=*, mark size=3, mark options={solid}, only marks]
table {%
0 0.039544727653265
1 0.00246070558205247
2 0.000682254030834883
3 0.000563587760552764
4 0.000435220310464501
5 0.000432378728874028
6 0.000344131112797186
7 0.000283828965621069
8 0.000103800775832497
9 5.80799464078297e-10
10 0.00124311831314117
11 0.00317561579868197
12 0.013360233977437
13 0.0382034070789814
14 0.0544451847672462
15 0.0557951182126999
};
\end{axis}
\end{tikzpicture}
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
def dense_one_hidden_layer(size_hidden_layer):
    """Build a 9 -> size_hidden_layer -> 3 dense network with fixed weights.

    The parameters are filled with the consecutive integers 1, 2, 3, ... in
    the order hidden kernel, hidden bias, output kernel, output bias. For
    ``size_hidden_layer == 1`` this yields the kernel 1..9, bias 10, kernel
    11..13 and bias 14..16.

    Args:
        size_hidden_layer: Number of units in the hidden layer (>= 1).

    Returns:
        A tuple ``(weights_and_biases_list, model)``: the list
        ``[W_1, b_1, W_2, b_2]`` of NumPy arrays that was assigned, and the
        Keras ``Sequential`` model carrying those weights.

    Raises:
        ValueError: If ``size_hidden_layer`` is smaller than 1.
    """
    if size_hidden_layer < 1:
        raise ValueError("size_hidden_layer must be at least 1")

    # Shapes in the order Keras reports them from get_weights():
    # kernel and bias of the hidden layer, then kernel and bias of the output.
    parameter_shapes = [
        (9, size_hidden_layer),
        (size_hidden_layer,),
        (size_hidden_layer, 3),
        (3,),
    ]

    # Number the parameters consecutively so that every entry is distinct and
    # reproducible, whatever the hidden-layer size is.
    weights_and_biases_list = []
    next_value = 1
    for shape in parameter_shapes:
        count = int(np.prod(shape))
        values = np.arange(next_value, next_value + count)
        weights_and_biases_list.append(np.reshape(values, shape))
        next_value += count

    # Build the network and assign the weights and biases.
    # NOTE(review): the output layer uses 'softmax' here; confirm this is the
    # activation the analytic derivatives are written for.
    model = Sequential()
    model.add(Dense(size_hidden_layer, input_dim = 9,activation='sigmoid'))
    model.add(Dense(3, input_dim=size_hidden_layer, activation='softmax'))
    model.set_weights(weights_and_biases_list)

    return weights_and_biases_list, model
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment