Commit fef81ce9 authored by Eva Lina Fesefeldt

Convergence estimator and actual backward error

parent dfcfbf1e
@@ -5,6 +5,7 @@ from numpy.linalg import norm
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import math
from generate_dataset import generate_tictactoe
from helper import matrix_trainable_shape_to_flat_shape
@@ -17,9 +18,8 @@ from matrixalgorithmen.lanczos import lanczos_sym
# Create the data
train_set, train_labels = generate_tictactoe()
norms = norm(train_labels, ord=1, axis=0)
size_hidden_layer = 30
size_hidden_layer = 100
number_of_parameters = 9*size_hidden_layer + size_hidden_layer + 3 * size_hidden_layer + 3
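# Parameter count of the 9 -> hidden -> 3 network: 9*h kernel weights + h biases + 3*h kernel weights + 3 biases; for size_hidden_layer = 100 this is 900 + 100 + 300 + 3 = 1303.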
# Create the neural network
@@ -52,7 +52,7 @@ dataset = tf.data.Dataset.from_tensor_slices((train_set, train_labels))
# Train the classifier
loss_fn = tf.keras.losses.MeanSquaredError()
model.compile(optimizer='adam', loss=loss_fn)
#model.fit(train_set, train_labels, batch_size=32, epochs=10000, verbose=0)
model.fit(train_set, train_labels, batch_size=32, epochs=10000, verbose=0)
weights_and_bias = model.get_weights()
predictions = model.predict(train_set)
@@ -77,6 +77,10 @@ h = t2.jacobian(grad, [layer1.kernel, layer1.bias, layer2.kernel, layer2.bias])
hessian_nested_tapes = matrix_trainable_shape_to_flat_shape(model, h)
# Check symmetry
antisymmetrischer_anteil = 1/2 * (hessian_nested_tapes - tf.transpose(hessian_nested_tapes))
print("Deviation from symmetry: ", np.linalg.norm(antisymmetrischer_anteil.numpy(), 2))
r_numpy = np.reshape(np.random.rand(number_of_parameters), (number_of_parameters,))
#np.save("random_residuum", r_numpy)
#r_numpy = np.load("random_residuum.npy")
@@ -88,8 +92,10 @@ m = 100
# Lanczos method for the neural network
from hessian_lanczos import lanczos_sym_knn
a1, b1 = lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, m)
S = np.eye(m+1,m+1)
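# With S chosen as the identity, the third return value Y equals the full Lanczos basis Q (shape n x (m+1)).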
a1, b1, Q = lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, S, m)
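# T is the (m+1) x m tridiagonal Lanczos matrix: alpha on the main diagonal, beta on the sub- and superdiagonal (the superdiagonal only has room for the first m-1 entries of beta).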
T = diags([b1, a1, b1[:m-1]], [-1, 0, 1], shape=(m+1, m), format='csr').toarray()
print("Genauigkeit der Hessenberg-Zerlegung:", np.linalg.norm(hessian_nested_tapes@Q[:,:m] - Q@T, ord=2))
eig_T = eigvalsh(T[:m,:m])
eig_T_axis = np.linspace(0,1,m)
@@ -112,8 +118,9 @@ plt.close()
eig_T_k = np.zeros((m,m))
bound_lanczos = np.zeros((m,m))
berr_lanczos = np.zeros((m,m))
# Colour plots
# Estimator for the backward error
for k in range(m):
# Compute the eigenvalues of the Hessenberg matrix
@@ -122,24 +129,59 @@ for k in range(m):
# Compute the estimated backward error
bound_lanczos[:k+1,k] = abs(S_T_k[k,:]*T[k+1,k])
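# Standard Lanczos residual identity: for a Ritz pair (theta_i, y_i) of the leading (k+1) x (k+1) block, ||H y_i - theta_i y_i|| = |T[k+1,k]| * |last component of the i-th eigenvector|, so the estimate only needs the eigenvectors S_T_k of the small block and the next off-diagonal entry.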
# Part a) plot
# Compute the exact backward error
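# Relative residual ||H q_j - theta_j q_j|| / ||q_j||, formed with the j-th column of Q and the j-th eigenvalue of the leading block.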
for j in range(k+1):
temp1 = hessian_nested_tapes.numpy()@Q[:,j]
temp2 = Q[:,j]*eig_T_k[j,k]
temp3 = np.linalg.norm(Q[:,j])
berr_lanczos[j,k] = np.linalg.norm(temp1 - temp2)/temp3
# Estimator plot
plt.figure(figsize=(12,8))
plt.xlabel('Eigenvalues')
plt.ylabel('Step')
plt.ylim(0,m+1)
l_max = max(np.max(berr_lanczos), np.max(bound_lanczos))
l_min = l_max
for k in range(1,m):
l_min_candidate = min(bound_lanczos[:k+1,k].min()+1e-16, berr_lanczos[:k+1,k].min()+1e-16)
l_min = min(l_min, l_min_candidate)
cn = matplotlib.colors.LogNorm(l_min, l_max)
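# One global LogNorm over both the estimated and the exact backward errors, so the two colour plots share a common colour scale and are directly comparable (the old per-step normalisation is kept below only as commented-out code).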
plt.plot(eig_A, (m+1)*np.ones(number_of_parameters), 'r+')
for k in range(1,m):
temp = bound_lanczos[:k+1,k]
l_min = bound_lanczos[:k+1,k].min()+1e-16
l_max = bound_lanczos[:k+1,k].max()
cn = matplotlib.colors.LogNorm(l_min, l_max)
#temp = bound_lanczos[:k+1,k]
#l_min = bound_lanczos[:k+1,k].min()+1e-16
#l_max = bound_lanczos[:k+1,k].max()
#cn = matplotlib.colors.LogNorm(l_min, l_max)
c = plt.cm.cool(cn(bound_lanczos[:k+1,k]))
for j in range(k+1):
colour = c[j]
plot = plt.semilogx(eig_T_k[j,k], k+1, '.', c=c[j], fillstyle='none')
sm = plt.cm.ScalarMappable(cmap=plt.cm.cool, norm=cn)
plt.colorbar(sm)
plt.title("Lanczos-Ritz Konvergenz Schätzer bei MSE Loss = " + str(loss_fn(train_labels, predictions).numpy()))
plt.show()
# Actual backward error
plt.figure(figsize=(12,8))
plt.xlabel('Eigenvalues')
plt.ylabel('Step')
plt.ylim(0,m+1)
plt.plot(eig_A, (m+1)*np.ones(number_of_parameters), 'r+')
for k in range(1,m):
#cn = matplotlib.colors.LogNorm(berr_lanczos[:k+1,k].min()+1e-16, berr_lanczos[:k+1,k].max())
c = plt.cm.cool(cn(berr_lanczos[:k+1,k]))
for j in range(k+1):
plot = plt.semilogx(eig_T_k[j,k], k+1, '.', c=c[j], fillstyle='none')
sm = plt.cm.ScalarMappable(cmap=plt.cm.cool, norm=cn)
plt.colorbar(sm)
plt.title("Lanczos-Ritz Konvergenz Schätzer")
plt.title('Lanczos-Ritz-Verfahren: Rückwärtsfehler bei MSE Loss = ' + str(loss_fn(train_labels, predictions).numpy()))
plt.show()
plt.close()
\ No newline at end of file
@@ -5,6 +5,7 @@ from jvp import _back_over_forward_hvp, _back_over_back_hvp
from math import sqrt
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from helper import *
# trainable shape := list of weights and biases, used to stay compatible with AD
# flat shape := vector of all weights and biases, used for the plots
@@ -17,7 +18,7 @@ def reeles_skalarprodukt_trainable_shape(v_1, v_2):
sum += np.sum(v_1[i]*v_2[i])
return sum
def lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, m=None):
def lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, S=None, m=None):
n = model.count_params()
weights_and_biases_list = model.get_weights()
@@ -29,6 +30,13 @@ def lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, m=None):
if m is None:
m = n
if S is None:
S = np.zeros((m+1, 0))
k,l = S.shape
if k != m+1:
print("S has the wrong dimensions")
# Line 1
alpha = np.zeros(m)
@@ -37,66 +45,69 @@ def lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, m=None):
# Line 3
for i in range(len(weights_and_biases_list)):
p.append(r[i]/norm_r) # ok
p.append(r[i]/norm_r)
norm_p = sqrt(reeles_skalarprodukt_trainable_shape(p,p))
# Line 4
p_flat = vector_trainable_shape_to_flat_shape(p)
Y = p_flat.reshape((n,1)) * S[0,:].reshape((1,l))
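# Y accumulates Q*S with one rank-1 update per Lanczos vector, so the full basis never has to be stored in trainable shape.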
# Line 5
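# Hessian-vector product H*p via back-over-forward automatic differentiation; no explicit Hessian is formed.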
Ap = _back_over_forward_hvp(model, train_set, train_labels, p, loss_fn)
r = Ap
norm_r = sqrt(reeles_skalarprodukt_trainable_shape(r,r))
# Line 5
# Line 6
alpha[0] = reeles_skalarprodukt_trainable_shape(r,p)
# Line 6
# Line 7
for i in range(len(weights_and_biases_list)):
r[i] = r[i] - p[i]*alpha[0]
# Line 7
# Line 8
beta[0] = sqrt(reeles_skalarprodukt_trainable_shape(r,r))
# Line 8
# Line 9
for i in range(len(weights_and_biases_list)):
q.append(r[i]/beta[0])
norm_q = sqrt(reeles_skalarprodukt_trainable_shape(q,q))
# Line 10
q_flat = vector_trainable_shape_to_flat_shape(q)
Y = Y + q_flat.reshape((n,1)) * S[1,:].reshape((1,l))
# Line 9
# Line 11
for k in range(1,m):
Aq = _back_over_back_hvp(model, train_set, train_labels, q, loss_fn)
norm_Aq = sqrt(reeles_skalarprodukt_trainable_shape(Aq,Aq))
# Line 10
# Line 12
for i in range(len(weights_and_biases_list)):
r[i] = Aq[i] - p[i]*beta[k-1]
norm_r = sqrt(reeles_skalarprodukt_trainable_shape(r,r))
# Line 11
# Line 13
alpha[k] = reeles_skalarprodukt_trainable_shape(r,q)
# Line 12
# Line 14
for i in range(len(weights_and_biases_list)):
r[i] = r[i] - q[i]*alpha[k]
# Line 13
# Line 15
beta[k] = sqrt(reeles_skalarprodukt_trainable_shape(r,r))
# Line 14: list assignments do not work like this
# Line 16
temp = p
p = q
q = temp
norm_p = sqrt(reeles_skalarprodukt_trainable_shape(p,p))
# Line 15
# Line 17
for i in range(len(weights_and_biases_list)):
q[i] = r[i]/beta[k]
norm_q = sqrt(reeles_skalarprodukt_trainable_shape(q,q))
return alpha, beta
# Line 18
q_flat = vector_trainable_shape_to_flat_shape(q)
Y = Y + q_flat.reshape((n,1)) * S[k+1,:].reshape((1,l))
return alpha, beta, Y
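A minimal usage sketch of the new S/Y interface (not part of the commit; `model`, `train_set`, `train_labels` and `loss_fn` are assumed to be set up as in the script above, and `eigvalsh` may equally come from numpy.linalg or scipy.linalg):

import numpy as np
from scipy.sparse import diags
from scipy.linalg import eigvalsh
from hessian_lanczos import lanczos_sym_knn

m = 100
# Random start vector in trainable shape: one array per weight/bias tensor.
r = [np.random.rand(*w.shape) for w in model.get_weights()]
# With S = I the third return value is the Lanczos basis Q (n x (m+1)).
S = np.eye(m+1)
a, b, Q = lanczos_sym_knn(model, train_set, train_labels, r, loss_fn, S, m)
# Assemble the (m+1) x m tridiagonal Lanczos matrix and take the Ritz values.
T = diags([b, a, b[:m-1]], [-1, 0, 1], shape=(m+1, m)).toarray()
ritz_values = eigvalsh(T[:m, :m])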