Commit 32e91fbf authored by Eva Lina Fesefeldt's avatar Eva Lina Fesefeldt
Browse files

Vergleich implizit vs explizit CG korrigiert

parent 2e66cd93
......@@ -4,6 +4,7 @@ from tensorflow import keras
from tensorflow.python.framework.dtypes import as_dtype
from tensorflow.python.ops.gen_math_ops import xlog1py_eager_fallback
import matplotlib.pyplot as plt
from math import sqrt
from generate_dataset import generate_tictactoe
from jvp import _back_over_back_hvp
......@@ -11,20 +12,29 @@ from helper import *
from tensorflow.python.eager import forwardprop
from helper import reelles_skalarprodukt_trainable_shape
def norm_trainable_shape(x):
    """Return the Euclidean norm of *x* (a list of weight/bias arrays).

    Computed as the square root of the inner product of x with itself,
    using the project's trainable-shape scalar product helper.
    """
    squared_norm = reelles_skalarprodukt_trainable_shape(x, x)
    return sqrt(squared_norm)
def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=None):
n = model.count_params()
if m == None:
m = n
if tau == None:
tau = 10e-8
tol = tau*norm_trainable_shape(b)
P = np.zeros((n,m))
R = np.zeros((n,m))
resvec = np.zeros(m+1)
trueres = np.zeros(m+1)
norm_x = np.zeros(m+1)
norm_r = np.zeros(m+1)
norm_p = np.zeros(m+1)
norm_v = np.zeros(m+1)
x = x_0.copy()
weights_and_biases_list = model.get_weights()
......@@ -40,26 +50,39 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=No
resvec[0] = rho_minus
trueres[0] = rho_minus
# Initiale Werte abspeichern
norm_x[0] = norm_trainable_shape(x)
norm_r[0] = norm_trainable_shape(r)
norm_p[0] = norm_trainable_shape(p)
for k in range(m):
v = _back_over_back_hvp(model, train_set, train_labels, p, loss_fn)
norm_v[k+1] = norm_trainable_shape(v)
alpha = (rho_minus / reelles_skalarprodukt_trainable_shape(p,v))
for i in range(len(weights_and_biases_list)):
r[i] = r[i] - alpha * v[i]
norm_r[k+1] = norm_trainable_shape(r)
for i in range(len(weights_and_biases_list)):
x[i] = x[i] + alpha * p[i]
norm_x[k+1] = norm_trainable_shape(x)
if reelles_skalarprodukt_trainable_shape(r,r) < tau*reelles_skalarprodukt_trainable_shape(b,b):
if norm_trainable_shape(r) < tol:
resvec[k+1] = rho
print("Beende CG nach ", k, " Schritten")
break
rho = reelles_skalarprodukt_trainable_shape(r,r)
for i in range(len(weights_and_biases_list)):
p[i] = r[i] + rho/rho_minus * p[i]
norm_p[k+1] = norm_trainable_shape(p)
rho_minus = rho
# P,R, TrueRes abspeichern
P[:,k] = vector_trainable_shape_to_flat_shape(p)
R[:,k] = vector_trainable_shape_to_flat_shape(r)
resvec[k+1] = rho
......@@ -69,7 +92,7 @@ def CG_trainable(model, train_set, train_labels, loss_fn, b, x_0, m=None, tau=No
temp.append(b[i] - Hx[i])
trueres[k+1] = reelles_skalarprodukt_trainable_shape(temp, temp)
return x, P, R, resvec, trueres
return x, P, R, resvec, trueres, norm_p, norm_x, norm_r, norm_v
# Erstelle Daten
train_set, train_labels = generate_tictactoe()
......@@ -133,10 +156,12 @@ for epoch in range(1000):
for i in range(len(g)):
b[i] = Hx[i] - g[i]
print("Norm b=", reelles_skalarprodukt_trainable_shape(b,b))
# Approximativ für den Newton-Schritt lösen mittels CG
m = 200
x_new, P, R, rho, trueres = CG_trainable(model, train_set, train_labels, loss_fn, b, x, m=m)
x_new, P, R, rho, trueres, norm_p, norm_x, norm_r, norm_v = CG_trainable(model, train_set, train_labels, loss_fn, b, x, m=m, tau=1e-3)
# Daten aus dem CG-Verfahren abspeichern für Analyse der Konvergenz
if(epoch == 0):
......@@ -144,6 +169,10 @@ for epoch in range(1000):
np.save("results/CG_R_epoch0.npy", R)
np.save("results/CG_rho.npy", rho)
np.save("results/CG_trueres.npy", trueres)
np.save("results/norm_p.npy", norm_p)
np.save("results/norm_x.npy", norm_x)
np.save("results/norm_r.npy", norm_r)
np.save("results/norm_v.npy", norm_v)
current_weights = model.get_weights()
np.save("results/W_1_trained_epoch0.npy", current_weights[0])
......
......@@ -95,7 +95,33 @@ plt.show()
plt.close()
plt.semilogy(rho, label="Implizite Hesse-Matrix des KNN")
plt.semilogy(rho_flat, label="Explizite Hesse-Matrix des KNN")
plt.semilogy(rho[:50], label="Implizite Hesse-Matrix des KNN")
plt.semilogy(rho_flat[:50], label="Explizite Hesse-Matrix des KNN")
plt.legend()
plt.show()
\ No newline at end of file
plt.show()
plt.close()

# Per-iteration norms saved by the explicit CG run on the flat Hessian matrix
# (the "*_flat.npy" files are written by the omin script).
x_norm_flat = np.load("results/norm_x_flat.npy")
p_norm_flat = np.load("results/norm_p_flat.npy")
v_norm_flat = np.load("results/norm_v_flat.npy")
r_norm_flat = np.load("results/norm_r_flat.npy")
# Per-iteration norms saved by the implicit CG run (Hessian-vector products
# via back-over-back autodiff; written by CG_trainable's caller).
x_norm = np.load("results/norm_x.npy")
p_norm = np.load("results/norm_p.npy")
v_norm = np.load("results/norm_v.npy")
r_norm = np.load("results/norm_r.npy")
# Overlay the first 20 iterations: solid lines = explicit (flat) Hessian,
# dashed lines = implicit Hessian; one color per CG quantity (x, p, v, r).
plt.semilogy(x_norm_flat[:20], 'r-')
plt.semilogy(x_norm[:20], 'r--', label="norm(x)")
plt.semilogy(p_norm_flat[:20], 'b-')
plt.semilogy(p_norm[:20], 'b--', label="norm(p)")
plt.semilogy(v_norm_flat[:20], 'g-')
plt.semilogy(v_norm[:20], 'g--', label="norm(v)")
plt.semilogy(r_norm_flat[:20], 'c-')
plt.semilogy(r_norm[:20], 'c--', label="norm(r)")
plt.legend()
plt.show()
import numpy as np
from numpy.linalg import norm
def omin(A, b, x0, m=None, eps=0, trueres=False):
def omin(A, b, x0, m=None, trueres=False, tau=None):
n = A.shape[0]
if m is None:
m = n
if tau is None:
tau = 10e-8
P = np.zeros((n,m))
R = np.zeros((n,m))
norm_x = np.zeros(m+1)
norm_r = np.zeros(m+1)
norm_p = np.zeros(m+1)
norm_v = np.zeros(m+1)
tol = eps*norm(b)
tol = tau*norm(b)
rho = np.zeros(2, dtype=complex)
resvec = np.zeros(m+1)
......@@ -21,21 +27,30 @@ def omin(A, b, x0, m=None, eps=0, trueres=False):
r = b - A@x
p = r
resvec[0] = norm(r)
resvec[0] = norm(r)**2
if trueres:
trueresvec[0] = resvec[0]
rho[0] = resvec[0]**2
rho[0] = resvec[0]
iter = m+1
# Initiale Werte abspeichern
norm_x[0] = norm(x)
norm_r[0] = norm(r)
norm_p[0] = norm(p)
for k in range(m):
v = A@p
norm_v[k+1] = norm(v)
alpha = rho[0]/(v.T@np.conj(p))
r = r - alpha*v
norm_r[k+1] = norm(r)
x = x + alpha*p
norm_x[k+1] = norm(x)
resvec[k+1] = norm(r)
resvec[k+1] = norm(r)**2
if trueres:
trueresvec[k+1] = norm(b - A@x)
......@@ -43,14 +58,15 @@ def omin(A, b, x0, m=None, eps=0, trueres=False):
iter = k+1
break
rho[1] = resvec[k+1]**2
rho[1] = resvec[k+1]
p = r + rho[1]/rho[0]*p
norm_p[k+1] = norm(p)
rho[0] = rho[1]
P[:,k] = np.reshape(p, (393,))
R[:,k] = np.reshape(r, (393,))
if trueres:
return x, iter, resvec, trueresvec, P, R
return x, iter, resvec, trueresvec, P, R, norm_p, norm_x, norm_r, norm_v
else:
return x, iter, resvec, P, R
\ No newline at end of file
......@@ -76,8 +76,10 @@ for epoch in range(10000):
x_flat = np.reshape(vector_trainable_shape_to_flat_shape(x), (393,1))
b = np.reshape((hessian_flat@x_flat - gradient_flat), (393,1))
print("Norm b = ", b.T@b)
m=200
x, iter, resvec, trueres, P, R = omin(hessian_flat, b, x_flat, m, trueres=True)
x, iter, resvec, trueres, P, R, norm_p, norm_x, norm_r, norm_v = omin(hessian_flat, b, x_flat, m, trueres=True)
# Daten aus dem CG-Verfahren abspeichern für Analyse der Konvergenz
if(epoch == 0):
......@@ -86,6 +88,10 @@ for epoch in range(10000):
np.save("results/CG_rho_flat.npy", resvec)
np.save("results/CG_trueres_flat.npy", trueres)
np.save("results/hessian-flat.npy", hessian_flat)
np.save("results/norm_p_flat.npy", norm_p)
np.save("results/norm_x_flat.npy", norm_x)
np.save("results/norm_r_flat.npy", norm_r)
np.save("results/norm_v_flat.npy", norm_v)
current_weights = model.get_weights()
np.save("results/W_1_trained_epoch0.npy", current_weights[0])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment