Commit d84e17af authored by Eva Lina Fesefeldt's avatar Eva Lina Fesefeldt
Browse files

[dev] ReLu3 hinzugefügt

parent 83c1324d
This diff is collapsed.
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
from .problem import Problem, ClassificationProblem, RegressionProblem, H1RegressionProblem,\
AutoencoderProblem,VariationalAutoencoderProblem, GenerativeAdversarialNetworkProblem
from .hessian import Hessian, HessianWrapper
from .preconditioner import Preconditioner, IdentityPreconditioner
from .regularization import Regularization, L2Regularization
\ No newline at end of file
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
import numpy as np
# import tensorflow as tf
# if int(tf.__version__[0]) > 1:
# import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()
from abc import ABC, abstractmethod
class Hessian(ABC):
"""
This class implements methods for the neural network training Hessian.
Must have a problem and a sess in order to be evaluated
"""
def __init__(self,problem=None,sess=None):
"""
Create a Hessian given:
- problem: the description of the neural network training problem
(hessianlearn.problem.Problem)
- sess: the tf.Session() needed for evaluation at run time
"""
self._problem = problem
self._sess = sess
@property
def problem(self):
return self._problem
@property
def sess(self):
return self._sess
@property
def dimension(self):
return self.problem.dimension
@property
def T(self):
return self._T
def _T(self):
return self
def __mult__(self,x):
return self(x)
def __call__(self,x,feed_dict,verbose = False):
"""
This method implements Hessian action, must have a problem and sess
set before this method can be evaluated.
-x: numpy array to be multiplied one at a time
-feed_dict: data used in finite sum Hessian evaluation
-verbose: for printing
"""
assert self.problem is not None
assert self.sess is not None
if len(x.shape) == 1:
feed_dict[self.problem.dw] = x
return self.sess.run(self.problem.Hdw,feed_dict)
elif len(x.shape) == 2:
n_vectors = x.shape[-1]
if self.problem._HdW is None:
if verbose:
print('Total vectors = ',n_vectors)
print('Initializing Hessian blocking')
self.problem._initialize_hessian_blocking(n_vectors)
# When the block sizes agree
if n_vectors == self.problem._hessian_block_size:
feed_dict[self.problem._dW] = x
HdW = self.sess.run(self.problem.HdW,feed_dict)
return HdW
# When the requested block size is smaller
elif n_vectors < self.problem._hessian_block_size:
# The speedup is roughly 5x, so in the case that its less
# than 1/5 its faster to either reinitialize the blocking
# or for loop around running problem.Hdw
if n_vectors < 0.2*self.problem._hessian_block_size:
# Could reinitialize the blocking or just for loop
# For looping for now
HdW = np.zeros_like(x)
for i in range(n_vectors):
feed_dict[problem.dw] = x[:,i]
HdW[:,i] = sess.run(problem.Hdw,feed_dict)
return HdW
else:
dW = np.zeros(self.problem.dimension,self.problem._hessian_block_size)
dW[:,:n_vectors] = x
feed_dict[self.problem._dW] = dW
HdW = self.sess.run(self.problem.HdW,feed_dict)
return HdW[:,:n_vectors]
# When the requested block size is larger
elif n_vectors > self.problem._hessian_block_size:
HdW = np.zeros_like(x)
block_size = self.problem._hessian_block_size
blocks, remainder = np.divmod(block_size,block_size)
for i in range(blocks):
feed_dict[self.problem._dW] = x[:,i*block_size:(i+1)*block_size]
HdW[:,i*block_size:(i+1)*block_size] = self.sess.run(self.problem.HdW,feed_dict)
# The last vectors are done as a for loop or a zeroed out array
if remainder < 0.2*self.problem._hessian_block_size:
for i in range(n_vectors):
feed_dict[problem.dw] = x[:,blocks*block_size+i]
HdW[:,blocks*block_size+i] = sess.run(problem.Hdw,feed_dict)
else:
dW = np.zeros(self.problem.dimension,self.problem._hessian_block_size)
dW[:,:remainder] = x[:,-remainder:]
feed_dict[self.problem._dW] = dW
HdW[:,-remainder:] = sess.run(problem.Hdw,feed_dict)
else:
# Many different Hessian mat-vecs interpreted as a tensor?
print('This case is not yet implemented'.center(80))
raise
def quadratics(self,x,feed_dict,verbose = False):
"""
This method implements Hessian quadratics xTHx.
Must have self._problem and self._sess set before this method can be evaluated.
-x: numpy array to be multiplied one at a time
-feed_dict: data used in finite sum Hessian evaluation
-verbose: for printing
"""
assert self.problem is not None
assert self.sess is not None
if len(x.shape) == 1:
feed_dict[self.problem.dw] = x
return self.sess.run(self.problem.H_quadratic,feed_dict)
elif len(x.shape) == 2:
number_of_quadratics = x.shape[1]
H_quads = np.zeros(number_of_quadratics)
if verbose:
try:
from tqdm import tqdm
for i in tqdm(range(number_of_quadratics)):
feed_dict[self.problem.dw] = x[:,i]
H_quads[i] = self.sess.run(self.problem.H_quadratic,feed_dict)
except:
print('No progress bar :(')
for i in range(number_of_quadratics):
feed_dict[self.problem.dw] = x[:,i]
H_quads[i] = self.sess.run(self.problem.H_quadratic,feed_dict)
else:
for i in range(number_of_quadratics):
feed_dict[self.problem.dw] = x[:,i]
H_quads[i] = self.sess.run(self.problem.H_quadratic,feed_dict)
return H_quads
else:
raise
class HessianWrapper:
    """
    Binds a Hessian operator to a fixed data dictionary so that it can be
    applied as a plain single-argument callable (e.g. by Krylov solvers).
    """

    def __init__(self, hessian, data_dictionary):
        # Keep the operator and the feed data it will be evaluated with.
        self._hessian = hessian
        self._data_dictionary = data_dictionary

    def __call__(self, x):
        """Apply the wrapped Hessian to x using the stored data."""
        return self._hessian(x, self._data_dictionary)
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
# The preconditioners in this module use tf.placeholder, which only exists
# in graph (v1) mode; v2 eager behavior must therefore be disabled.  The
# previous enable_eager_execution() calls (debug leftovers) would make the
# placeholder construction below raise at import time on TF2.
if int(tf.__version__[0]) > 1:
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()
class Preconditioner(object):
    """
    Base class describing a preconditioner; currently it carries no state.
    Child classes should implement __call__, which applies the
    preconditioner approximation of the (Hessian) inverse.
    """
class IdentityPreconditioner(Preconditioner):
    """
    Identity preconditioning: applying this preconditioner leaves the
    vector unchanged (i.e. it does nothing).
    """

    def __init__(self, problem, dtype=tf.float32):
        """
        Constructor arguments:
            - problem: hessianlearn.problem.Problem class
            - dtype: data type
        """
        # NOTE(review): rethink this later and improve for Krylov methods.
        self.x = tf.placeholder(dtype, problem.gradient.shape, name='vec_for_prec_apply')

    def __call__(self):
        """
        Return the placeholder vector; the actual value must be supplied
        to the sess at run time through the feed dict.
        """
        return self.x
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
# tf.compat.v1.enable_eager_execution()
if int(tf.__version__[0]) > 1:
import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()
# tf.enable_eager_execution()
from ..utilities import ParameterList
from abc import ABC, abstractmethod
def ParametersRegularization(dictionary=None):
    """
    Build the ParameterList of regularization hyperparameters.
        - dictionary: optional dict of pre-existing parameter entries;
          it is updated in place when given, otherwise a fresh dict is used.
    Returns a hessianlearn ParameterList.
    """
    # Use a None sentinel: the previous mutable default (`dictionary = {}`)
    # was shared across calls, so entries from one call leaked into the next.
    parameters = {} if dictionary is None else dictionary
    parameters["gamma"] = [1e-1, "regularization parameter"]
    return ParameterList(parameters)
class Regularization (ABC):
    """
    This class describes the components of regularization used during training.
    The child class implements the specifics during construction, setting
    the private attributes _cost, _gradient and _Hdw.
    """
    @property
    def cost(self):
        # Scalar regularization term added to the training loss.
        return self._cost
    @property
    def gradient(self):
        # Gradient of the regularization term with respect to the weights.
        return self._gradient
    @property
    def Hdw(self):
        # Action of the regularization Hessian on the direction dw.
        return self._Hdw
class L2Regularization(Regularization):
    """
    This class implements standard Tikhonov (L2) regularization
    with regularization parameter gamma:
        (gamma/2)||w||^2
    """

    def __init__(self, problem, gamma=None, parameters=None, dtype=tf.float32):
        """
        The constructor for this class takes:
            - problem: the description of the training problem,
              i.e. a hessianlearn.problem.Problem variant
            - gamma: the regularization parameter; can be found via Morozov
              discrepancy, trial and error, etc.
            - parameters: optional pre-built ParameterList; a fresh one is
              created when omitted
            - dtype: data type (kept for interface compatibility)
        """
        self.problem = problem
        # Build a fresh ParameterList when none is given.  The previous
        # default `parameters = ParametersRegularization()` was evaluated
        # once at definition time, so assigning gamma below mutated a
        # shared object and leaked gamma into every later instance.
        self.parameters = ParametersRegularization() if parameters is None else parameters
        if gamma is not None:
            self.parameters['gamma'] = gamma
        gamma_value = self.parameters['gamma']
        # Cost (gamma/2)||w||^2, its gradient gamma*w, and the (constant
        # diagonal) Hessian action gamma*dw.
        self._cost = 0.5 * gamma_value * tf.reduce_sum(self.problem._flat_w * self.problem._flat_w)
        self._gradient = gamma_value * self.problem._flat_w
        self._Hdw = gamma_value * self.problem.dw
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
import unittest
import numpy as np
import tensorflow as tf
if int(tf.__version__[0]) > 1:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.environ["KMP_WARNINGS"] = "FALSE"
import sys
sys.path.append('../../')
from hessianlearn import (HessianlearnModel, HessianlearnModelSettings,
ClassificationProblem,Data, L2Regularization)
tf.set_random_seed(0)
class TestHessianlearnModel(unittest.TestCase):
    """End-to-end check that each optimizer reduces the training loss on an
    MNIST least-squares classification problem."""

    def test_all_optimizers(self):
        # Load MNIST and scale pixel values into [0, 1].
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
        x_train = x_train.astype('float32') / 255.
        x_test = x_test.astype('float32') / 255.

        def _to_one_hot(raw_labels):
            # 10-class one-hot encoding of the integer labels.
            encoded = np.zeros((raw_labels.shape[0], 10))
            encoded[np.arange(raw_labels.shape[0]), raw_labels] = 1
            return encoded

        y_train = _to_one_hot(y_train)
        y_test = _to_one_hot(y_test)

        # Small dense classifier network.
        classifier = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
        ])

        # Problem, regularization and data objects.
        problem = ClassificationProblem(classifier, loss_type='least_squares', dtype=tf.float32)
        regularization = L2Regularization(problem, gamma=0.001)
        train_dict = {problem.x: x_train, problem.y_true: y_train}
        validation_dict = {problem.x: x_test, problem.y_true: y_test}
        data = Data(train_dict, 256, validation_data=validation_dict, hessian_batch_size=32)

        # Model with a single training sweep per optimizer.
        HLModelSettings = HessianlearnModelSettings()
        HLModelSettings['max_sweeps'] = 1.
        HLModel = HessianlearnModel(problem, regularization, data, settings=HLModelSettings)
        for optimizer in ['lrsfn', 'adam', 'gd', 'incg', 'sgd']:
            HLModel.settings['optimizer'] = optimizer
            HLModel.fit()
            # The loss after one sweep must be below the initial loss.
            first_loss = HLModel.logger['train_loss'][0]
            last_iteration = max(HLModel.logger['train_loss'].keys())
            last_loss = HLModel.logger['train_loss'][last_iteration]
            print('first loss = ', first_loss)
            print('last_loss = ', last_loss)
            assert last_loss < first_loss
# Run this test module directly via `python <file>`.
if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
import unittest
import numpy as np
import sys
sys.path.append('../../')
from hessianlearn import (randomized_eigensolver)
class TestRandomizedEigensolver(unittest.TestCase):
    """Check the randomized eigensolver on a synthetic symmetric operator."""

    def test_basic(self):
        rng = np.random.RandomState(seed=0)
        n = 100
        # Orthogonal eigenvector basis; spectrum has a 10-dimensional
        # dominant eigenspace of ones followed by exponential decay.
        Q, _ = np.linalg.qr(rng.randn(n, n))
        d = np.concatenate((np.ones(10), np.exp(-np.arange(n - 10))))
        operator = lambda v: Q @ np.diag(d) @ (Q.T @ v)
        d_hl, Q_hl = randomized_eigensolver(operator, 100, 100)
        # Leading eigenvalues must match to solver precision.
        assert np.linalg.norm(d[:50] - d_hl[0:50]) < 1e-10
        # The reconstructed operator must match the true one.
        reconstruction_error = np.linalg.norm(Q @ np.diag(d) @ Q.T - Q_hl @ np.diag(d_hl) @ Q_hl.T)
        assert reconstruction_error < 1e-10
# Run this test module directly via `python <file>`.
if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# If not, see <http://www.gnu.org/licenses/>.
#
# Author: Tom O'Leary-Roseberry
# Contact: tom.olearyroseberry@utexas.edu
from __future__ import absolute_import, division, print_function
import unittest
import numpy as np
import sys
sys.path.append('../../')
from hessianlearn import (block_range_finder)
class TestRangeFinders(unittest.TestCase):
    """Check the block range finder on a synthetic symmetric operator."""

    def test_basic(self):
        rng = np.random.RandomState(seed=0)
        n = 100
        # Orthogonal basis; spectrum has a 10-dimensional dominant
        # eigenspace of ones followed by exponential decay.
        Q, _ = np.linalg.qr(rng.randn(n, n))
        d = np.concatenate((np.ones(10), np.exp(-np.arange(n - 10))))
        operator = lambda v: Q @ np.diag(d) @ (Q.T @ v)
        Q_range = block_range_finder(operator, 100, 1e-5, 10)
        # The numerical range should be captured by few vectors.
        assert Q_range.shape[-1] <= 40
        # Projecting an arbitrary action onto the range loses almost nothing.
        probe = rng.randn(100, 1)
        action = operator(probe)
        error = np.linalg.norm(action - Q_range @ (Q_range.T @ action))
        print(error)
        assert error < 1e-5
# Run this test module directly via `python <file>`.
if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
# This file is part of the hessianlearn package
#
# hessianlearn is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or any later version.
#
# hessianlearn is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.