cavis/libnd4j/include/ops/declarable/headers/updaters.h

/*******************************************************************************
* Copyright (c) 2019-2020 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author Oleh Semeniv (oleg.semeniv@gmail.com)
//
#ifndef LIBND4J_HEADERS_UPDATERS_H
#define LIBND4J_HEADERS_UPDATERS_H
#include <ops/declarable/headers/common.h>
#include <ops/declarable/CustomOperations.h>
#include <helpers/ConstantTadHelper.h>
#include <execution/Threads.h>
#include <ops/declarable/helpers/updatersHelpers.h>
namespace sd {
namespace ops {
/**
* SGD updater
* Input arrays:
* 0 - input array with gradients.
* Optional:
* 1 - scalar learning rate value
* Optional:
* T args
* 0 - scalar learning rate value
*/
#if NOT_EXCLUDED(OP_sgd_updater)
DECLARE_CONFIGURABLE_OP(sgd_updater, 1, 1, true, 0, 0);
#endif
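// Usage sketch for sgd_updater (illustrative, not part of the header contract): it assumes the
// DeclarableOp::evaluate(inputs, tArgs, iArgs) overload used throughout the libnd4j op tests, and
// 'gradient' is a placeholder NDArray. SGD simply scales the gradient: update = lr * grad.
// (In DECLARE_CONFIGURABLE_OP the arguments read: 1 input, 1 output, in-place allowed, 0 T args, 0 I args.)
//
//   sd::ops::sgd_updater op;
//   auto result = op.evaluate({&gradient}, {0.001}, {});   // T arg 0: learning rate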
/**
* RmsProp updater
* Input arrays:
* 0 - input array with gradients.
* 1 - Initial state
* Optional:
* 2 - scalar learning rate value
* 3 - scalar rms decay
* 4 - epsilon
* Optional:
* T args
* 0 - scalar learning rate value
* 1 - scalar rms decay
* 2 - epsilon
*/
#if NOT_EXCLUDED(OP_rms_prop_updater)
DECLARE_CONFIGURABLE_OP(rms_prop_updater, 2, 2, true, 0, 0);
#endif
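// Usage sketch for rms_prop_updater (illustrative; same evaluate() assumption as the sgd_updater
// sketch, 'gradient' and 'state' are placeholders). Standard RMSProp keeps a running average of
// squared gradients: state = decay * state + (1 - decay) * grad^2,
// update = lr * grad / (sqrt(state) + epsilon).
//
//   sd::ops::rms_prop_updater op;
//   auto result = op.evaluate({&gradient, &state}, {0.001, 0.95, 1e-8}, {});   // lr, decay, epsilon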
// AdaGrad
/* Input arrays :
* 0 - input array with gradients.
* 1 - historical grad state
* Optional :
* 2 - scalar learning rate value
* 3 - epsilon
* Optional:
* T args
* 0 - scalar learning rate value
* 1 - epsilon
*/
#if NOT_EXCLUDED(OP_ada_grad_updater)
DECLARE_CONFIGURABLE_OP(ada_grad_updater, 2, 2, true, 0, 0);
#endif
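// Usage sketch for ada_grad_updater (illustrative placeholders). Textbook AdaGrad accumulates
// squared gradients and scales by their square root:
// state += grad^2; update = lr * grad / (sqrt(state) + epsilon).
//
//   sd::ops::ada_grad_updater op;
//   auto result = op.evaluate({&gradient, &state}, {0.01, 1e-8}, {});   // lr, epsilon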
// AdaMax
/* Input arrays :
* 0 - input array with gradients.
* 1 - gradient state V
* 2 - gradient state M
* Optional :
* 3 - scalar learning rate value
* 4 - beta 1 value
* 5 - beta 2 value
* 6 - epsilon
* Optional:
* T args
* 0 - scalar learning rate value
* 1 - beta 1 value
* 2 - beta 2 value
* 3 - epsilon
* Optional:
* I args
* 0 - iteration
*/
#if NOT_EXCLUDED(OP_ada_max_updater)
DECLARE_CONFIGURABLE_OP(ada_max_updater, 3, 3, true, 0, 0);
#endif
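// Usage sketch for ada_max_updater (illustrative; stateV, stateM and iteration are placeholders).
// Textbook AdaMax tracks a first moment and an infinity-norm state:
// m = beta1 * m + (1 - beta1) * grad; u = max(beta2 * u, |grad|);
// update = (lr / (1 - beta1^t)) * m / (u + epsilon).
//
//   sd::ops::ada_max_updater op;
//   auto result = op.evaluate({&gradient, &stateV, &stateM},
//                             {0.001, 0.9, 0.999, 1e-8}, {iteration});   // lr, beta1, beta2, epsilon; I arg 0: t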
// Nesterov's momentum
/* Input arrays :
* 0 - input array with gradients.
* 1 - V grad state
* Optional :
* 2 - scalar learning rate value
* 3 - scalar momentum value
* Optional:
* T args
* 0 - learning rate value
* 1 - momentum value
*/
#if NOT_EXCLUDED(OP_nesterovs_updater)
DECLARE_CONFIGURABLE_OP(nesterovs_updater, 2, 2, true, 0, 0);
#endif
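// Usage sketch for nesterovs_updater (illustrative placeholders). The op maintains a velocity
// state V combining momentum with the scaled gradient and applies the Nesterov look-ahead
// correction to the returned update; the exact convention follows the matching Java updater.
//
//   sd::ops::nesterovs_updater op;
//   auto result = op.evaluate({&gradient, &stateV}, {0.1, 0.9}, {});   // lr, momentum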
// Adam
/* Input arrays :
* 0 - input array with gradients.
* 1 - gradient state V
* 2 - gradient state M
* Optional :
* 3 - scalar learning rate value
* 4 - beta 1 value
* 5 - beta 2 value
* 6 - epsilon
* Optional:
* T args
* 0 - scalar learning rate value
* 1 - beta 1 value
* 2 - beta 2 value
* 3 - epsilon
* Optional:
* I args
* 0 - iteration
*/
#if NOT_EXCLUDED(OP_adam_updater)
DECLARE_CONFIGURABLE_OP(adam_updater, 3, 3, true, 0, 0);
#endif
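// Usage sketch for adam_updater (illustrative placeholders). Textbook Adam:
// m = beta1 * m + (1 - beta1) * grad; v = beta2 * v + (1 - beta2) * grad^2;
// update = lr * mHat / (sqrt(vHat) + epsilon), where mHat and vHat are bias-corrected by
// (1 - beta^t) using the iteration counter passed as I arg 0.
//
//   sd::ops::adam_updater op;
//   auto result = op.evaluate({&gradient, &stateV, &stateM},
//                             {0.001, 0.9, 0.999, 1e-8}, {iteration});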
// AdaDelta
/* Input arrays :
* 0 - input array with gradients.
* 1 - gradient state V
* 2 - gradient state M
* Optional :
* 3 - rho value
* 4 - epsilon
* Optional:
* T args
* 0 - rho
* 1 - epsilon
*/
#if NOT_EXCLUDED(OP_ada_delta_updater)
DECLARE_CONFIGURABLE_OP(ada_delta_updater, 3, 3, true, 0, 0);
#endif
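// Usage sketch for ada_delta_updater (illustrative placeholders). Textbook AdaDelta keeps a
// running average of squared gradients and of squared updates and needs no learning rate:
// Eg = rho * Eg + (1 - rho) * grad^2;
// update = grad * sqrt(Edx + epsilon) / sqrt(Eg + epsilon);
// Edx = rho * Edx + (1 - rho) * update^2.
//
//   sd::ops::ada_delta_updater op;
//   auto result = op.evaluate({&gradient, &stateV, &stateM}, {0.95, 1e-6}, {});   // rho, epsilon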
// Nadam
/* Input arrays :
* 0 - input array with gradients.
* 1 - gradient state V
* 2 - gradient state M
* Optional :
* 3 - scalar learning rate value
* 4 - beta 1 value
* 5 - beta 2 value
* 6 - epsilon
* Optional:
* T args
* 0 - scalar learning rate value
* 1 - beta 1 value
* 2 - beta 2 value
* 3 - epsilon
* Optional:
* I args
* 0 - iteration
*/
#if NOT_EXCLUDED(OP_nadam_updater)
DECLARE_CONFIGURABLE_OP(nadam_updater, 3, 3, true, 0, 0);
#endif
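// Usage sketch for nadam_updater (illustrative placeholders). Nadam is Adam with a Nesterov-style
// look-ahead: the bias-corrected first moment is blended with the current gradient before the
// usual division by sqrt(v) + epsilon.
//
//   sd::ops::nadam_updater op;
//   auto result = op.evaluate({&gradient, &stateV, &stateM},
//                             {0.001, 0.9, 0.999, 1e-8}, {iteration});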
// AmsGrad
/* Input arrays :
* 0 - input array with gradients.
* 1 - gradient state V - squared gradients (second moment)
* 2 - gradient state M - moving average (first moment)
* 3 - gradient state H - element-wise max of V
* Optional :
* 4 - scalar learning rate value
* 5 - beta 1 value
* 6 - beta 2 value
* 7 - epsilon
* Optional:
* T args
* 0 - scalar learning rate value
* 1 - beta 1 value
* 2 - beta 2 value
* 3 - epsilon
* Optional:
* I args
* 0 - iteration
*/
#if NOT_EXCLUDED(OP_ams_grad_updater)
DECLARE_CONFIGURABLE_OP(ams_grad_updater, 4, 4, true, 0, 0);
#endif
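// Usage sketch for ams_grad_updater (illustrative placeholders). AMSGrad is Adam with a monotone
// second-moment bound: h = max(h, v) element-wise, and the update divides by sqrt(h) + epsilon,
// so the per-element adaptive step size is non-increasing.
//
//   sd::ops::ams_grad_updater op;
//   auto result = op.evaluate({&gradient, &stateV, &stateM, &stateH},
//                             {0.001, 0.9, 0.999, 1e-8}, {iteration});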
}  // namespace ops
}  // namespace sd
#endif  // LIBND4J_HEADERS_UPDATERS_H