cavis/libnd4j/include/ops/declarable/generic/updaters/adaBeliefUpdater.cpp

/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  * See the NOTICE file distributed with this work for additional
 *  * information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

 //
 // @author Oleh Semeniv (oleg.semeniv@gmail.com)
 // @author Abdelrauf(rauf@konduit.ai)

#include <ops/declarable/headers/updaters.h>
#include <ops/declarable/CustomOperations.h>
#include <helpers/ConstantTadHelper.h>
#include <execution/Threads.h>
#include <array/NDArray.h>

namespace sd {
    namespace ops {

        /**
         * AdaBelief updater op (see https://arxiv.org/pdf/2010.07468.pdf).
         *
         * Inputs:
         *   0     - gradient
         *   1     - initial state U, must have the same shape as the gradient
         *   2     - initial state M, must have the same shape as the gradient
         *   3..6  - optional scalar arrays: learning rate, beta 1, beta 2, epsilon
         *
         * T args (used when the four scalar inputs are not all supplied):
         *   0 - learning rate, 1 - beta 1, 2 - beta 2, 3 - epsilon
         *
         * I args:
         *   0 - optional iteration, 0 when omitted
         *
         * Outputs:
         *   0 - update, 1 - state U, 2 - state M
         *
         * Returns Status::OK() without touching the outputs when any of the three
         * mandatory inputs is empty.
         */
        CONFIGURABLE_OP_IMPL(adabelief_updater, 3, 3, true, 0, 0) {

            const auto gradient = INPUT_VARIABLE(0);
            const auto initStateU = INPUT_VARIABLE(1);
            const auto initStateM = INPUT_VARIABLE(2);

            auto update = OUTPUT_VARIABLE(0);
            auto stateU = OUTPUT_VARIABLE(1);
            auto stateM = OUTPUT_VARIABLE(2);

            // todo maybe we need an error like on Java side
            if (gradient->isEmpty() || initStateU->isEmpty() || initStateM->isEmpty())
                return Status::OK();

            REQUIRE_TRUE(gradient->isSameShape(initStateU), 0, "ADABELIEF UPDATER OP: input state U must have the same shape as gradient,"
                "  expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(),
                ShapeUtils::shapeAsString(initStateU->shapeInfo()).c_str());
            REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "ADABELIEF UPDATER OP: input state M must have the same shape as gradient,"
                "  expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(),
                ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str());

            // Hyper-parameters are accepted either as four extra scalar inputs
            // (7 inputs in total) or as four T args.
            const bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size();

            REQUIRE_TRUE(bParamsSupply, 0, "ADABELIEF UPDATER OP: learning rate, beta 1, beta 2 and epsilon were not provided!");

            const auto iteration = block.getIArguments()->size() > 0 ? INT_ARG(0) : 0;

            double dLr = 0.0;
            double dBeta1 = 0.0;
            double dBeta2 = 0.0;
            double dEpsilon = 0.0;

            // Read the scalar inputs only when all of them (indices 3..6) exist.
            // With 4..6 inputs the check above can only have passed because of the
            // T args, so those are used instead of indexing inputs that are absent.
            const bool bParamsFromInputs = block.width() >= 7;

            if (bParamsFromInputs) {
                const auto lr = INPUT_VARIABLE(3);
                const auto beta1 = INPUT_VARIABLE(4);
                const auto beta2 = INPUT_VARIABLE(5);
                const auto epsilon = INPUT_VARIABLE(6);

                REQUIRE_TRUE(lr->isScalar(), 0, "ADABELIEF UPDATER OP: Learning rate has to be a scalar, but instead got rank %i!", lr->rankOf());
                REQUIRE_TRUE(beta1->isScalar(), 0, "ADABELIEF UPDATER OP: beta 1 has to be a scalar, but instead got rank %i!", beta1->rankOf());
                REQUIRE_TRUE(beta2->isScalar(), 0, "ADABELIEF UPDATER OP: beta 2 has to be a scalar, but instead got rank %i!", beta2->rankOf());
                REQUIRE_TRUE(epsilon->isScalar(), 0, "ADABELIEF UPDATER OP: Epsilon has to be a scalar, but instead got rank %i!", epsilon->rankOf());

                dLr = lr->e<double>(0);
                dBeta1 = beta1->e<double>(0);
                dBeta2 = beta2->e<double>(0);
                dEpsilon = epsilon->e<double>(0);
            }
            else {
                dLr = T_ARG(0);
                dBeta1 = T_ARG(1);
                dBeta2 = T_ARG(2);
                dEpsilon = T_ARG(3);
            }

            helpers::updaterAdaBelief(block.launchContext(), *gradient, *initStateU, *initStateM, *update, *stateU, *stateM, dLr, dBeta1, dBeta2, dEpsilon, iteration);
            return Status::OK();
        }

        // Type constraints for adabelief_updater: the allowed input types are the
        // ones listed by ALL_FLOATS, and same mode is switched on for the descriptor.
        // NOTE(review): same mode presumably ties output types to the input types - confirm.
        DECLARE_TYPES(adabelief_updater) {
            auto descriptor = getOpDescriptor();
            descriptor->setAllowedInputTypes({ ALL_FLOATS })->setSameMode(true);
        }

    }
}
AdaBelief updater: it was agreed to modify changes on the copy of AdamUpdater. This way we can improve it later. https://arxiv.org/pdf/2010.07468.pdf Signed-off-by: AbdelRauf <rauf@konduit.ai> 2021-02-18 13:53:38 +01:00			`/*`
			`* ******************************************************************************`
			`* *`
			`* *`
			`* * This program and the accompanying materials are made available under the`
			`* * terms of the Apache License, Version 2.0 which is available at`
			`* * https://www.apache.org/licenses/LICENSE-2.0.`
			`* *`
			`* * See the NOTICE file distributed with this work for additional`
			`* * information regarding copyright ownership.`
			`* * Unless required by applicable law or agreed to in writing, software`
			`* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`* * License for the specific language governing permissions and limitations`
			`* * under the License.`
			`* *`
			`* * SPDX-License-Identifier: Apache-2.0`
			`* *****************************************************************************`
			`*/`

			`//`
			`// @author Oleh Semeniv (oleg.semeniv@gmail.com)`
			`// @author Abdelrauf(rauf@konduit.ai)`

			`#include <ops/declarable/headers/updaters.h>`
			`#include <ops/declarable/CustomOperations.h>`
			`#include <helpers/ConstantTadHelper.h>`
			`#include <execution/Threads.h>`
			`#include <array/NDArray.h>`

			`namespace sd {`
			`namespace ops {`

			`CONFIGURABLE_OP_IMPL(adabelief_updater, 3, 3, true, 0, 0) {`

			`const auto gradient = INPUT_VARIABLE(0);`
			`const auto initStateU = INPUT_VARIABLE(1);`
			`const auto initStateM = INPUT_VARIABLE(2);`

			`auto update = OUTPUT_VARIABLE(0);`
			`auto stateU = OUTPUT_VARIABLE(1);`
			`auto stateM = OUTPUT_VARIABLE(2);`

			`// todo maybe we need an error like on Java side`
			`if (gradient->isEmpty() \|\| initStateU->isEmpty() \|\| initStateM->isEmpty())`
			`return Status::OK();`

			`REQUIRE_TRUE(gradient->isSameShape(initStateU), 0, "ADABELIEF UPDATER OP: input state V must have the same shape as gradient,"`
			`" expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(),`
			`ShapeUtils::shapeAsString(initStateU->shapeInfo()).c_str());`
			`REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "ADABELIEF UPDATER OP: input state M must have the same shape as gradient,"`
			`" expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(),`
			`ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str());`

			`bool bParamsSupply = 7 == block.width() \|\| 4 == block.getTArguments()->size();`

			`auto iteration = block.getIArguments()->size() > 0 ? INT_ARG(0) : 0;`

			`REQUIRE_TRUE(bParamsSupply, 0, "ADABELIEF UPDATER OP: learning rate, beta 1, beta 2 and epsilon were not provided!");`

			`double dLr, dBeta1, dBeta2, dEpsilon;`

			`if (block.width() > 3) {`
			`const auto lr = INPUT_VARIABLE(3);`
			`const auto beta1 = INPUT_VARIABLE(4);`
			`const auto beta2 = INPUT_VARIABLE(5);`
			`const auto epsilon = INPUT_VARIABLE(6);`

			`REQUIRE_TRUE(lr->isScalar(), 0, "ADABELIEF UPDATER OP: Learning rate has to be a scalar, but instead got rank %i!", lr->rankOf());`
			`REQUIRE_TRUE(beta1->isScalar(), 0, "ADABELIEF UPDATER OP: beta 1 has to be a scalar, but instead got rank %i!", beta1->rankOf());`
			`REQUIRE_TRUE(beta2->isScalar(), 0, "ADABELIEF UPDATER OP: beta 2 has to be a scalar, but instead got rank %i!", beta2->rankOf());`
			`REQUIRE_TRUE(epsilon->isScalar(), 0, "ADABELIEF UPDATER OP: Epsilon has to be a scalar, but instead got rank %i!", epsilon->rankOf());`

			`dLr = lr->e<double>(0);`
			`dBeta1 = beta1->e<double>(0);`
			`dBeta2 = beta2->e<double>(0);`
			`dEpsilon = epsilon->e<double>(0);`
			`}`
			`else {`
			`dLr = T_ARG(0);`
			`dBeta1 = T_ARG(1);`
			`dBeta2 = T_ARG(2);`
			`dEpsilon = T_ARG(3);`
			`}`

			`helpers::updaterAdaBelief(block.launchContext(), gradient, initStateU, initStateM, update, stateU, stateM, dLr, dBeta1, dBeta2, dEpsilon, iteration);`
			`return Status::OK();`
			`}`

			`DECLARE_TYPES(adabelief_updater) {`
			`getOpDescriptor()->setAllowedInputTypes({ ALL_FLOATS })`
			`->setSameMode(true);`
			`}`

			`}`
			`}`