/* ******************************************************************************
 *
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 *  See the NOTICE file distributed with this work for additional
 *  information regarding copyright ownership.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// @author raver119@gmail.com
//

#include "testlayers.h"
#include <array/ExtraArguments.h>
#include <array>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

using namespace sd;

/**
 * GoogleTest fixture for the lambda tests in this file.
 *
 * The fixture holds no state. Its constructor only writes a line break to
 * stdout and flushes the stream.
 */
class LambdaTests : public testing::Test {
 public:
    LambdaTests() {
        // Write a single newline, then push it out of the stdio buffer
        // immediately instead of waiting for the buffer to fill.
        fputs("\n", stdout);
        fflush(stdout);
    }
};

/**
 * Applies a unary functor to every element of a buffer of doubles.
 *
 * Every thread starts at its global linear x-index and then advances by the
 * total number of threads launched along x (gridDim.x * blockDim.x), so any
 * 1D launch configuration covers all `length` elements. Only the x dimension
 * of the grid and of the block takes part in the indexing.
 *
 * @tparam Lambda  type callable in device code as `lambda(double)`; the
 *                 result is stored as a double
 * @param input    buffer read at indices [0, length)
 * @param output   buffer written at indices [0, length). It may be the same
 *                 buffer as `input`: each element is read before it is
 *                 written, and in a 1D launch it is handled by one thread only
 * @param length   number of elements to process; nothing is done when it is
 *                 zero or negative
 * @param lambda   functor to apply, passed by value
 */
template <typename Lambda>
__global__ void runLambda(double *input, double *output, Nd4jLong length, Lambda lambda) {
    // blockIdx/blockDim/gridDim components are 32-bit unsigned. Widen BEFORE
    // multiplying so that large grids cannot wrap around in 32-bit arithmetic
    // (assumes Nd4jLong is a 64-bit signed integer -- TODO confirm typedef).
    const Nd4jLong step  = static_cast<Nd4jLong>(gridDim.x) * blockDim.x;
    const Nd4jLong first = static_cast<Nd4jLong>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (Nd4jLong e = first; e < length; e += step) {
        output[e] = lambda(input[e]);
    }
}

/**
 * Enqueues runLambda on `*stream` with a functor that adds 1.0 to each element.
 *
 * The kernel is launched with a fixed configuration of 128 blocks of 128
 * threads; runLambda strides over the data, so this covers any `length`.
 * No dynamic shared memory is requested because runLambda declares none.
 * The launch is asynchronous: this function does not synchronize the stream,
 * so the caller must do so before reading `output` on the host.
 *
 * @param stream  pointer to the CUDA stream to launch on; must not be null
 * @param input   buffer of at least `length` doubles, dereferenced by the kernel
 * @param output  buffer of at least `length` doubles, written by the kernel;
 *                may be the same buffer as `input`
 * @param length  number of elements to process; no kernel is launched when
 *                it is zero or negative
 * @throws std::invalid_argument if `stream` is null
 * @throws std::runtime_error    if the CUDA runtime reports an error right
 *                               after the launch
 */
void launcher(cudaStream_t *stream, double *input, double *output, Nd4jLong length) {
    if (stream == nullptr) {
        throw std::invalid_argument("launcher: stream pointer is null");
    }

    // The kernel would not touch any element; skip the launch altogether.
    if (length <= 0) {
        return;
    }

    // Equivalent lambda written without the project macro, kept for reference:
    //auto f = [] __host__ __device__ (double x) -> double {
    //        return x + 1.;
    //};
    auto f = LAMBDA_D(x) {
        return x+1.;
    };

    runLambda<<<128, 128, 0, *stream>>>(input, output, length, f);

    // A kernel launch returns no status of its own; a rejected launch is only
    // visible through the runtime's last-error state.
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        throw std::runtime_error(std::string("launcher: runLambda launch failed: ") +
                                 cudaGetErrorString(status));
    }
}


// TEST_F(LambdaTests, test_basic_1) {
//     auto x = NDArrayFactory::create<double>('c', {5});
//     auto e = NDArrayFactory::create<double>('c', {5}, {1., 1., 1., 1., 1.});



//     //x.applyLambda<double>(f, nullptr);
//     launcher(LaunchContext::defaultContext()->getCudaStream(), (double *)x.specialBuffer(), (double *)x.specialBuffer(), x.lengthOf());
//     auto res = cudaStreamSynchronize(*LaunchContext::defaultContext()->getCudaStream());
//     ASSERT_EQ(0, res);

//     ASSERT_EQ(e, x);
// }

// void test(NDArray &x) {
//     auto f = LAMBDA_D(x) {
//         return x+1.;
//     };

//     x.applyLambda(f, x);
// }

// template <typename T>
// void test2(NDArray &x) {
//     auto f = LAMBDA_T(x) {
//         return x+1.;
//     };

//     x.applyLambda(f, x);
// }

// void testPairwise(NDArray &x, NDArray &y) {
//     auto f = LAMBDA_DD(x, y) {
//         return x + y +1.;
//     };

//     x.applyPairwiseLambda(y, f, x);
// }

// void testTriplewise(NDArray &i, NDArray &j, NDArray &k) {
//     auto f = LAMBDA_DDD(i, j, k) {
//         return i + j + k + 2.;
//     };

//     i.applyTriplewiseLambda(j, k, f, i);
// }

// void testIndexed(NDArray &x) {
//     auto f = ILAMBDA_D(x) {
//         return _idx + 1.;
//     };

//     x.applyIndexedLambda(f, x);
// }

// void testIndexedPairwise(NDArray &x, NDArray &y) {
//     auto f = ILAMBDA_DD(x, y) {
//         return _idx + x + y +1.;
//     };

//     x.applyIndexedPairwiseLambda(y, f, x);
// }

// TEST_F(LambdaTests, test_basic_2) {
//     auto x = NDArrayFactory::create<double>('c', {5});
//     auto e = NDArrayFactory::create<double>('c', {5}, {1., 1., 1., 1., 1.});

//     test(x);

//     ASSERT_EQ(e, x);
// }

// TEST_F(LambdaTests, test_basic_3) {
//     auto x = NDArrayFactory::create<float>('c', {5});
//     auto e = NDArrayFactory::create<float>('c', {5}, {1.f, 1.f, 1.f, 1.f, 1.f});

//     test(x);

//     ASSERT_EQ(e, x);
// }

// TEST_F(LambdaTests, test_basic_4) {
//     auto x = NDArrayFactory::create<float>('c', {5});
//     auto e = NDArrayFactory::create<float>('c', {5}, {1.f, 1.f, 1.f, 1.f, 1.f});

//     test2<float>(x);

//     ASSERT_EQ(e, x);
// }

// TEST_F(LambdaTests, test_basic_5) {
//     auto x = NDArrayFactory::create<double>('c', {5}, {1., 1., 1., 1., 1.});
//     auto y = NDArrayFactory::create<double>('c', {5}, {2., 2., 2., 2., 2.});
//     auto e = NDArrayFactory::create<double>('c', {5}, {4., 4., 4., 4., 4.});

//     testPairwise(x, y);

//     ASSERT_EQ(e, x);
// }

// TEST_F(LambdaTests, test_basic_6) {
//     auto x = NDArrayFactory::create<double>('c', {5});
//     auto e = NDArrayFactory::create<double>('c', {5}, {1., 2., 3., 4., 5.});

//     testIndexed(x);

//     ASSERT_EQ(e, x);
// }

// TEST_F(LambdaTests, test_basic_7) {
//     auto w = NDArrayFactory::create<double>('c', {5}, {0., 0., 0., 0., 0.});
//     auto x = NDArrayFactory::create<double>('c', {5}, {1., 1., 1., 1., 1.});
//     auto y = NDArrayFactory::create<double>('c', {5}, {2., 2., 2., 2., 2.});
//     auto e = NDArrayFactory::create<double>('c', {5}, {5., 5., 5., 5., 5.});

//     testTriplewise(w, x, y);

//     ASSERT_EQ(e, w);
// }

// TEST_F(LambdaTests, test_basic_8) {
//     auto x = NDArrayFactory::create<double>('c', {5}, {1., 1., 1., 1., 1.});
//     auto y = NDArrayFactory::create<double>('c', {5}, {2., 2., 2., 2., 2.});
//     auto e = NDArrayFactory::create<double>('c', {5}, {4., 5., 6., 7., 8.});

//     testIndexedPairwise(x, y);

//     ASSERT_EQ(e, x);
// }


// template <typename T>
// void testPairwiseMy(NDArray &x, NDArray &y, NDArray &z) {

//     auto f = LAMBDA_TT(x, y){
//         return sd::math::nd4j_max<T>(x, (T)0.f)
//               - x * y
//               + sd::math::nd4j_log<T,T>((T)1.f
//                 + sd::math::nd4j_exp<T,T>(-sd::math::nd4j_abs(x)));
//     };

//     x.applyPairwiseLambda(y, f, z);
// }

// ///////////////////////////////////////////////////////////////////
// TEST_F(LambdaTests, test_basic_9) {

//     NDArray labels('c', {2,3,4},{0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0});
//     NDArray logits('c', {2,3,4}, sd::DataType::DOUBLE);
//     NDArray output('c', {2,3,4}, sd::DataType::DOUBLE);
//     NDArray expected('c', {2,3,4}, {0.744397, 0.598139, 0.554355, 0.913015, 0.474077, 1.037488, 0.403186, 1.171101, 0.341154, 1.313262, 0.287335, 1.463282, 0.241008, 1.620417, 0.201413, 1.783901, 0.167786, 1.952978, 2.039387, 0.126928, 0.115520, 2.305083, 0.095545, 2.486836});

//     logits.linspace(0.1, 0.1);

//     NDArray::prepareSpecialUse({&output}, {&logits, &labels});
//     testPairwiseMy<double>(logits, labels, output);
//     NDArray::registerSpecialUse({&output}, {&logits, &labels});

//     // output.printBuffer(nullptr, -1, true);
//     ASSERT_TRUE(expected.equalsTo(output));
// }