/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

package org.deeplearning4j.nn.multilayer;

import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.transforms.strict.SigmoidDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.TanhDerivative;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.exception.ND4JArraySizeException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.nd4j.linalg.ops.transforms.Transforms;

import java.util.Arrays;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.fail;

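/**
 * Backpropagation tests for {@link MultiLayerNetwork}: small MLPs are fit on the Iris dataset and
 * the resulting parameter updates and gradients are compared against values calculated manually,
 * outside of DL4J and ND4J.
 */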
public class BackPropMLPTest extends BaseDL4JTest {

    @Test
    public void testMLPTrivial() {
        //Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1.
        MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[] {1}, Activation.SIGMOID));
        network.setListeners(new ScoreIterationListener(1));
        network.init();

        DataSetIterator iter = new IrisDataSetIterator(1, 10);

        while (iter.hasNext())
            network.fit(iter.next());
    }
@Test
|
|
public void testMLP() {
|
|
//Simple mini-batch test with multiple hidden layers
|
|
MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] {5, 4, 3}, Activation.SIGMOID);
|
|
// System.out.println(conf);
|
|
MultiLayerNetwork network = new MultiLayerNetwork(conf);
|
|
network.init();
|
|
DataSetIterator iter = new IrisDataSetIterator(10, 100);
|
|
|
|
while (iter.hasNext()) {
|
|
network.fit(iter.next());
|
|
}
|
|
}
|
|
|
|
@Test
|
|
public void testMLP2() {
|
|
//Simple mini-batch test with multiple hidden layers
|
|
MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] {5, 15, 3}, Activation.TANH);
|
|
// System.out.println(conf);
|
|
MultiLayerNetwork network = new MultiLayerNetwork(conf);
|
|
network.init();
|
|
|
|
DataSetIterator iter = new IrisDataSetIterator(12, 120);
|
|
|
|
while (iter.hasNext()) {
|
|
network.fit(iter.next());
|
|
}
|
|
}
|
|
|
|
@Test
|
|
public void testSingleExampleWeightUpdates() {
|
|
//Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1.
|
|
//Manually calculate weight updates (entirely outside of DL4J and ND4J)
|
|
// and compare expected and actual weights after backprop
|
|
|
|
DataSetIterator iris = new IrisDataSetIterator(1, 10);
|
|
|
|
MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[] {1}, Activation.SIGMOID));
|
|
network.init();
|
|
|
|
Layer[] layers = network.getLayers();
|
|
|
|
final boolean printCalculations = false;
|
|
|
|
while (iris.hasNext()) {
|
|
DataSet data = iris.next();
|
|
INDArray x = data.getFeatures();
|
|
INDArray y = data.getLabels();
|
|
float[] xFloat = asFloat(x);
|
|
float[] yFloat = asFloat(y);
|
|
|
|
//Do forward pass:
|
|
INDArray l1Weights = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Hidden layer
|
|
INDArray l2Weights = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Output layer
|
|
INDArray l1Bias = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
|
|
INDArray l2Bias = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
|
|
float[] l1WeightsFloat = asFloat(l1Weights);
|
|
float[] l2WeightsFloat = asFloat(l2Weights);
|
|
float l1BiasFloat = l1Bias.getFloat(0);
|
|
float[] l2BiasFloatArray = asFloat(l2Bias);
|
|
|
|
float hiddenUnitPreSigmoid = dotProduct(l1WeightsFloat, xFloat) + l1BiasFloat; //z=w*x+b
|
|
float hiddenUnitPostSigmoid = sigmoid(hiddenUnitPreSigmoid); //a=sigma(z)
|
|
|
|
float[] outputPreSoftmax = new float[3];
|
|
//Normally a matrix multiplication here, but only one hidden unit in this trivial example
|
|
for (int i = 0; i < 3; i++) {
|
|
outputPreSoftmax[i] = hiddenUnitPostSigmoid * l2WeightsFloat[i] + l2BiasFloatArray[i];
|
|
}
|
|
float[] outputPostSoftmax = softmax(outputPreSoftmax);
|
|
|
|
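
            //For a softmax output layer trained with multi-class cross-entropy (MCXENT), the error signal at the
            //pre-softmax activations simplifies to (output - labels); the "out-labels" calculation below uses this.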
            //Do backward pass:
            float[] deltaOut = vectorDifference(outputPostSoftmax, yFloat); //out-labels
            //deltaHidden = sigmaPrime(hiddenUnitZ) * sum_k (w_jk * \delta_k); here, only one j
            float deltaHidden = 0.0f;
            for (int i = 0; i < 3; i++)
                deltaHidden += l2WeightsFloat[i] * deltaOut[i];
            deltaHidden *= derivOfSigmoid(hiddenUnitPreSigmoid);

            //Calculate weight/bias updates:
            //dL/dW = delta * (activation of prev. layer)
            //dL/db = delta
            float[] dLdwOut = new float[3];
            for (int i = 0; i < dLdwOut.length; i++)
                dLdwOut[i] = deltaOut[i] * hiddenUnitPostSigmoid;
            float[] dLdwHidden = new float[4];
            for (int i = 0; i < dLdwHidden.length; i++)
                dLdwHidden[i] = deltaHidden * xFloat[i];
            float[] dLdbOut = deltaOut;
            float dLdbHidden = deltaHidden;

            if (printCalculations) {
                System.out.println("deltaOut = " + Arrays.toString(deltaOut));
                System.out.println("deltaHidden = " + deltaHidden);
                System.out.println("dLdwOut = " + Arrays.toString(dLdwOut));
                System.out.println("dLdbOut = " + Arrays.toString(dLdbOut));
                System.out.println("dLdwHidden = " + Arrays.toString(dLdwHidden));
                System.out.println("dLdbHidden = " + dLdbHidden);
            }

            //Calculate new parameters:
            //w_i = w_i - (learningRate)/(batchSize) * sum_j (dL_j/dw_i)
            //b_i = b_i - (learningRate)/(batchSize) * sum_j (dL_j/db_i)
            //Which for batch size of one (here) is simply:
            //w_i = w_i - learningRate * dL/dW
            //b_i = b_i - learningRate * dL/db
            float[] expectedL1WeightsAfter = new float[4];
            float[] expectedL2WeightsAfter = new float[3];
            float expectedL1BiasAfter = l1BiasFloat - 0.1f * dLdbHidden;
            float[] expectedL2BiasAfter = new float[3];

            for (int i = 0; i < 4; i++)
                expectedL1WeightsAfter[i] = l1WeightsFloat[i] - 0.1f * dLdwHidden[i];
            for (int i = 0; i < 3; i++)
                expectedL2WeightsAfter[i] = l2WeightsFloat[i] - 0.1f * dLdwOut[i];
            for (int i = 0; i < 3; i++)
                expectedL2BiasAfter[i] = l2BiasFloatArray[i] - 0.1f * dLdbOut[i];

            //Finally, do back-prop on network, and compare parameters vs. expected parameters
            network.fit(data);

            /* INDArray l1WeightsAfter = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Hidden layer
            INDArray l2WeightsAfter = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Output layer
            INDArray l1BiasAfter = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            INDArray l2BiasAfter = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            float[] l1WeightsFloatAfter = asFloat(l1WeightsAfter);
            float[] l2WeightsFloatAfter = asFloat(l2WeightsAfter);
            float l1BiasFloatAfter = l1BiasAfter.getFloat(0);
            float[] l2BiasFloatAfter = asFloat(l2BiasAfter);

            if( printCalculations) {
                System.out.println("Expected L1 weights = " + Arrays.toString(expectedL1WeightsAfter));
                System.out.println("Actual L1 weights = " + Arrays.toString(asFloat(l1WeightsAfter)));
                System.out.println("Expected L2 weights = " + Arrays.toString(expectedL2WeightsAfter));
                System.out.println("Actual L2 weights = " + Arrays.toString(asFloat(l2WeightsAfter)));
                System.out.println("Expected L1 bias = " + expectedL1BiasAfter);
                System.out.println("Actual L1 bias = " + Arrays.toString(asFloat(l1BiasAfter)));
                System.out.println("Expected L2 bias = " + Arrays.toString(expectedL2BiasAfter));
                System.out.println("Actual L2 bias = " + Arrays.toString(asFloat(l2BiasAfter)));
            }

            float eps = 1e-4f;
            assertArrayEquals(l1WeightsFloatAfter,expectedL1WeightsAfter,eps);
            assertArrayEquals(l2WeightsFloatAfter,expectedL2WeightsAfter,eps);
            assertEquals(l1BiasFloatAfter,expectedL1BiasAfter,eps);
            assertArrayEquals(l2BiasFloatAfter,expectedL2BiasAfter,eps);
            */
            // System.out.println("\n\n--------------");
        }
    }

    @Test
    public void testMLPGradientCalculation() {
        testIrisMiniBatchGradients(1, new int[] {1}, Activation.SIGMOID);
        testIrisMiniBatchGradients(1, new int[] {5}, Activation.SIGMOID);
        testIrisMiniBatchGradients(12, new int[] {15, 25, 10}, Activation.SIGMOID);
        testIrisMiniBatchGradients(50, new int[] {10, 50, 200, 50, 10}, Activation.TANH);
        testIrisMiniBatchGradients(150, new int[] {30, 50, 20}, Activation.TANH);
    }
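
    /**
     * Manually performs a forward and backward pass for a sigmoid MLP on Iris, then compares the manually
     * calculated per-parameter gradients with those returned by MultiLayerNetwork.computeGradientAndScore().
     * Note: the network is always built with sigmoid hidden activations (to match the manual calculation);
     * the activationFunction argument is not used when constructing the network.
     */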
    private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLayerSizes,
                    Activation activationFunction) {
        int totalExamples = 10 * miniBatchSize;
        if (totalExamples > 150) {
            totalExamples = miniBatchSize * (150 / miniBatchSize);
        }
        if (miniBatchSize > 150) {
            fail();
        }
        DataSetIterator iris = new IrisDataSetIterator(miniBatchSize, totalExamples);

        MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(hiddenLayerSizes, Activation.SIGMOID));
        network.init();

        Layer[] layers = network.getLayers();
        int nLayers = layers.length;

        while (iris.hasNext()) {
            DataSet data = iris.next();
            INDArray x = data.getFeatures();
            INDArray y = data.getLabels();

            //Do forward pass:
            INDArray[] layerWeights = new INDArray[nLayers];
            INDArray[] layerBiases = new INDArray[nLayers];
            for (int i = 0; i < nLayers; i++) {
                layerWeights[i] = layers[i].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
                layerBiases[i] = layers[i].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            }
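
            //Forward pass as calculated here: z[i] = a[i-1] * W[i] + b[i] (bias added as a row vector),
            //with a[i] = sigmoid(z[i]) for hidden layers and a[L] = softmax(z[L]) for the output layer.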
            INDArray[] layerZs = new INDArray[nLayers];
            INDArray[] layerActivations = new INDArray[nLayers];
            for (int i = 0; i < nLayers; i++) {
                INDArray layerInput = (i == 0 ? x : layerActivations[i - 1]);
                layerZs[i] = layerInput.castTo(layerWeights[i].dataType()).mmul(layerWeights[i]).addiRowVector(layerBiases[i]);
                layerActivations[i] = (i == nLayers - 1 ? doSoftmax(layerZs[i].dup()) : doSigmoid(layerZs[i].dup()));
            }
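
            //Backward pass as calculated here: delta[L] = a[L] - labels (softmax output + MCXENT loss),
            //and delta[i] = (delta[i+1] * W[i+1]^T) element-wise multiplied by the sigmoid derivative of z[i].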
            //Do backward pass:
            INDArray[] deltas = new INDArray[nLayers];
            deltas[nLayers - 1] = layerActivations[nLayers - 1].sub(y.castTo(layerActivations[nLayers - 1].dataType())); //Out - labels; shape=[miniBatchSize,nOut];
            assertArrayEquals(deltas[nLayers - 1].shape(), new long[] {miniBatchSize, 3});
            for (int i = nLayers - 2; i >= 0; i--) {
                INDArray sigmaPrimeOfZ;
                sigmaPrimeOfZ = doSigmoidDerivative(layerZs[i]);
                INDArray epsilon = layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose();
                deltas[i] = epsilon.mul(sigmaPrimeOfZ);
                assertArrayEquals(deltas[i].shape(), new long[] {miniBatchSize, hiddenLayerSizes[i]});
            }

            INDArray[] dLdw = new INDArray[nLayers];
            INDArray[] dLdb = new INDArray[nLayers];
            for (int i = 0; i < nLayers; i++) {
                INDArray prevActivations = (i == 0 ? x : layerActivations[i - 1]);
                //Raw gradients, so not yet divided by mini-batch size (division is done in BaseUpdater)
                dLdw[i] = deltas[i].transpose().castTo(prevActivations.dataType()).mmul(prevActivations).transpose(); //Shape: [nIn, nOut]
                dLdb[i] = deltas[i].sum(true, 0); //Shape: [1,nOut]

                int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
                int nOut = (i < nLayers - 1 ? hiddenLayerSizes[i] : 3);
                assertArrayEquals(dLdw[i].shape(), new long[] {nIn, nOut});
                assertArrayEquals(dLdb[i].shape(), new long[] {1, nOut});
            }

            //Calculate and get gradient, compare to expected
            network.setInput(x);
            network.setLabels(y);
            network.computeGradientAndScore();
            Gradient gradient = network.gradientAndScore().getFirst();

            float eps = 1e-4f;
            for (int i = 0; i < hiddenLayerSizes.length; i++) {
                String wKey = i + "_" + DefaultParamInitializer.WEIGHT_KEY;
                String bKey = i + "_" + DefaultParamInitializer.BIAS_KEY;
                INDArray wGrad = gradient.getGradientFor(wKey);
                INDArray bGrad = gradient.getGradientFor(bKey);
                float[] wGradf = asFloat(wGrad);
                float[] bGradf = asFloat(bGrad);
                float[] expWGradf = asFloat(dLdw[i]);
                float[] expBGradf = asFloat(dLdb[i]);
                assertArrayEquals(wGradf, expWGradf, eps);
                assertArrayEquals(bGradf, expBGradf, eps);
            }
        }
    }

    /** Very simple back-prop config set up for Iris.
     * Learning Rate = 0.1
     * No regularization, no Adagrad, no momentum etc. One iteration.
     */
    private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes,
                    Activation activationFunction) {
        NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
                        .seed(12345L).list();

        for (int i = 0; i < hiddenLayerSizes.length; i++) {
            int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
            lb.layer(i, new DenseLayer.Builder().nIn(nIn).nOut(hiddenLayerSizes[i]).weightInit(WeightInit.XAVIER)
                            .activation(activationFunction).build());
        }

        lb.layer(hiddenLayerSizes.length,
                        new OutputLayer.Builder(LossFunction.MCXENT).nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1])
                                        .nOut(3).weightInit(WeightInit.XAVIER)
                                        .activation(activationFunction.equals(Activation.IDENTITY) ? Activation.IDENTITY
                                                        : Activation.SOFTMAX)
                                        .build());

        return lb.build();
    }
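
    /** Flattens an INDArray into a float[], reading elements in 'c' (row-major) order. */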
    public static float[] asFloat(INDArray arr) {
        long len = arr.length();
        if (len > Integer.MAX_VALUE)
            throw new ND4JArraySizeException();
        float[] f = new float[(int) len];
        NdIndexIterator iterator = new NdIndexIterator('c', arr.shape());
        for (int i = 0; i < len; i++) {
            f[i] = arr.getFloat(iterator.next());
        }
        return f;
    }

    public static float dotProduct(float[] x, float[] y) {
        float sum = 0.0f;
        for (int i = 0; i < x.length; i++)
            sum += x[i] * y[i];
        return sum;
    }

    public static float sigmoid(float in) {
        return (float) (1.0 / (1.0 + Math.exp(-in)));
    }

    public static float[] sigmoid(float[] in) {
        float[] out = new float[in.length];
        for (int i = 0; i < in.length; i++) {
            out[i] = sigmoid(in[i]);
        }
        return out;
    }
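
    /**
     * Sigmoid derivative expressed in terms of the supplied value: returns in * (1 - in), which equals
     * sigma'(z) when the argument is the sigmoid activation sigma(z).
     */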
    public static float derivOfSigmoid(float in) {
        // float v = (float)( Math.exp(in) / Math.pow(1+Math.exp(in),2.0) );
        float v = in * (1 - in);
        return v;
    }

    public static float[] derivOfSigmoid(float[] in) {
        float[] out = new float[in.length];
        for (int i = 0; i < in.length; i++) {
            out[i] = derivOfSigmoid(in[i]);
        }
        return out;
    }
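
    /** Softmax over the input vector: out[i] = exp(in[i]) / sum_j exp(in[j]). */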
    public static float[] softmax(float[] in) {
        float[] out = new float[in.length];
        float sumExp = 0.0f;
        for (int i = 0; i < in.length; i++) {
            sumExp += Math.exp(in[i]);
        }
        for (int i = 0; i < in.length; i++) {
            out[i] = (float) Math.exp(in[i]) / sumExp;
        }
        return out;
    }

    public static float[] vectorDifference(float[] x, float[] y) {
        float[] out = new float[x.length];
        for (int i = 0; i < x.length; i++) {
            out[i] = x[i] - y[i];
        }
        return out;
    }

    public static INDArray doSoftmax(INDArray input) {
        return Transforms.softmax(input, true);
    }

    public static INDArray doSigmoid(INDArray input) {
        return Transforms.sigmoid(input, true);
    }
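
    /** Applies ND4J's SigmoidDerivative transform op to a copy of the input; the passed-in array is not modified. */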
    public static INDArray doSigmoidDerivative(INDArray input) {
        return Nd4j.getExecutioner().exec(new SigmoidDerivative(input.dup()));
    }

}