101 lines
4.0 KiB
Java
101 lines
4.0 KiB
Java
/*
 * ******************************************************************************
 * *
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 */
|
|
|
|
package org.deeplearning4j.nn.params;
|
|
|
|
import java.util.Map;
|
|
import lombok.val;
|
|
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
|
|
import org.nd4j.linalg.api.ndarray.INDArray;
|
|
import org.nd4j.linalg.factory.Nd4j;
|
|
import org.nd4j.linalg.indexing.NDArrayIndex;
|
|
|
|
/**
|
|
* Pretrain weight initializer.
|
|
* Has the visible bias as well as hidden and weight matrix.
|
|
*
|
|
* @author Adam Gibson
|
|
*/
|
|
public class PretrainParamInitializer extends DefaultParamInitializer {
|
|
|
|
private static final PretrainParamInitializer INSTANCE = new PretrainParamInitializer();
|
|
|
|
public static PretrainParamInitializer getInstance() {
|
|
return INSTANCE;
|
|
}
|
|
|
|
public final static String VISIBLE_BIAS_KEY = "v" + DefaultParamInitializer.BIAS_KEY;
|
|
|
|
@Override
|
|
public long numParams(LayerConfiguration conf) {
|
|
org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
|
|
(org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf;
|
|
return super.numParams(conf) + layerConf.getNIn();
|
|
}
|
|
|
|
@Override
|
|
public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
|
|
Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
|
|
|
|
org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
|
|
(org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf;
|
|
val nIn = layerConf.getNIn();
|
|
val nOut = layerConf.getNOut();
|
|
val nWeightParams = nIn * nOut;
|
|
|
|
INDArray visibleBiasView = paramsView.get(NDArrayIndex.interval(0,0,true),
|
|
NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
|
|
params.put(VISIBLE_BIAS_KEY, createVisibleBias(conf, visibleBiasView, initializeParams));
|
|
conf.addVariable(VISIBLE_BIAS_KEY);
|
|
|
|
return params;
|
|
}
|
|
|
|
protected INDArray createVisibleBias(LayerConfiguration conf, INDArray visibleBiasView,
|
|
boolean initializeParameters) {
|
|
org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
|
|
(org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf;
|
|
if (initializeParameters) {
|
|
INDArray ret = Nd4j.valueArrayOf(new long[]{1, layerConf.getNIn()}, layerConf.getVisibleBiasInit());
|
|
visibleBiasView.assign(ret);
|
|
}
|
|
return visibleBiasView;
|
|
}
|
|
|
|
|
|
@Override
|
|
public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {
|
|
Map<String, INDArray> out = super.getGradientsFromFlattened(conf, gradientView);
|
|
org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
|
|
(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf;
|
|
|
|
val nIn = layerConf.getNIn();
|
|
val nOut = layerConf.getNOut();
|
|
val nWeightParams = nIn * nOut;
|
|
|
|
INDArray vBiasView = gradientView.get(NDArrayIndex.interval(0,0,true),
|
|
NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
|
|
|
|
out.put(VISIBLE_BIAS_KEY, vBiasView);
|
|
|
|
return out;
|
|
}
|
|
}
|