* start working on bp for lstm
* further work on bp for lstmLayer
* minor change
* further work on bp for lstmLayer 2
* further work on bp for lstmLayer 3
* further work on bp for lstmLayer 4
* further work on bp for lstmLayer 5
* further work on bp for lstmLayer 6
* further work on bp for lstmLayer 7
* further work on bp for lstmLayer 8
* further work on bp for lstmLayer 9
* provide lstmLayerCell and lstmLayerCellBp as separate CUSTOM_OPs
* testing and fixing lstmLayerCellBp helper
* implement lstmLayerCellBp as separate op
* implement lstmLayerBp as separate op (not tested)
* fixing calculations of dLdWp and dLdb in lstmLayerCellBp
* further work on bp for lstmLayer 10
* fixing typo in lstmLayerTimeLoop
* forgot to perform clipping of c array and calculate corresponding derivative in lstmLayerCellBp
* further work on bp for lstmLayer 10
* testing and fixing bugs in lstmLayer_bp op 1
* testing and fixing bugs in lstmLayer_bp op 2
* turn off heavy tests for cuda for lstmLayer_bp op
* forgot to nullify gradients at eliminated time steps (when sequence length array is present)

Signed-off-by: Yurii <iuriish@yahoo.com>
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

//
// @author Yurii Shyrma (iuriish@yahoo.com), created on 16.07.2018
//

#include <helpers/GradCheck.h>
#include <array/NDArrayFactory.h>
#include <cmath>     // std::isnan, std::isinf
#include <cstdio>    // printf
#include <stdexcept> // std::invalid_argument, std::runtime_error

namespace sd {

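// Note on the seed gradients below: for loss L = sum(y) the derivative dL/dy_k = 1
// for every output element, while for loss L = mean(y) = sum(y)/N it is dL/dy_k = 1/N.
// These constants are written into the input gradient arrays of the backprop op,
// so that running it produces dL/dx for the chosen loss directly.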
//////////////////////////////////////////////////////////////////////////
void GradCheck::fillGradArrays(const LossFunc loss, const std::vector<NDArray*>& gradArrs) {

	const int numInGradArrs = gradArrs.size();

	// fill input gradient arrays in accordance with the type of loss function
	switch(loss) {

		case MEAN:
			for(int i = 0; i < numInGradArrs; ++i)
				*gradArrs[i] = 1. / gradArrs[i]->lengthOf();
			break;

		case SUM:
			for(int i = 0; i < numInGradArrs; ++i)
				*gradArrs[i] = 1.;
			break;

		default:
			throw std::invalid_argument("GradCheck::fillGradArrays: invalid type of loss function!");
	}
}

//////////////////////////////////////////////////////////////////////////
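// Compares the analytic gradients produced by opBP with numerical gradients
// obtained from opFF. The scheme: run opBP once to get dL/dx for every input,
// then for each checked input element perturb it by +/-EPSILON, re-run the
// feed-forward op, reduce its outputs to a scalar loss, and form the central
// difference. whatArrsToCheck selects which input arrays to verify and
// idxRange restricts the check to a fraction of each array's elements.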
bool GradCheck::checkGrad(ops::DeclarableOp& opFF, ops::DeclarableOp& opBP, const OpArgsHolder& argsHolderFF, const OpArgsHolder& argsHolderBP,
	                      const std::vector<bool>& whatArrsToCheck, const std::vector<double>& idxRange, const LossFunc loss, const std::vector<int>& outArrsFFIdx) {

	const int numInArrsFF     = argsHolderFF.getNumInArrs();				// at the same time numInArrsFF = number of output arrays in opBP
	const int numInGradArrsBP = argsHolderBP.getNumInArrs() - numInArrsFF;	// because argsHolderBP.getNumInArrs() = numInArrsFF + numInGradArrsBP
	const std::vector<NDArray*>& inArrsFF = argsHolderFF.getInArrs();
	const std::vector<NDArray*>& inArrsBP = argsHolderBP.getInArrs();

	// fill input gradient arrays in accordance with the kind of loss function
	fillGradArrays(loss, std::vector<NDArray*>(&inArrsBP[numInArrsFF], &inArrsBP[numInArrsFF + numInGradArrsBP]));

	// back prop pass
	ResultSet outArrsBP = opBP.execute(argsHolderBP);		// number of output arrays in back prop = numInArrsFF

	NDArray tmpScalar(sd::DataType::DOUBLE, inArrsFF[0]->getContext()); // scalar = 0

	for(int i = 0; i < numInArrsFF; ++i) {					// loop through input arrays

		if(!whatArrsToCheck.empty() && static_cast<bool>(whatArrsToCheck[i]) == false)
			continue;

		const Nd4jLong idxStart = static_cast<Nd4jLong>(idxRange[0] * inArrsFF[i]->lengthOf());
		const Nd4jLong idxEnd   = static_cast<Nd4jLong>(idxRange[1] * inArrsFF[i]->lengthOf());

		for(Nd4jLong j = idxStart; j < idxEnd; ++j) {		// loop through all elements of the current array

			const double orig = inArrsFF[i]->e<double>(j);

			// add epsilon, feed forward
			inArrsFF[i]->p<double>(j, orig + EPSILON);
			ResultSet outArrsFF = opFF.execute(argsHolderFF);
			int numOutArrs = outArrsFF.size();
			double scorePlus = 0.;

			if(!outArrsFFIdx.empty()) {
				for(const auto& k : outArrsFFIdx) {			// loop through the chosen output arrays only
					if(loss == SUM)
						outArrsFF.at(k)->reduceNumber(reduce::Sum, tmpScalar);
					else
						outArrsFF.at(k)->reduceNumber(reduce::Mean, tmpScalar);
					scorePlus += tmpScalar.e<double>(0);
				}
			}
			else {
				for(int k = 0; k < numOutArrs; ++k) {		// loop through all output arrays
					if(loss == SUM)
						outArrsFF.at(k)->reduceNumber(reduce::Sum, tmpScalar);
					else
						outArrsFF.at(k)->reduceNumber(reduce::Mean, tmpScalar);
					scorePlus += tmpScalar.e<double>(0);
				}
			}

			// subtract epsilon, feed forward
			inArrsFF[i]->p<double>(j, orig - EPSILON);
			outArrsFF = opFF.execute(argsHolderFF);
			double scoreMinus = 0.;

			if(!outArrsFFIdx.empty()) {
				for(const auto& k : outArrsFFIdx) {			// loop through the chosen output arrays only
					if(loss == SUM)
						outArrsFF.at(k)->reduceNumber(reduce::Sum, tmpScalar);
					else
						outArrsFF.at(k)->reduceNumber(reduce::Mean, tmpScalar);
					scoreMinus += tmpScalar.e<double>(0);
				}
			}
			else {
				for(int k = 0; k < numOutArrs; ++k) {		// loop through all output arrays
					if(loss == SUM)
						outArrsFF.at(k)->reduceNumber(reduce::Sum, tmpScalar);
					else
						outArrsFF.at(k)->reduceNumber(reduce::Mean, tmpScalar);
					scoreMinus += tmpScalar.e<double>(0);
				}
			}

			// restore the initial element value
			inArrsFF[i]->p<double>(j, orig);

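			// the two-sided (central) difference has O(eps^2) truncation error,
			// versus O(eps) for a one-sided difference:
			//     dL/dx_j ~= (L(x_j + eps) - L(x_j - eps)) / (2 * eps)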
			// calculate numerical gradient
			const double numericalGrad = (scorePlus - scoreMinus) / (2 * EPSILON);
			if(std::isnan(numericalGrad) || std::isinf(numericalGrad)) {
				printf("GradCheck::checkGrad: got wrong value for numerical gradient for input array # %i and its element at position %lld!\n", i, j);
				throw std::runtime_error("");
			}

			// get analytical gradient
			const double analyticGrad = outArrsBP.at(i)->e<double>(j);
			if(std::isnan(analyticGrad) || std::isinf(analyticGrad)) {
				printf("GradCheck::checkGrad: got wrong value for analytical gradient for input array # %i and its element at position %lld!\n", i, j);
				throw std::runtime_error("");
			}

			// printf("%lld: num = %.15f, ana = %.15f\n", j, numericalGrad, analyticGrad);

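			// the error measure below is the symmetric relative error
			//     |ana - num| / (|ana| + |num|),
			// which stays bounded in [0, 1] and avoids dividing by a single
			// near-zero gradient; failures whose absolute difference is below
			// MINABSERR are tolerated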
			// calculate relative error
			double relError;
			if(numericalGrad == 0. && analyticGrad == 0.)
				relError = 0.;
			else
				relError = math::nd4j_abs<double>(analyticGrad - numericalGrad) / (math::nd4j_abs<double>(analyticGrad) + math::nd4j_abs<double>(numericalGrad));

			// verify result
			if(relError > MAXRELERR || std::isnan(relError)) {

				if(math::nd4j_abs<double>(analyticGrad - numericalGrad) < MINABSERR)
					continue;
				printf("numericalGrad = %.15f,  analyticGrad = %.15f\n", numericalGrad, analyticGrad);
				printf("GradCheck::checkGrad: got relError = %f > MAXRELERR(%f) for input array # %i and its element at position %lld!\n", relError, MAXRELERR, i, j);
				return false;
			}
		}
	}

	return true;
}


}
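
// Usage sketch (hypothetical, not part of this file): how a test might drive
// checkGrad for a forward/backward op pair. The op choice, header path, and
// reliance on the default trailing arguments declared in GradCheck.h are
// assumptions modeled on typical libnd4j gradient-check tests.
//
//   #include <ops/declarable/CustomOperations.h>
//
//   sd::ops::sigmoid opFF;                                 // forward op under test
//   sd::ops::sigmoid_bp opBP;                              // its backprop counterpart
//   NDArray x('c', {2, 2}, sd::DataType::DOUBLE);
//   NDArray dLdz('c', {2, 2}, sd::DataType::DOUBLE);       // seed gradient, filled by fillGradArrays
//   x.linspace(0.1, 0.1);
//   OpArgsHolder argsHolderFF({&x});                       // inputs of the forward op
//   OpArgsHolder argsHolderBP({&x, &dLdz});                // forward inputs + incoming gradients
//   bool ok = GradCheck::checkGrad(opFF, opBP, argsHolderFF, argsHolderBP);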