/*
 *
 * ******************************************************************************
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 *
 */
package net.brutex.spark ;
//import net.brutex.ai.performance.storage.PostgresStatsStorage;
import lombok.extern.slf4j.Slf4j ;
import org.datavec.api.records.reader.RecordReader ;
import org.datavec.api.records.reader.impl.collection.ListStringRecordReader ;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader ;
import org.datavec.api.split.FileSplit ;
import org.datavec.api.split.ListStringSplit ;
import org.datavec.image.recordreader.ImageRecordReader ;
import org.deeplearning4j.core.storage.StatsStorage ;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator ;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration ;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LSTM ;
import org.deeplearning4j.nn.conf.layers.OutputLayer ;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor ;
import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor ;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork ;
import org.deeplearning4j.nn.weights.WeightInit ;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener ;
import org.deeplearning4j.ui.api.UIServer ;
import org.deeplearning4j.ui.model.stats.StatsListener ;
import org.deeplearning4j.ui.model.storage.FileStatsStorage ;
import org.junit.jupiter.api.AfterAll ;
import org.junit.jupiter.api.Tag ;
import org.junit.jupiter.api.Test ;
import org.nd4j.evaluation.classification.Evaluation ;
import org.nd4j.linalg.activations.Activation ;
import org.nd4j.linalg.api.ndarray.INDArray ;
import org.nd4j.linalg.dataset.SplitTestAndTrain ;
import org.nd4j.linalg.dataset.api.DataSet ;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator ;
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization ;
import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler ;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize ;
import org.nd4j.linalg.factory.Nd4j ;
import org.nd4j.linalg.learning.config.Nesterovs ;
import org.nd4j.linalg.lossfunctions.impl.LossMCXENT ;
import java.io.* ;
import java.util.ArrayList ;
import java.util.Arrays ;
import java.util.Iterator ;
import java.util.List ;
@Slf4j
@Tag ( "integration" )
public class TestServer2 {
@AfterAll
public static void tidyUp ( ) throws Exception {
UIServer . stopInstance ( ) ;
}
@Test
public void runServer ( ) throws InterruptedException , IOException {
/ *
for ( int page = 1 ; page < = 10 ; page + + ) {
Connection xx = Jsoup . connect ( "https://www.ebay.de/b/Bier-Bierdeckel-fur-Sammler/8734/bn_16579776?rt=nc&_dmd=1&_pgn=" + page ) ;
Elements xxx = xx . get ( ) . body ( ) . select ( ".s-item__image-img" ) ;
File datafile = new File ( "c:\\temp\\img_dump.csv" ) ;
int ifile = 0 ;
for ( Element e : xxx ) {
log . info ( e . toString ( ) ) ;
String imgurl = e . attr ( "src" ) ;
if ( ! imgurl . endsWith ( ".jpg" ) ) {
imgurl = e . attr ( "data-src" ) ;
}
Connection . Response res = Jsoup . connect ( imgurl ) . ignoreContentType ( true ) . execute ( ) ;
FileOutputStream out = new FileOutputStream ( new File ( "c:\\temp\\imgdump\\" + page + ifile + ".jpg" ) ) ;
out . write ( res . bodyAsBytes ( ) ) ;
out . close ( ) ;
FileUtils . writeStringToFile ( datafile , e . attr ( "alt" ) . toLowerCase ( ) . replace ( ";" , "" ) + ";" + page + ifile + ".jpg" + "\r\n" , Charset . defaultCharset ( ) , true ) ;
ifile + + ;
}
}
* /
RecordReader rrr = new ImageRecordReader ( 32 , 32 , 3 ) ;
rrr . initialize ( new FileSplit ( new File ( "c:\\temp\\imgdump\\" ) ) ) ;
DataSetIterator diter = new RecordReaderDataSetIterator . Builder ( rrr , 12 )
. classification ( 1 , 3 )
. preProcessor ( new ImagePreProcessingScaler ( ) )
. build ( ) ;
log . info ( "Using backend: " + Nd4j . getBackend ( ) ) ;
UIServer ui = UIServer . getInstance ( ) ;
log . info ( "Port:" + ui . getPort ( ) ) ;
//Get our network and training data
//MultiLayerNetwork net = UIExampleUtils.getMnistNetwork();
//DataSetIterator trainData = UIExampleUtils.getMnistData();
int i = 2000 ;
int numClasses = 10 ;
int numBatchSize = 100 ;
2023-03-23 17:39:00 +01:00
NeuralNetConfiguration conf = NeuralNetConfiguration . builder ( )
2022-09-20 15:40:53 +02:00
. seed ( 1234 )
. weightInit ( WeightInit . XAVIER )
. updater ( new Nesterovs . Builder ( ) . learningRate ( 0.15 ) . build ( ) )
. activation ( Activation . RELU )
. l2 ( 0 )
2023-03-23 17:39:00 +01:00
2022-09-20 15:40:53 +02:00
//.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build())
//.layer(1, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build())
2023-03-23 17:39:00 +01:00
// .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build())
2022-09-20 15:40:53 +02:00
. layer ( 0 , new DenseLayer . Builder ( ) . nIn ( 10 ) . nOut ( 100 ) . activation ( Activation . RELU ) . l2 ( 0.003 ) . build ( ) )
. layer ( 1 , new LSTM . Builder ( ) . nIn ( 100 ) . nOut ( 100 ) . activation ( Activation . TANH ) . build ( ) )
. layer ( 2 , new LSTM . Builder ( ) . nIn ( 100 ) . nOut ( 100 ) . activation ( Activation . TANH ) . build ( ) )
. layer ( 3 , new DenseLayer . Builder ( ) . nIn ( 100 ) . nOut ( 16 ) . activation ( Activation . RELU ) . l2 ( 0.001 ) . build ( ) )
. layer ( 4 , new OutputLayer . Builder ( ) . nIn ( 16 ) . nOut ( numClasses )
. activation ( Activation . SOFTMAX )
. lossFunction ( new LossMCXENT ( ) )
. build ( )
)
//.inputPreProcessor(0, new FeedForwardToCnnPreProcessor(1,10, 1))
//.inputPreProcessor(2, new CnnToFeedForwardPreProcessor())
. inputPreProcessor ( 1 , new FeedForwardToRnnPreProcessor ( ) )
. inputPreProcessor ( 3 , new RnnToFeedForwardPreProcessor ( ) )
. build ( ) ;
MultiLayerNetwork net = new MultiLayerNetwork ( conf ) ;
RecordReader trainrecords = new CSVRecordReader ( 0 , ';' ) ;
File dataFile = new File ( "c://temp/werte2-medium.csv" ) ;
trainrecords . initialize ( new FileSplit ( dataFile ) ) ;
/ *
DataSetIterator iter = new RecordReaderDataSetIterator . Builder ( trainrecords , numBatchSize )
. classification ( 10 , numClasses )
. build ( )
;
* /
List < INDArray > featuresTrain = new ArrayList < INDArray > ( ) ;
List < INDArray > labelsTrain = new ArrayList < INDArray > ( ) ;
List < INDArray > featuresTest = new ArrayList < INDArray > ( ) ;
List < INDArray > labelsTest = new ArrayList < INDArray > ( ) ;
List < INDArray > rawLabels = new ArrayList < INDArray > ( ) ;
List < INDArray > rawTrainLabels = new ArrayList < INDArray > ( ) ;
INDArray indexes = null ;
while ( diter . hasNext ( ) ) {
DataSet next = diter . next ( ) ;
SplitTestAndTrain split = next . splitTestAndTrain ( 0.9 ) ;
DataSet dsTest = split . getTest ( ) ;
DataSet dsTrain = split . getTrain ( ) ;
DataNormalization normalizer = new NormalizerStandardize ( ) ;
normalizer . fit ( dsTrain ) ;
normalizer . transform ( dsTrain ) ;
normalizer . transform ( dsTest ) ;
featuresTrain . add ( dsTrain . getFeatures ( ) ) ;
labelsTrain . add ( dsTrain . getLabels ( ) ) ;
rawTrainLabels . add ( dsTrain . getLabels ( ) ) ;
featuresTest . add ( dsTest . getFeatures ( ) ) ;
rawLabels . add ( dsTest . getLabels ( ) ) ;
indexes = Nd4j . argMax ( dsTest . getLabels ( ) , 1 ) ;
labelsTest . add ( indexes ) ;
}
//Configure where the network information (gradients, activations, score vs. time etc) is to be stored
//Then add the StatsListener to collect this information from the network, as it trains
File logFile = new File ( "c://temp/" , "ui-stats-brian.dl4j" ) ;
logFile . delete ( ) ;
StatsStorage statsStorage = new FileStatsStorage ( logFile ) ;
//PostgresStatsStorage psqlStore = new PostgresStatsStorage();
int listenerFrequency = 2 ;
//net.setListeners(new StatsListener(psqlStore, listenerFrequency), new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(200));
2023-03-23 17:39:00 +01:00
net . addTrainingListeners ( new StatsListener ( statsStorage , listenerFrequency ) , new ScoreIterationListener ( 200 ) ) ;
2022-09-20 15:40:53 +02:00
//Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized
ui . attach ( statsStorage ) ;
Iterator < INDArray > labelsTrainIterator = labelsTrain . iterator ( ) ;
File deb = new File ( "c:\\temp\\debug.txt" ) ;
OutputStream out = new BufferedOutputStream ( new FileOutputStream ( deb ) ) ;
while ( i > 0 ) {
for ( INDArray a : featuresTrain ) {
net . fit ( a , labelsTrainIterator . next ( ) ) ;
}
labelsTrainIterator = labelsTrain . iterator ( ) ;
i - - ;
//Play Visualisation
/ *
NDArrayStrings fm = new NDArrayStrings ( " | " ) ;
Nd4j . writeTxt ( net . getLayer ( 1 ) . getGradientsViewArray ( ) , "c:/temp/dump" + i + ".txt" ) ;
out . write ( fm . format ( net . getLayer ( 1 ) . getGradientsViewArray ( ) , false ) . getBytes ( StandardCharsets . UTF_8 ) ) ;
out . write ( 10 ) ;
out . write ( 13 ) ;
out . write ( net . getLayer ( 1 ) . toString ( ) . getBytes ( StandardCharsets . UTF_8 ) ) ;
out . write ( 10 ) ;
out . write ( 13 ) ;
* /
}
out . close ( ) ;
//Thread.sleep(60000);
List < String > tt = new ArrayList < > ( ) ;
tt . addAll ( Arrays . asList ( "1" , "2" , "3" , "3" , "5" , "6" , "7" , "8" , "9" , "5" ) ) ;
List < List < String > > ttt = new ArrayList ( ) ;
ttt . add ( tt ) ;
RecordReader rr = new ListStringRecordReader ( ) ;
rr . initialize ( new ListStringSplit ( ttt ) ) ;
DataSetIterator dataIter = new RecordReaderDataSetIterator ( rr , 1 ) ;
org . nd4j . linalg . dataset . DataSet set = dataIter . next ( ) ;
log . info ( "Brian out:" + net . score ( set ) ) ;
log . info ( "Brian out:" + net . f1Score ( set ) ) ;
log . info ( "============================== Training Data =======================================" ) ;
runEval ( numClasses , featuresTrain , rawTrainLabels , net ) ;
log . info ( "====================================================================================" ) ;
log . info ( "============================== Test Data =======================================" ) ;
runEval ( numClasses , featuresTest , rawLabels , net ) ;
log . info ( "====================================================================================" ) ;
}
void runEval ( int numClasses , List < INDArray > trainingData , List < INDArray > trainingLabels , MultiLayerNetwork network ) {
Evaluation eval = new Evaluation ( numClasses ) ;
Iterator < INDArray > testIterator = trainingData . iterator ( ) ;
Iterator < INDArray > labelsIterator = trainingLabels . iterator ( ) ;
while ( testIterator . hasNext ( ) ) {
INDArray output = network . output ( testIterator . next ( ) ) ;
eval . eval ( labelsIterator . next ( ) , output ) ;
}
log . info ( eval . stats ( ) ) ;
}
}