Datavec code cleanup (#9071)
* Removed unnecessary semicolons
  Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Use standard charset object
  Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
* Removed unused imports
  Signed-off-by: Dariusz Zbyrad <dariusz.zbyrad@gmail.com>
This commit is contained in:
		
							parent
							
								
									7f4f3b61f5
								
							
						
					
					
						commit
						4394965cb5
					
				@ -23,9 +23,6 @@ import org.datavec.api.formats.output.OutputFormat;
 | 
			
		||||
import org.datavec.api.records.writer.RecordWriter;
 | 
			
		||||
import org.datavec.api.records.writer.impl.csv.CSVRecordWriter;
 | 
			
		||||
 | 
			
		||||
import java.io.File;
 | 
			
		||||
import java.io.FileNotFoundException;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Creates a {@link CSVRecordWriter}
 | 
			
		||||
 *
 | 
			
		||||
 | 
			
		||||
@ -23,9 +23,6 @@ import org.datavec.api.formats.output.OutputFormat;
 | 
			
		||||
import org.datavec.api.records.writer.RecordWriter;
 | 
			
		||||
import org.datavec.api.records.writer.impl.LineRecordWriter;
 | 
			
		||||
 | 
			
		||||
import java.io.File;
 | 
			
		||||
import java.io.FileNotFoundException;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Line output format
 | 
			
		||||
 * @author Adam Gibson
 | 
			
		||||
 | 
			
		||||
@ -24,8 +24,6 @@ import org.datavec.api.formats.output.OutputFormat;
 | 
			
		||||
import org.datavec.api.records.writer.RecordWriter;
 | 
			
		||||
import org.datavec.api.records.writer.impl.misc.SVMLightRecordWriter;
 | 
			
		||||
 | 
			
		||||
import java.io.File;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Created by agibsonccc on 1/11/15.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
@ -22,6 +22,7 @@ import org.datavec.api.writable.Text;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
 | 
			
		||||
import java.io.*;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
import java.util.zip.GZIPInputStream;
 | 
			
		||||
import java.util.zip.GZIPOutputStream;
 | 
			
		||||
 | 
			
		||||
@ -79,12 +80,12 @@ public final class WritableUtils {
 | 
			
		||||
        byte[] bytes = readCompressedByteArray(in);
 | 
			
		||||
        if (bytes == null)
 | 
			
		||||
            return null;
 | 
			
		||||
        return new String(bytes, "UTF-8");
 | 
			
		||||
        return new String(bytes, StandardCharsets.UTF_8);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    public static int writeCompressedString(DataOutput out, String s) throws IOException {
 | 
			
		||||
        return writeCompressedByteArray(out, (s != null) ? s.getBytes("UTF-8") : null);
 | 
			
		||||
        return writeCompressedByteArray(out, (s != null) ? s.getBytes(StandardCharsets.UTF_8) : null);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
@ -96,7 +97,7 @@ public final class WritableUtils {
 | 
			
		||||
     */
 | 
			
		||||
    public static void writeString(DataOutput out, String s) throws IOException {
 | 
			
		||||
        if (s != null) {
 | 
			
		||||
            byte[] buffer = s.getBytes("UTF-8");
 | 
			
		||||
            byte[] buffer = s.getBytes(StandardCharsets.UTF_8);
 | 
			
		||||
            int len = buffer.length;
 | 
			
		||||
            out.writeInt(len);
 | 
			
		||||
            out.write(buffer, 0, len);
 | 
			
		||||
@ -117,7 +118,7 @@ public final class WritableUtils {
 | 
			
		||||
            return null;
 | 
			
		||||
        byte[] buffer = new byte[length];
 | 
			
		||||
        in.readFully(buffer); // could/should use readFully(buffer,0,length)?
 | 
			
		||||
        return new String(buffer, "UTF-8");
 | 
			
		||||
        return new String(buffer, StandardCharsets.UTF_8);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -19,7 +19,6 @@ package org.datavec.api.io.labels;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
 | 
			
		||||
import java.io.Serializable;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -27,7 +27,6 @@ import org.datavec.api.writable.Writable;
 | 
			
		||||
import java.io.DataInputStream;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -16,16 +16,11 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.api.records.reader.impl.jackson;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.Map;
 | 
			
		||||
 | 
			
		||||
import org.datavec.api.records.reader.impl.LineRecordReader;
 | 
			
		||||
import org.datavec.api.writable.Text;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
import org.nd4j.shade.jackson.core.type.TypeReference;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.ObjectMapper;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -28,14 +28,12 @@ import org.datavec.api.records.metadata.RecordMetaDataURI;
 | 
			
		||||
import org.datavec.api.records.reader.BaseRecordReader;
 | 
			
		||||
import org.datavec.api.split.FileSplit;
 | 
			
		||||
import org.datavec.api.split.InputSplit;
 | 
			
		||||
import org.datavec.api.writable.Text;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
import org.nd4j.shade.jackson.core.type.TypeReference;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.ObjectMapper;
 | 
			
		||||
 | 
			
		||||
import java.io.*;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.nio.charset.Charset;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
import java.util.*;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -34,6 +34,7 @@ import org.slf4j.LoggerFactory;
 | 
			
		||||
import java.io.*;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.nio.charset.Charset;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Collections;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
@ -57,7 +58,7 @@ import java.util.regex.Pattern;
 | 
			
		||||
 */
 | 
			
		||||
public class RegexSequenceRecordReader extends FileRecordReader implements SequenceRecordReader {
 | 
			
		||||
    public static final String SKIP_NUM_LINES = NAME_SPACE + ".skipnumlines";
 | 
			
		||||
    public static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
 | 
			
		||||
    public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
 | 
			
		||||
    public static final LineErrorHandling DEFAULT_ERROR_HANDLING = LineErrorHandling.FailOnInvalid;
 | 
			
		||||
 | 
			
		||||
    /**Error handling mode: How should invalid lines (i.e., those that don't match the provided regex) be handled?<br>
 | 
			
		||||
@ -67,7 +68,7 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
 | 
			
		||||
     */
 | 
			
		||||
    public enum LineErrorHandling {
 | 
			
		||||
        FailOnInvalid, SkipInvalid, SkipInvalidWithWarning
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static final Logger LOG = LoggerFactory.getLogger(RegexSequenceRecordReader.class);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -16,14 +16,12 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.api.records.reader.impl.transform;
 | 
			
		||||
 | 
			
		||||
import lombok.AllArgsConstructor;
 | 
			
		||||
import org.datavec.api.conf.Configuration;
 | 
			
		||||
import org.datavec.api.records.Record;
 | 
			
		||||
import org.datavec.api.records.listener.RecordListener;
 | 
			
		||||
import org.datavec.api.records.metadata.RecordMetaData;
 | 
			
		||||
import org.datavec.api.records.reader.RecordReader;
 | 
			
		||||
import org.datavec.api.split.InputSplit;
 | 
			
		||||
import org.datavec.api.transform.Transform;
 | 
			
		||||
import org.datavec.api.transform.TransformProcess;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -25,6 +25,7 @@ import org.datavec.api.split.partition.Partitioner;
 | 
			
		||||
import java.io.DataOutputStream;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.charset.Charset;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Write to files.
 | 
			
		||||
@ -38,7 +39,7 @@ import java.nio.charset.Charset;
 | 
			
		||||
 */
 | 
			
		||||
public abstract class FileRecordWriter implements RecordWriter {
 | 
			
		||||
 | 
			
		||||
    public static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
 | 
			
		||||
    public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
 | 
			
		||||
 | 
			
		||||
    protected DataOutputStream out;
 | 
			
		||||
    public final static String NEW_LINE = "\n";
 | 
			
		||||
 | 
			
		||||
@ -17,10 +17,6 @@
 | 
			
		||||
package org.datavec.api.records.writer.impl.misc;
 | 
			
		||||
 | 
			
		||||
import lombok.extern.slf4j.Slf4j;
 | 
			
		||||
import org.datavec.api.conf.Configuration;
 | 
			
		||||
 | 
			
		||||
import java.io.File;
 | 
			
		||||
import java.io.FileNotFoundException;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -21,7 +21,6 @@ import java.net.URI;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.Collection;
 | 
			
		||||
import java.util.LinkedList;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A simple InputSplit based on a collection of URIs
 | 
			
		||||
 | 
			
		||||
@ -18,11 +18,9 @@ package org.datavec.api.split;
 | 
			
		||||
 | 
			
		||||
import lombok.extern.slf4j.Slf4j;
 | 
			
		||||
import org.datavec.api.util.files.UriFromPathIterator;
 | 
			
		||||
import org.datavec.api.writable.WritableType;
 | 
			
		||||
 | 
			
		||||
import java.io.*;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.nio.file.Paths;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.NoSuchElementException;
 | 
			
		||||
import java.util.regex.Matcher;
 | 
			
		||||
 | 
			
		||||
@ -26,7 +26,6 @@ import org.datavec.api.transform.schema.Schema;
 | 
			
		||||
import org.datavec.api.transform.serde.JsonMappers;
 | 
			
		||||
import org.datavec.api.transform.serde.JsonSerializer;
 | 
			
		||||
import org.datavec.api.transform.serde.YamlSerializer;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonSubTypes;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.JsonNode;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.ObjectMapper;
 | 
			
		||||
 | 
			
		||||
@ -49,27 +49,27 @@ public class DoubleAnalysisCounter implements AnalysisCounter<DoubleAnalysisCoun
 | 
			
		||||
  private TDigest digest = TDigest.createDigest(100);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    public DoubleAnalysisCounter() {};
 | 
			
		||||
    public DoubleAnalysisCounter() {}
 | 
			
		||||
 | 
			
		||||
    public double getMinValueSeen() {
 | 
			
		||||
        return counter.getMin();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getMaxValueSeen() {
 | 
			
		||||
        return counter.getMax();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getSum() {
 | 
			
		||||
        return counter.getSum();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getCountTotal() {
 | 
			
		||||
        return counter.getCount();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getSampleStdev() {
 | 
			
		||||
        return counter.getStddev(false);
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getMean() {
 | 
			
		||||
        return counter.getMean();
 | 
			
		||||
@ -105,7 +105,7 @@ public class DoubleAnalysisCounter implements AnalysisCounter<DoubleAnalysisCoun
 | 
			
		||||
            countPositive++;
 | 
			
		||||
        } else {
 | 
			
		||||
            countNegative++;
 | 
			
		||||
        } ;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        digest.add(value);
 | 
			
		||||
        counter.add(value);
 | 
			
		||||
 | 
			
		||||
@ -47,27 +47,27 @@ public class IntegerAnalysisCounter implements AnalysisCounter<IntegerAnalysisCo
 | 
			
		||||
     */
 | 
			
		||||
    private TDigest digest = TDigest.createDigest(100);
 | 
			
		||||
 | 
			
		||||
    public IntegerAnalysisCounter() {};
 | 
			
		||||
    public IntegerAnalysisCounter() {}
 | 
			
		||||
 | 
			
		||||
    public int getMinValueSeen() {
 | 
			
		||||
        return (int) counter.getMin();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public int getMaxValueSeen() {
 | 
			
		||||
        return (int) counter.getMax();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getSum() {
 | 
			
		||||
        return (long) counter.getSum();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getCountTotal() {
 | 
			
		||||
        return counter.getCount();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getSampleStdev() {
 | 
			
		||||
        return counter.getStddev(false);
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getMean() {
 | 
			
		||||
        return counter.getMean();
 | 
			
		||||
@ -100,7 +100,7 @@ public class IntegerAnalysisCounter implements AnalysisCounter<IntegerAnalysisCo
 | 
			
		||||
            countPositive++;
 | 
			
		||||
        } else {
 | 
			
		||||
            countNegative++;
 | 
			
		||||
        } ;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        digest.add((double) value);
 | 
			
		||||
        counter.add((double) value);
 | 
			
		||||
 | 
			
		||||
@ -47,23 +47,23 @@ public class LongAnalysisCounter implements AnalysisCounter<LongAnalysisCounter>
 | 
			
		||||
     */
 | 
			
		||||
    private TDigest digest = TDigest.createDigest(100);
 | 
			
		||||
 | 
			
		||||
    public LongAnalysisCounter() {};
 | 
			
		||||
    public LongAnalysisCounter() {}
 | 
			
		||||
 | 
			
		||||
    public long getMinValueSeen() {
 | 
			
		||||
        return (long) counter.getMin();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getMaxValueSeen() {
 | 
			
		||||
        return (long) counter.getMax();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getSum() {
 | 
			
		||||
        return (long) counter.getSum();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getCountTotal() {
 | 
			
		||||
        return counter.getCount();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getSampleStdev() {
 | 
			
		||||
        return counter.getStddev(false);
 | 
			
		||||
@ -100,7 +100,7 @@ public class LongAnalysisCounter implements AnalysisCounter<LongAnalysisCounter>
 | 
			
		||||
            countPositive++;
 | 
			
		||||
        } else {
 | 
			
		||||
            countNegative++;
 | 
			
		||||
        } ;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        digest.add((double) value);
 | 
			
		||||
        counter.add((double) value);
 | 
			
		||||
 | 
			
		||||
@ -35,27 +35,27 @@ public class StringAnalysisCounter implements AnalysisCounter<StringAnalysisCoun
 | 
			
		||||
    private long countMinLength = 0;
 | 
			
		||||
    private long countMaxLength = 0;
 | 
			
		||||
 | 
			
		||||
    public StringAnalysisCounter() {};
 | 
			
		||||
    public StringAnalysisCounter() {}
 | 
			
		||||
 | 
			
		||||
    public int getMinLengthSeen() {
 | 
			
		||||
        return (int) counter.getMin();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public int getMaxLengthSeen() {
 | 
			
		||||
        return (int) counter.getMax();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getSumLength() {
 | 
			
		||||
        return (long) counter.getSum();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public long getCountTotal() {
 | 
			
		||||
        return counter.getCount();
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getSampleStdev() {
 | 
			
		||||
        return counter.getStddev(false);
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public double getMean() {
 | 
			
		||||
        return counter.getMean();
 | 
			
		||||
 | 
			
		||||
@ -46,7 +46,7 @@ public class Join implements Serializable {
 | 
			
		||||
     */
 | 
			
		||||
    public enum JoinType {
 | 
			
		||||
        Inner, LeftOuter, RightOuter, FullOuter
 | 
			
		||||
    };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private JoinType joinType;
 | 
			
		||||
    private Schema leftSchema;
 | 
			
		||||
@ -196,7 +196,7 @@ public class Join implements Serializable {
 | 
			
		||||
 | 
			
		||||
        for (ColumnMetaData rightMeta : rightSchema.getColumnMetaData()) {
 | 
			
		||||
            if (keySetRight.contains(rightMeta.getName()))
 | 
			
		||||
                continue;;
 | 
			
		||||
                continue;
 | 
			
		||||
            metaDataOut.add(rightMeta);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -16,7 +16,6 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.api.transform.quality.columns;
 | 
			
		||||
 | 
			
		||||
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
 | 
			
		||||
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
 | 
			
		||||
import lombok.Data;
 | 
			
		||||
import lombok.EqualsAndHashCode;
 | 
			
		||||
 | 
			
		||||
@ -24,14 +24,8 @@ import org.datavec.api.transform.rank.CalculateSortedRank;
 | 
			
		||||
import org.datavec.api.transform.reduce.IAssociativeReducer;
 | 
			
		||||
import org.datavec.api.transform.sequence.SequenceComparator;
 | 
			
		||||
import org.datavec.api.transform.sequence.SequenceSplit;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonAutoDetect;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.PropertyAccessor;
 | 
			
		||||
import org.nd4j.shade.jackson.core.JsonFactory;
 | 
			
		||||
import org.nd4j.shade.jackson.core.type.TypeReference;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.DeserializationFeature;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.ObjectMapper;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.SerializationFeature;
 | 
			
		||||
import org.nd4j.shade.jackson.datatype.joda.JodaModule;
 | 
			
		||||
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
@ -16,17 +16,8 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.api.transform.serde;
 | 
			
		||||
 | 
			
		||||
import org.datavec.api.transform.Transform;
 | 
			
		||||
import org.datavec.api.transform.TransformProcess;
 | 
			
		||||
import org.datavec.api.transform.condition.Condition;
 | 
			
		||||
import org.datavec.api.transform.filter.Filter;
 | 
			
		||||
import org.datavec.api.transform.reduce.IAssociativeReducer;
 | 
			
		||||
import org.datavec.api.transform.sequence.SequenceComparator;
 | 
			
		||||
import org.nd4j.shade.jackson.core.JsonFactory;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.ObjectMapper;
 | 
			
		||||
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Serializer used for converting objects (Transforms, Conditions, etc) to JSON format
 | 
			
		||||
 *
 | 
			
		||||
 | 
			
		||||
@ -16,16 +16,7 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.api.transform.serde;
 | 
			
		||||
 | 
			
		||||
import org.datavec.api.transform.Transform;
 | 
			
		||||
import org.datavec.api.transform.TransformProcess;
 | 
			
		||||
import org.datavec.api.transform.condition.Condition;
 | 
			
		||||
import org.datavec.api.transform.filter.Filter;
 | 
			
		||||
import org.datavec.api.transform.reduce.IAssociativeReducer;
 | 
			
		||||
import org.datavec.api.transform.sequence.SequenceComparator;
 | 
			
		||||
import org.nd4j.shade.jackson.databind.ObjectMapper;
 | 
			
		||||
import org.nd4j.shade.jackson.dataformat.yaml.YAMLFactory;
 | 
			
		||||
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Serializer used for converting objects (Transforms, Conditions, etc) to YAML format
 | 
			
		||||
 | 
			
		||||
@ -35,7 +35,6 @@ import org.joda.time.DateTimeFieldType;
 | 
			
		||||
import org.joda.time.DateTimeZone;
 | 
			
		||||
import org.joda.time.format.DateTimeFormat;
 | 
			
		||||
import org.joda.time.format.DateTimeFormatter;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonIgnore;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonInclude;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
 | 
			
		||||
@ -32,7 +32,6 @@ import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.io.ObjectInputStream;
 | 
			
		||||
import java.io.ObjectOutputStream;
 | 
			
		||||
import java.text.DateFormat;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.TimeZone;
 | 
			
		||||
 | 
			
		||||
@ -34,7 +34,7 @@ public class BooleanWritable implements WritableComparable {
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     */
 | 
			
		||||
    public BooleanWritable() {};
 | 
			
		||||
    public BooleanWritable() {}
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     */
 | 
			
		||||
 | 
			
		||||
@ -16,7 +16,6 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.api.writable;
 | 
			
		||||
 | 
			
		||||
import lombok.AllArgsConstructor;
 | 
			
		||||
import lombok.Getter;
 | 
			
		||||
import lombok.NoArgsConstructor;
 | 
			
		||||
import lombok.Setter;
 | 
			
		||||
@ -28,9 +27,7 @@ import java.io.DataInput;
 | 
			
		||||
import java.io.DataOutput;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.ByteBuffer;
 | 
			
		||||
import java.nio.ByteOrder;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.Objects;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * {@link Writable} type for
 | 
			
		||||
 | 
			
		||||
@ -46,14 +46,14 @@ public class Text extends BinaryComparable implements WritableComparable<BinaryC
 | 
			
		||||
 | 
			
		||||
    private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY = new ThreadLocal<CharsetEncoder>() {
 | 
			
		||||
        protected CharsetEncoder initialValue() {
 | 
			
		||||
            return Charset.forName("UTF-8").newEncoder().onMalformedInput(CodingErrorAction.REPORT)
 | 
			
		||||
            return StandardCharsets.UTF_8.newEncoder().onMalformedInput(CodingErrorAction.REPORT)
 | 
			
		||||
                            .onUnmappableCharacter(CodingErrorAction.REPORT);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    private static ThreadLocal<CharsetDecoder> DECODER_FACTORY = new ThreadLocal<CharsetDecoder>() {
 | 
			
		||||
        protected CharsetDecoder initialValue() {
 | 
			
		||||
            return Charset.forName("UTF-8").newDecoder().onMalformedInput(CodingErrorAction.REPORT)
 | 
			
		||||
            return StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
 | 
			
		||||
                            .onUnmappableCharacter(CodingErrorAction.REPORT);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
@ -17,7 +17,6 @@
 | 
			
		||||
package org.datavec.api.writable.comparator;
 | 
			
		||||
 | 
			
		||||
import lombok.AllArgsConstructor;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
 | 
			
		||||
import java.io.Serializable;
 | 
			
		||||
import java.util.Comparator;
 | 
			
		||||
 | 
			
		||||
@ -23,7 +23,6 @@ import org.datavec.api.records.writer.impl.misc.SVMLightRecordWriter;
 | 
			
		||||
import org.datavec.api.split.FileSplit;
 | 
			
		||||
import org.datavec.api.split.partition.NumberOfRecordsPartitioner;
 | 
			
		||||
import org.datavec.api.writable.*;
 | 
			
		||||
import org.datavec.api.writable.NDArrayWritable;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.common.tests.BaseND4JTest;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
 | 
			
		||||
@ -20,7 +20,6 @@ import org.datavec.api.writable.Writable;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.common.tests.BaseND4JTest;
 | 
			
		||||
 | 
			
		||||
import java.io.Serializable;
 | 
			
		||||
import java.util.*;
 | 
			
		||||
 | 
			
		||||
import static org.junit.Assert.assertTrue;
 | 
			
		||||
 | 
			
		||||
@ -22,8 +22,6 @@ import org.datavec.image.data.Image;
 | 
			
		||||
import org.datavec.image.transform.ImageTransform;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.common.util.ArchiveUtils;
 | 
			
		||||
import org.slf4j.Logger;
 | 
			
		||||
import org.slf4j.LoggerFactory;
 | 
			
		||||
 | 
			
		||||
import java.io.File;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
 | 
			
		||||
@ -328,7 +328,7 @@ public class CifarLoader extends NativeImageLoader implements Serializable {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public Pair<INDArray, Mat> convertMat(byte[] byteFeature) {
 | 
			
		||||
        INDArray label = FeatureUtil.toOutcomeVector(byteFeature[0], NUM_LABELS);; // first value in the 3073 byte array
 | 
			
		||||
        INDArray label = FeatureUtil.toOutcomeVector(byteFeature[0], NUM_LABELS);// first value in the 3073 byte array
 | 
			
		||||
        Mat image = new Mat(HEIGHT, WIDTH, CV_8UC(CHANNELS)); // feature are 3072
 | 
			
		||||
        ByteBuffer imageData = image.createBuffer();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -29,7 +29,6 @@ import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Boxes images to a given width and height without changing their aspect ratios,
 | 
			
		||||
 | 
			
		||||
@ -24,8 +24,7 @@ import org.nd4j.shade.jackson.annotation.JsonInclude;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -25,7 +25,6 @@ import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Crops images deterministically or randomly.
 | 
			
		||||
 | 
			
		||||
@ -25,7 +25,7 @@ import org.nd4j.shade.jackson.annotation.JsonInclude;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -24,8 +24,7 @@ import org.nd4j.linalg.factory.Nd4j;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -22,7 +22,6 @@ import org.datavec.image.data.ImageWritable;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Transforms images deterministically or randomly with the help of an array of ImageTransform
 | 
			
		||||
 | 
			
		||||
@ -27,7 +27,6 @@ import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Randomly crops an image to a desired output size. Will determine if
 | 
			
		||||
 | 
			
		||||
@ -25,8 +25,7 @@ import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -31,7 +31,7 @@ import java.nio.FloatBuffer;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -25,8 +25,7 @@ import org.nd4j.shade.jackson.annotation.JsonProperty;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 | 
			
		||||
@ -31,7 +31,7 @@ import java.nio.FloatBuffer;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
import org.bytedeco.opencv.opencv_core.*;
 | 
			
		||||
import org.bytedeco.opencv.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_core.*;
 | 
			
		||||
import static org.bytedeco.opencv.global.opencv_imgproc.*;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -16,13 +16,8 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.image.loader;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.io.FileUtils;
 | 
			
		||||
import org.apache.commons.io.FilenameUtils;
 | 
			
		||||
import org.datavec.api.io.filters.BalancedPathFilter;
 | 
			
		||||
import org.datavec.api.records.reader.RecordReader;
 | 
			
		||||
import org.datavec.api.split.FileSplit;
 | 
			
		||||
import org.datavec.api.split.InputSplit;
 | 
			
		||||
import org.datavec.image.recordreader.ImageRecordReader;
 | 
			
		||||
import org.junit.Ignore;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.linalg.dataset.DataSet;
 | 
			
		||||
@ -30,9 +25,6 @@ import org.nd4j.linalg.dataset.DataSet;
 | 
			
		||||
import java.io.File;
 | 
			
		||||
import java.io.FileInputStream;
 | 
			
		||||
import java.io.InputStream;
 | 
			
		||||
import java.io.SequenceInputStream;
 | 
			
		||||
import java.util.Collection;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.Random;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -35,7 +35,6 @@ import org.datavec.api.writable.batch.NDArrayRecordBatch;
 | 
			
		||||
import org.junit.Rule;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.junit.rules.TemporaryFolder;
 | 
			
		||||
import org.nd4j.common.resources.Resources;
 | 
			
		||||
import org.nd4j.linalg.api.buffer.DataType;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.linalg.factory.Nd4j;
 | 
			
		||||
 | 
			
		||||
@ -20,7 +20,6 @@ package org.datavec.nlp.tokenization.tokenizerfactory;
 | 
			
		||||
 | 
			
		||||
import org.datavec.nlp.tokenization.tokenizer.TokenPreProcess;
 | 
			
		||||
import org.datavec.nlp.tokenization.tokenizer.Tokenizer;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonSubTypes;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
 | 
			
		||||
 | 
			
		||||
import java.io.InputStream;
 | 
			
		||||
 | 
			
		||||
@ -27,7 +27,6 @@ import org.datavec.api.writable.DoubleWritable;
 | 
			
		||||
import org.datavec.api.writable.Text;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
import org.junit.AfterClass;
 | 
			
		||||
import org.junit.Before;
 | 
			
		||||
import org.junit.BeforeClass;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.common.io.ClassPathResource;
 | 
			
		||||
 | 
			
		||||
@ -20,8 +20,6 @@ import org.apache.poi.ss.usermodel.*;
 | 
			
		||||
import org.datavec.api.conf.Configuration;
 | 
			
		||||
import org.datavec.api.records.Record;
 | 
			
		||||
import org.datavec.api.records.metadata.RecordMetaDataIndex;
 | 
			
		||||
import org.datavec.api.records.metadata.RecordMetaDataLine;
 | 
			
		||||
import org.datavec.api.records.metadata.RecordMetaDataURI;
 | 
			
		||||
import org.datavec.api.records.reader.impl.FileRecordReader;
 | 
			
		||||
import org.datavec.api.split.InputSplit;
 | 
			
		||||
import org.datavec.api.writable.BooleanWritable;
 | 
			
		||||
@ -29,7 +27,6 @@ import org.datavec.api.writable.DoubleWritable;
 | 
			
		||||
import org.datavec.api.writable.Text;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
 | 
			
		||||
import java.io.File;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.io.InputStream;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
 | 
			
		||||
@ -25,7 +25,6 @@ import org.datavec.api.transform.analysis.DataVecAnalysisUtils;
 | 
			
		||||
import org.datavec.api.transform.analysis.columns.ColumnAnalysis;
 | 
			
		||||
import org.datavec.api.transform.analysis.histogram.HistogramCounter;
 | 
			
		||||
import org.datavec.api.transform.analysis.quality.QualityAnalysisAddFunction;
 | 
			
		||||
import org.datavec.api.transform.analysis.quality.QualityAnalysisCombineFunction;
 | 
			
		||||
import org.datavec.api.transform.analysis.quality.QualityAnalysisState;
 | 
			
		||||
import org.datavec.api.transform.quality.DataQualityAnalysis;
 | 
			
		||||
import org.datavec.api.transform.quality.columns.ColumnQuality;
 | 
			
		||||
 | 
			
		||||
@ -18,7 +18,6 @@ package org.datavec.local.transforms;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
import org.datavec.local.transforms.functions.FlatMapFunctionAdapter;
 | 
			
		||||
import org.nd4j.linalg.exception.ND4JIllegalStateException;
 | 
			
		||||
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -19,11 +19,6 @@ package org.datavec.local.transforms;
 | 
			
		||||
import org.datavec.api.records.reader.RecordReader;
 | 
			
		||||
import org.datavec.api.records.reader.impl.transform.TransformProcessRecordReader;
 | 
			
		||||
import org.datavec.api.transform.TransformProcess;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.Collections;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A wrapper around the {@link TransformProcessRecordReader}
 | 
			
		||||
 | 
			
		||||
@ -21,8 +21,6 @@ import lombok.extern.slf4j.Slf4j;
 | 
			
		||||
import org.bytedeco.cpython.PyThreadState;
 | 
			
		||||
 | 
			
		||||
import static org.bytedeco.cpython.global.python.*;
 | 
			
		||||
import static org.bytedeco.cpython.global.python.PyEval_RestoreThread;
 | 
			
		||||
import static org.bytedeco.cpython.global.python.PyEval_SaveThread;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@Slf4j
 | 
			
		||||
 | 
			
		||||
@ -22,8 +22,6 @@ import lombok.Data;
 | 
			
		||||
import lombok.NoArgsConstructor;
 | 
			
		||||
 | 
			
		||||
import javax.annotation.Nonnull;
 | 
			
		||||
import java.util.HashMap;
 | 
			
		||||
import java.util.Map;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@Data
 | 
			
		||||
 | 
			
		||||
@ -26,7 +26,6 @@ import org.nd4j.common.base.Preconditions;
 | 
			
		||||
import org.nd4j.common.holder.ObjectMapperHolder;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.shade.jackson.core.JsonProcessingException;
 | 
			
		||||
import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
@ -16,16 +16,9 @@
 | 
			
		||||
 | 
			
		||||
package org.datavec.python;
 | 
			
		||||
 | 
			
		||||
import lombok.Data;
 | 
			
		||||
import org.bytedeco.javacpp.BytePointer;
 | 
			
		||||
import org.bytedeco.javacpp.Pointer;
 | 
			
		||||
import org.json.JSONObject;
 | 
			
		||||
import org.json.JSONArray;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.nativeblas.NativeOpsHolder;
 | 
			
		||||
 | 
			
		||||
import java.io.Serializable;
 | 
			
		||||
import java.nio.ByteBuffer;
 | 
			
		||||
import java.util.*;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -18,18 +18,13 @@
 | 
			
		||||
package org.datavec.python;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
import lombok.var;
 | 
			
		||||
import org.json.JSONArray;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.linalg.factory.Nd4j;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.HashMap;
 | 
			
		||||
import java.util.Map;
 | 
			
		||||
 | 
			
		||||
import static org.junit.Assert.assertArrayEquals;
 | 
			
		||||
import static org.junit.Assert.assertEquals;
 | 
			
		||||
 | 
			
		||||
@javax.annotation.concurrent.NotThreadSafe
 | 
			
		||||
 | 
			
		||||
@ -15,10 +15,7 @@
 | 
			
		||||
 ******************************************************************************/
 | 
			
		||||
 | 
			
		||||
package org.datavec.python;
 | 
			
		||||
import org.junit.Assert;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.linalg.api.buffer.DataType;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.linalg.factory.Nd4j;
 | 
			
		||||
 | 
			
		||||
import static org.junit.Assert.assertEquals;
 | 
			
		||||
 | 
			
		||||
@ -18,15 +18,11 @@
 | 
			
		||||
package org.datavec.python;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
import lombok.var;
 | 
			
		||||
import org.json.JSONArray;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.linalg.factory.Nd4j;
 | 
			
		||||
 | 
			
		||||
import java.util.*;
 | 
			
		||||
 | 
			
		||||
import static org.junit.Assert.assertArrayEquals;
 | 
			
		||||
import static org.junit.Assert.assertEquals;
 | 
			
		||||
 | 
			
		||||
@javax.annotation.concurrent.NotThreadSafe
 | 
			
		||||
 | 
			
		||||
@ -21,6 +21,7 @@ import java.io.DataOutput;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.io.Serializable;
 | 
			
		||||
import java.nio.charset.Charset;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
 | 
			
		||||
/**A Hadoop writable class for a pair of byte arrays, plus the original URIs (as Strings) of the files they came from
 | 
			
		||||
 * @author Alex Black
 | 
			
		||||
@ -44,8 +45,8 @@ public class BytesPairWritable implements Serializable, org.apache.hadoop.io.Wri
 | 
			
		||||
    public void write(DataOutput dataOutput) throws IOException {
 | 
			
		||||
        int length1 = (first != null ? first.length : 0);
 | 
			
		||||
        int length2 = (second != null ? second.length : 0);
 | 
			
		||||
        byte[] s1Bytes = (uriFirst != null ? uriFirst.getBytes(Charset.forName("UTF-8")) : null);
 | 
			
		||||
        byte[] s2Bytes = (uriSecond != null ? uriSecond.getBytes(Charset.forName("UTF-8")) : null);
 | 
			
		||||
        byte[] s1Bytes = (uriFirst != null ? uriFirst.getBytes(StandardCharsets.UTF_8) : null);
 | 
			
		||||
        byte[] s2Bytes = (uriSecond != null ? uriSecond.getBytes(StandardCharsets.UTF_8) : null);
 | 
			
		||||
        int s1Len = (s1Bytes != null ? s1Bytes.length : 0);
 | 
			
		||||
        int s2Len = (s2Bytes != null ? s2Bytes.length : 0);
 | 
			
		||||
        dataOutput.writeInt(length1);
 | 
			
		||||
@ -79,12 +80,12 @@ public class BytesPairWritable implements Serializable, org.apache.hadoop.io.Wri
 | 
			
		||||
        if (s1Len > 0) {
 | 
			
		||||
            byte[] s1Bytes = new byte[s1Len];
 | 
			
		||||
            dataInput.readFully(s1Bytes);
 | 
			
		||||
            uriFirst = new String(s1Bytes, Charset.forName("UTF-8"));
 | 
			
		||||
            uriFirst = new String(s1Bytes, StandardCharsets.UTF_8);
 | 
			
		||||
        }
 | 
			
		||||
        if (s2Len > 0) {
 | 
			
		||||
            byte[] s2Bytes = new byte[s2Len];
 | 
			
		||||
            dataInput.readFully(s2Bytes);
 | 
			
		||||
            uriSecond = new String(s2Bytes, Charset.forName("UTF-8"));
 | 
			
		||||
            uriSecond = new String(s2Bytes, StandardCharsets.UTF_8);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -18,7 +18,6 @@ package org.datavec.spark.transform.join;
 | 
			
		||||
 | 
			
		||||
import lombok.AllArgsConstructor;
 | 
			
		||||
import org.apache.spark.api.java.function.PairFunction;
 | 
			
		||||
import org.datavec.api.transform.schema.Schema;
 | 
			
		||||
import org.datavec.api.writable.Writable;
 | 
			
		||||
import scala.Tuple2;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -33,6 +33,7 @@ import org.datavec.api.transform.ui.HtmlAnalysis;
 | 
			
		||||
import org.datavec.api.writable.*;
 | 
			
		||||
 | 
			
		||||
import java.io.*;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.Collections;
 | 
			
		||||
@ -96,7 +97,7 @@ public class SparkUtils {
 | 
			
		||||
    public static void writeStringToFile(String path, String toWrite, Configuration hadoopConfig) throws IOException {
 | 
			
		||||
        FileSystem fileSystem = FileSystem.get(hadoopConfig);
 | 
			
		||||
        try (BufferedOutputStream bos = new BufferedOutputStream(fileSystem.create(new Path(path)))) {
 | 
			
		||||
            bos.write(toWrite.getBytes("UTF-8"));
 | 
			
		||||
            bos.write(toWrite.getBytes(StandardCharsets.UTF_8));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -130,7 +131,7 @@ public class SparkUtils {
 | 
			
		||||
        FileSystem fileSystem = FileSystem.get(hadoopConfig);
 | 
			
		||||
        try (BufferedInputStream bis = new BufferedInputStream(fileSystem.open(new Path(path)))) {
 | 
			
		||||
            byte[] asBytes = IOUtils.toByteArray(bis);
 | 
			
		||||
            return new String(asBytes, "UTF-8");
 | 
			
		||||
            return new String(asBytes, StandardCharsets.UTF_8);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -17,7 +17,6 @@
 | 
			
		||||
package org.datavec.spark.functions;
 | 
			
		||||
 | 
			
		||||
import org.datavec.api.writable.*;
 | 
			
		||||
import org.datavec.spark.transform.misc.SequenceWritablesToStringFunction;
 | 
			
		||||
import org.datavec.spark.transform.misc.WritablesToNDArrayFunction;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
import org.nd4j.linalg.api.buffer.DataType;
 | 
			
		||||
@ -25,7 +24,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 | 
			
		||||
import org.nd4j.linalg.factory.Nd4j;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
import static org.junit.Assert.assertEquals;
 | 
			
		||||
 | 
			
		||||
@ -17,7 +17,6 @@
 | 
			
		||||
package org.datavec.spark.functions;
 | 
			
		||||
 | 
			
		||||
import org.apache.spark.api.java.JavaPairRDD;
 | 
			
		||||
import org.apache.spark.api.java.JavaRDD;
 | 
			
		||||
import org.apache.spark.api.java.JavaSparkContext;
 | 
			
		||||
import org.apache.spark.api.java.function.PairFunction;
 | 
			
		||||
import org.datavec.api.writable.DoubleWritable;
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user