Fix javadoc and cleanup

master
Brian Rosenberger 2022-10-21 15:19:32 +02:00
parent 5c98c5e1ed
commit 07c052d822
1274 changed files with 4997 additions and 5292 deletions

View File

@ -139,7 +139,6 @@ public class BrianTest /*extends BaseDL4JTest*/ {
//.setExecutorEnv("spark.executor.cores", "2") //.setExecutorEnv("spark.executor.cores", "2")
//.setExecutorEnv("spark.executor.memory", "2g") //.setExecutorEnv("spark.executor.memory", "2g")
//.set("spark.submit.deployMode", "client") //.set("spark.submit.deployMode", "client")
;
/* /*
SparkSession spark = SparkSession SparkSession spark = SparkSession
@ -240,7 +239,7 @@ public class BrianTest /*extends BaseDL4JTest*/ {
*/ */
TransformProcess tp = new TransformProcess.Builder(inputSchema) TransformProcess tp = new TransformProcess.Builder(inputSchema)
.removeAllColumnsExceptFor("country_code", "lat", "lon") .removeAllColumnsExceptFor("country_code", "lat", "lon")
.stringToCategorical("country_code", Arrays.asList(new String[] {"GR", "FR", "DE", "CH"})) .stringToCategorical("country_code", Arrays.asList("GR", "FR", "DE", "CH"))
.filter(new FilterInvalidValues()) .filter(new FilterInvalidValues())
.categoricalToOneHot("country_code") .categoricalToOneHot("country_code")
.build(); .build();

View File

@ -225,7 +225,7 @@ public class BrianTest2 /*extends BaseDL4JTest*/ {
*/ */
TransformProcess tp = new TransformProcess.Builder(inputSchema) TransformProcess tp = new TransformProcess.Builder(inputSchema)
.removeAllColumnsExceptFor("country_code", "lat", "lon") .removeAllColumnsExceptFor("country_code", "lat", "lon")
.stringToCategorical("country_code", Arrays.asList(new String[] {"GR", "FR", "DE", "CH"})) .stringToCategorical("country_code", Arrays.asList("GR", "FR", "DE", "CH"))
.filter(new FilterInvalidValues()) .filter(new FilterInvalidValues())
.categoricalToOneHot("country_code") .categoricalToOneHot("country_code")
.build(); .build();

View File

@ -91,10 +91,10 @@ public class IntegrationTestRunner {
public static final double MAX_REL_ERROR_SCORES = 1e-4; public static final double MAX_REL_ERROR_SCORES = 1e-4;
private static List<Class<?>> layerClasses = new ArrayList<>(); private static final List<Class<?>> layerClasses = new ArrayList<>();
private static List<Class<?>> preprocClasses = new ArrayList<>(); private static final List<Class<?>> preprocClasses = new ArrayList<>();
private static List<Class<?>> graphVertexClasses = new ArrayList<>(); private static final List<Class<?>> graphVertexClasses = new ArrayList<>();
private static List<Class<?>> evaluationClasses = new ArrayList<>(); private static final List<Class<?>> evaluationClasses = new ArrayList<>();
private static Map<Class<?>, Integer> layerConfClassesSeen = new HashMap<>(); private static Map<Class<?>, Integer> layerConfClassesSeen = new HashMap<>();
private static Map<Class<?>, Integer> preprocessorConfClassesSeen = new HashMap<>(); private static Map<Class<?>, Integer> preprocessorConfClassesSeen = new HashMap<>();

View File

@ -67,8 +67,8 @@ public class CNN1DTestCases {
testOverfitting = false; testOverfitting = false;
} }
int miniBatchSize = 16; final int miniBatchSize = 16;
int exampleLength = 128; final int exampleLength = 128;
@Override @Override
public ModelType modelType() { public ModelType modelType() {

View File

@ -271,11 +271,11 @@ public class CNN2DTestCases {
public static TestCase getYoloHouseNumbers() { public static TestCase getYoloHouseNumbers() {
return new TestCase() { return new TestCase() {
private int width = 416; private final int width = 416;
private int height = 416; private final int height = 416;
private int nChannels = 3; private final int nChannels = 3;
private int gridWidth = 13; private final int gridWidth = 13;
private int gridHeight = 13; private final int gridHeight = 13;
{ {
testName = "YOLOHouseNumbers"; testName = "YOLOHouseNumbers";

View File

@ -108,7 +108,7 @@ public class CNN3DTestCases {
public MultiDataSet getGradientsTestData() throws Exception { public MultiDataSet getGradientsTestData() throws Exception {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
//NCDHW format //NCDHW format
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8}); INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10); INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels); return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
} }
@ -135,6 +135,6 @@ public class CNN3DTestCases {
} }
}; };
}; }
} }

View File

@ -93,8 +93,8 @@ public class RNNTestCases {
minAbsErrorParamsPostTraining = 2e-3; minAbsErrorParamsPostTraining = 2e-3;
} }
private int miniBatchSize = 32; private final int miniBatchSize = 32;
private int exampleLength = 200; private final int exampleLength = 200;
@Override @Override

View File

@ -31,23 +31,24 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.util.*; import java.util.*;
public class CharacterIterator implements DataSetIterator { public class CharacterIterator implements DataSetIterator {
//Valid characters //Valid characters
private char[] validCharacters; private final char[] validCharacters;
//Maps each character to an index in the input/output //Maps each character to an index in the input/output
private Map<Character, Integer> charToIdxMap; private final Map<Character, Integer> charToIdxMap;
//All characters of the input file (after filtering to only those that are valid) //All characters of the input file (after filtering to only those that are valid)
private char[] fileCharacters; private final char[] fileCharacters;
//Length of each example/minibatch (number of characters) //Length of each example/minibatch (number of characters)
private int exampleLength; private final int exampleLength;
//Size of each minibatch (number of examples) //Size of each minibatch (number of examples)
private int miniBatchSize; private final int miniBatchSize;
private Random rng; private final Random rng;
//Offsets for the start of each example //Offsets for the start of each example
private LinkedList<Integer> exampleStartOffsets = new LinkedList<>(); private final LinkedList<Integer> exampleStartOffsets = new LinkedList<>();
/** /**
* @param textFilePath Path to text file to use for generating samples * @param textFilePath Path to text file to use for generating samples
@ -299,7 +300,7 @@ public class CharacterIterator implements DataSetIterator {
if (!f.exists()) throw new IOException("File does not exist: " + fileLocation); //Download problem? if (!f.exists()) throw new IOException("File does not exist: " + fileLocation); //Download problem?
char[] validCharacters = CharacterIterator.getMinimalCharacterSet(); //Which characters are allowed? Others will be removed char[] validCharacters = CharacterIterator.getMinimalCharacterSet(); //Which characters are allowed? Others will be removed
return new CharacterIterator(fileLocation, Charset.forName("UTF-8"), return new CharacterIterator(fileLocation, StandardCharsets.UTF_8,
miniBatchSize, sequenceLength, validCharacters, new Random(12345)); miniBatchSize, sequenceLength, validCharacters, new Random(12345));
} }

View File

@ -305,7 +305,7 @@ public class SameDiffCNNCases {
// [minibatch,8,1,1,1] // [minibatch,8,1,1,1]
int channels_height_width_depth = 8 * 1 * 1 * 1; int channels_height_width_depth = 8;
SDVariable layer1_reshaped = layer1.reshape(-1, channels_height_width_depth); SDVariable layer1_reshaped = layer1.reshape(-1, channels_height_width_depth);
@ -331,7 +331,7 @@ public class SameDiffCNNCases {
public Map<String,INDArray> getGradientsTestDataSameDiff() throws Exception { public Map<String,INDArray> getGradientsTestDataSameDiff() throws Exception {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
//NCDHW format //NCDHW format
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8}); INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10); INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
Map<String, INDArray> map = new HashMap<>(); Map<String, INDArray> map = new HashMap<>();
@ -357,7 +357,7 @@ public class SameDiffCNNCases {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
List<Map<String, INDArray>> list = new ArrayList<>(); List<Map<String, INDArray>> list = new ArrayList<>();
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8}); INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
list.add(Collections.singletonMap("in", arr)); list.add(Collections.singletonMap("in", arr));
@ -368,7 +368,7 @@ public class SameDiffCNNCases {
public MultiDataSet getGradientsTestData() throws Exception { public MultiDataSet getGradientsTestData() throws Exception {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
//NCDHW format //NCDHW format
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8}); INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10); INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels); return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
} }

View File

@ -130,3 +130,19 @@ echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
-P\<xxx>\ -P\<xxx>\
CAVIS_AVX_EXTENSION = {avx2 | avx512}, default is avx2 CAVIS_AVX_EXTENSION = {avx2 | avx512}, default is avx2
# Zeppelin Spark dependencies #
3
To add the dependency to the language models, use the following format in the Dependencies section of the Spark Interpreter configuration (Interpreters -> Spark -> Edit -> Dependencies):
groupId:artifactId:packaging:classifier:version
In your case it should work with
edu.stanford.nlp:stanford-corenlp:jar:models:3.8.0
Native cpu code under linux needs libc6-dev
/lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.29' not found

View File

@ -266,7 +266,7 @@ public class Configuration implements Iterable<Map.Entry<String, String>>, Writa
reloadConfiguration(); reloadConfiguration();
} }
private static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}"); private static final Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
private String substituteVars(String expr) { private String substituteVars(String expr) {
if (expr == null) { if (expr == null) {
@ -555,7 +555,7 @@ public class Configuration implements Iterable<Map.Entry<String, String>>, Writa
} }
/** /**
* Get the value of the <code>name</code> property as a <ocde>Pattern</code>. * Get the value of the <code>name</code> property as a {@code Pattern}.
* If no such property is specified, or if the specified value is not a valid * If no such property is specified, or if the specified value is not a valid
* <code>Pattern</code>, then <code>DefaultValue</code> is returned. * <code>Pattern</code>, then <code>DefaultValue</code> is returned.
* *

View File

@ -27,7 +27,7 @@ import org.datavec.api.records.writer.RecordWriter;
public interface OutputFormat { public interface OutputFormat {
public static final String OUTPUT_PATH = "org.nd4j.outputpath"; String OUTPUT_PATH = "org.nd4j.outputpath";
/** /**
* Create a record writer * Create a record writer

View File

@ -34,7 +34,7 @@ public abstract class BinaryComparable implements Comparable<BinaryComparable> {
/** /**
* Compare bytes from {@link #getBytes()}. * Compare bytes from {@link #getBytes()}.
* @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int) * {@code org.apache.hadoop.io.WritableComparator#compareBytes(byte[], int, int, byte[], int, int)}
*/ */
public int compareTo(BinaryComparable other) { public int compareTo(BinaryComparable other) {
if (this == other) if (this == other)
@ -63,7 +63,7 @@ public abstract class BinaryComparable implements Comparable<BinaryComparable> {
/** /**
* Return a hash of the bytes returned from {@link #getBytes()}. * Return a hash of the bytes returned from {@link #getBytes()}.
* @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int) * {@code org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)}
*/ */
public int hashCode() { public int hashCode() {
return WritableComparator.hashBytes(getBytes(), getLength()); return WritableComparator.hashBytes(getBytes(), getLength());

View File

@ -50,7 +50,7 @@ public class DataInputBuffer extends DataInputStream {
} }
} }
private Buffer buffer; private final Buffer buffer;
/** Constructs a new empty buffer. */ /** Constructs a new empty buffer. */
public DataInputBuffer() { public DataInputBuffer() {

View File

@ -44,7 +44,7 @@ public class DataOutputBuffer extends DataOutputStream {
public void write(DataInput in, int len) throws IOException { public void write(DataInput in, int len) throws IOException {
int newcount = count + len; int newcount = count + len;
if (newcount > buf.length) { if (newcount > buf.length) {
byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)]; byte[] newbuf = new byte[Math.max(buf.length << 1, newcount)];
System.arraycopy(buf, 0, newbuf, 0, count); System.arraycopy(buf, 0, newbuf, 0, count);
buf = newbuf; buf = newbuf;
} }
@ -53,7 +53,7 @@ public class DataOutputBuffer extends DataOutputStream {
} }
} }
private Buffer buffer; private final Buffer buffer;
/** Constructs a new empty buffer. */ /** Constructs a new empty buffer. */
public DataOutputBuffer() { public DataOutputBuffer() {

View File

@ -25,6 +25,6 @@ import java.util.Comparator;
public interface RawComparator<T> extends Comparator<T> { public interface RawComparator<T> extends Comparator<T> {
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2); int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
} }

View File

@ -31,7 +31,7 @@ import java.util.HashMap;
public class WritableComparator implements RawComparator { public class WritableComparator implements RawComparator {
private static HashMap<Class, WritableComparator> comparators = new HashMap<>(); // registry private static final HashMap<Class, WritableComparator> comparators = new HashMap<>(); // registry
/** Get a comparator for a {@link WritableComparable} implementation. */ /** Get a comparator for a {@link WritableComparable} implementation. */
public static synchronized WritableComparator get(Class<? extends WritableComparable> c) { public static synchronized WritableComparator get(Class<? extends WritableComparable> c) {

View File

@ -229,7 +229,7 @@ public final class WritableUtils {
/** /**
* Serializes an integer to a binary stream with zero-compressed encoding. * Serializes an integer to a binary stream with zero-compressed encoding.
* For -120 <= i <= 127, only one byte is used with the actual value. * For -120 &lt;= i &lt;= 127, only one byte is used with the actual value.
* For other values of i, the first byte value indicates whether the * For other values of i, the first byte value indicates whether the
* integer is positive or negative, and the number of bytes that follow. * integer is positive or negative, and the number of bytes that follow.
* If the first byte value v is between -121 and -124, the following integer * If the first byte value v is between -121 and -124, the following integer
@ -248,7 +248,7 @@ public final class WritableUtils {
/** /**
* Serializes a long to a binary stream with zero-compressed encoding. * Serializes a long to a binary stream with zero-compressed encoding.
* For -112 <= i <= 127, only one byte is used with the actual value. * For -112 &lt;= i &lt;= 127, only one byte is used with the actual value.
* For other values of i, the first byte value indicates whether the * For other values of i, the first byte value indicates whether the
* long is positive or negative, and the number of bytes that follow. * long is positive or negative, and the number of bytes that follow.
* If the first byte value v is between -113 and -120, the following long * If the first byte value v is between -113 and -120, the following long

View File

@ -27,7 +27,7 @@ import org.datavec.api.writable.Writable;
import java.util.List; import java.util.List;
public class LabelWriterConverter implements WritableConverter { public class LabelWriterConverter implements WritableConverter {
private List<String> labels; private final List<String> labels;
public LabelWriterConverter(List<String> labels) { public LabelWriterConverter(List<String> labels) {
this.labels = labels; this.labels = labels;

View File

@ -35,7 +35,7 @@ public interface PathLabelGenerator extends Serializable {
* If true: infer the set of possible label classes, and convert these to integer indexes. When true, the * If true: infer the set of possible label classes, and convert these to integer indexes. When true, the
* returned Writables should be text writables.<br> * returned Writables should be text writables.<br>
* <br> * <br>
* For regression use cases (or PathLabelGenerator classification instances that do their own label -> integer * For regression use cases (or PathLabelGenerator classification instances that do their own label -&gt; integer
* assignment), this should return false. * assignment), this should return false.
* *
* @return whether label classes should be inferred * @return whether label classes should be inferred

View File

@ -35,7 +35,7 @@ public class SerializationFactory extends Configured {
private static final Logger LOG = LoggerFactory.getLogger(SerializationFactory.class.getName()); private static final Logger LOG = LoggerFactory.getLogger(SerializationFactory.class.getName());
private List<Serialization<?>> serializations = new ArrayList<>(); private final List<Serialization<?>> serializations = new ArrayList<>();
/** /**
* <p> * <p>
@ -47,7 +47,7 @@ public class SerializationFactory extends Configured {
public SerializationFactory(Configuration conf) { public SerializationFactory(Configuration conf) {
super(conf); super(conf);
for (String serializerName : conf.getStrings("io.serializations", for (String serializerName : conf.getStrings("io.serializations",
new String[] {"org.apache.hadoop.io.serializer.WritableSerialization"})) { "org.apache.hadoop.io.serializer.WritableSerialization")) {
add(conf, serializerName); add(conf, serializerName);
} }
} }

View File

@ -113,7 +113,7 @@ public class Buffer implements Comparable, Cloneable {
/** /**
* Change the capacity of the backing storage. * Change the capacity of the backing storage.
* The data is preserved if newCapacity >= getCount(). * The data is preserved if newCapacity &gt;= getCount().
* @param newCapacity The new capacity in bytes. * @param newCapacity The new capacity in bytes.
*/ */
public void setCapacity(int newCapacity) { public void setCapacity(int newCapacity) {

View File

@ -209,9 +209,7 @@ public class IOUtils {
* @return * @return
*/ */
static String toCSVBuffer(Buffer buf) { static String toCSVBuffer(Buffer buf) {
StringBuilder sb = new StringBuilder("#"); return "#" + buf.toString();
sb.append(buf.toString());
return sb.toString();
} }
/** /**
@ -441,7 +439,7 @@ public class IOUtils {
/** /**
* Serializes a long to a binary stream with zero-compressed encoding. * Serializes a long to a binary stream with zero-compressed encoding.
* For -112 <= i <= 127, only one byte is used with the actual value. * For -112 &lt;= i &lt;= 127, only one byte is used with the actual value.
* For other values of i, the first byte value indicates whether the * For other values of i, the first byte value indicates whether the
* long is positive or negative, and the number of bytes that follow. * long is positive or negative, and the number of bytes that follow.
* If the first byte value v is between -113 and -120, the following long * If the first byte value v is between -113 and -120, the following long

View File

@ -99,8 +99,6 @@ public interface RecordReader extends AutoCloseable, Serializable, Configurable
/** /**
* Reset record reader iterator * Reset record reader iterator
*
* @return
*/ */
void reset(); void reset();

View File

@ -39,7 +39,7 @@ import java.util.List;
*/ */
public class ComposableRecordReader extends BaseRecordReader { public class ComposableRecordReader extends BaseRecordReader {
private RecordReader[] readers; private final RecordReader[] readers;
public ComposableRecordReader(RecordReader... readers) { public ComposableRecordReader(RecordReader... readers) {
this.readers = readers; this.readers = readers;

View File

@ -35,7 +35,7 @@ import java.util.List;
public class ConcatenatingRecordReader extends BaseRecordReader { public class ConcatenatingRecordReader extends BaseRecordReader {
private RecordReader[] readers; private final RecordReader[] readers;
public ConcatenatingRecordReader(RecordReader... readers) { public ConcatenatingRecordReader(RecordReader... readers) {
this.readers = readers; this.readers = readers;

View File

@ -23,14 +23,14 @@ package org.datavec.api.records.reader.impl;
import lombok.Getter; import lombok.Getter;
import lombok.Setter; import lombok.Setter;
import org.datavec.api.conf.Configuration; import org.datavec.api.conf.Configuration;
import org.datavec.api.Record; import org.datavec.api.records.Record;
import org.datavec.api.records.metadata.RecordMetaData; import org.datavec.api.records.metadata.RecordMetaData;
import org.datavec.api.records.metadata.RecordMetaDataURI; import org.datavec.api.records.metadata.RecordMetaDataURI;
import org.datavec.api.records.reader.BaseRecordReader; import org.datavec.api.records.reader.BaseRecordReader;
import org.datavec.api.split.InputSplit; import org.datavec.api.split.InputSplit;
import org.datavec.api.writable.IntWritable; import org.datavec.api.writable.IntWritable;
import org.datavec.api.writable.Text; import org.datavec.api.writable.Text;
import org.datavec.api.Writable; import org.datavec.api.writable.Writable;
import java.io.*; import java.io.*;
import java.net.URI; import java.net.URI;
@ -40,6 +40,8 @@ import java.util.*;
/** /**
* File reader/writer * File reader/writer
*
* @author Adam Gibson
*/ */
public class FileRecordReader extends BaseRecordReader { public class FileRecordReader extends BaseRecordReader {
@ -48,12 +50,10 @@ public class FileRecordReader extends BaseRecordReader {
protected URI currentUri; protected URI currentUri;
protected List<String> labels; protected List<String> labels;
protected boolean appendLabel = false; protected boolean appendLabel = false;
@Getter @Getter @Setter
@Setter
protected String charset = StandardCharsets.UTF_8.name(); //Using String as StandardCharsets.UTF_8 is not serializable protected String charset = StandardCharsets.UTF_8.name(); //Using String as StandardCharsets.UTF_8 is not serializable
public FileRecordReader() { public FileRecordReader() {}
}
@Override @Override
public void initialize(InputSplit split) throws IOException, InterruptedException { public void initialize(InputSplit split) throws IOException, InterruptedException {
@ -68,9 +68,9 @@ public class FileRecordReader extends BaseRecordReader {
URI[] locations = split.locations(); URI[] locations = split.locations();
if (locations.length > 0) { if (locations.length > 0) {
Set<String> labels = new HashSet<>(); Set<String> labels = new HashSet<>();
for (URI u : locations) { for(URI u : locations){
String[] pathSplit = u.toString().split("[/\\\\]"); String[] pathSplit = u.toString().split("[/\\\\]");
labels.add(pathSplit[pathSplit.length - 2]); labels.add(pathSplit[pathSplit.length-2]);
} }
this.labels = new ArrayList<>(labels); this.labels = new ArrayList<>(labels);
Collections.sort(this.labels); Collections.sort(this.labels);
@ -80,8 +80,7 @@ public class FileRecordReader extends BaseRecordReader {
} }
@Override @Override
public void initialize(Configuration conf, InputSplit split) public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
throws IOException, InterruptedException {
appendLabel = conf.getBoolean(APPEND_LABEL, true); appendLabel = conf.getBoolean(APPEND_LABEL, true);
doInitialize(split); doInitialize(split);
this.inputSplit = split; this.inputSplit = split;
@ -96,7 +95,7 @@ public class FileRecordReader extends BaseRecordReader {
private List<Writable> loadFromStream(URI uri, InputStream next, Charset charset) { private List<Writable> loadFromStream(URI uri, InputStream next, Charset charset) {
List<Writable> ret = new ArrayList<>(); List<Writable> ret = new ArrayList<>();
try { try {
if (!(next instanceof BufferedInputStream)) { if(!(next instanceof BufferedInputStream)){
next = new BufferedInputStream(next); next = new BufferedInputStream(next);
} }
String s = org.apache.commons.io.IOUtils.toString(next, charset); String s = org.apache.commons.io.IOUtils.toString(next, charset);
@ -112,21 +111,21 @@ public class FileRecordReader extends BaseRecordReader {
} }
/** /**
* Return the current label. The index of the current file's parent directory in the label list * Return the current label.
* * The index of the current file's parent directory
* in the label list
* @return The index of the current file's parent directory * @return The index of the current file's parent directory
*/ */
public int getCurrentLabel() { public int getCurrentLabel() {
return getLabel(currentUri); return getLabel(currentUri);
} }
public int getLabel(URI uri) { public int getLabel(URI uri){
String s = uri.toString(); String s = uri.toString();
int lastIdx = Math.max(s.lastIndexOf('/'), int lastIdx = Math.max(s.lastIndexOf('/'), s.lastIndexOf('\\')); //Note: if neither are found, -1 is fine here
s.lastIndexOf('\\')); //Note: if neither are found, -1 is fine here
String sub = s.substring(0, lastIdx); String sub = s.substring(0, lastIdx);
int secondLastIdx = Math.max(sub.lastIndexOf('/'), sub.lastIndexOf('\\')); int secondLastIdx = Math.max(sub.lastIndexOf('/'), sub.lastIndexOf('\\'));
String name = s.substring(secondLastIdx + 1, lastIdx); String name = s.substring(secondLastIdx+1, lastIdx);
return labels.indexOf(name); return labels.indexOf(name);
} }
@ -168,12 +167,10 @@ public class FileRecordReader extends BaseRecordReader {
return ret; return ret;
} }
@Override @Override
public void reset() { public void reset() {
if (inputSplit == null) { if (inputSplit == null)
throw new UnsupportedOperationException("Cannot reset without first initializing"); throw new UnsupportedOperationException("Cannot reset without first initializing");
}
try { try {
doInitialize(inputSplit); doInitialize(inputSplit);
} catch (Exception e) { } catch (Exception e) {
@ -183,7 +180,7 @@ public class FileRecordReader extends BaseRecordReader {
@Override @Override
public boolean resetSupported() { public boolean resetSupported() {
if (inputSplit != null) { if(inputSplit != null){
return inputSplit.resetSupported(); return inputSplit.resetSupported();
} }
return false; //reset() throws exception on reset() if inputSplit is null return false; //reset() throws exception on reset() if inputSplit is null
@ -208,14 +205,13 @@ public class FileRecordReader extends BaseRecordReader {
invokeListeners(next); invokeListeners(next);
List<Writable> ret; List<Writable> ret;
try (InputStream s = streamCreatorFn.apply(next)) { try(InputStream s = streamCreatorFn.apply(next)) {
ret = loadFromStream(next, s, Charset.forName(charset)); ret = loadFromStream(next, s, Charset.forName(charset));
} catch (IOException e) { } catch (IOException e){
throw new RuntimeException("Error reading from stream for URI: " + next); throw new RuntimeException("Error reading from stream for URI: " + next);
} }
return new org.datavec.api.records.impl.Record(ret, return new org.datavec.api.records.impl.Record(ret,new RecordMetaDataURI(next, FileRecordReader.class));
new RecordMetaDataURI(next, FileRecordReader.class));
} }
@Override @Override
@ -231,9 +227,9 @@ public class FileRecordReader extends BaseRecordReader {
URI uri = meta.getURI(); URI uri = meta.getURI();
List<Writable> list; List<Writable> list;
try (InputStream s = streamCreatorFn.apply(uri)) { try(InputStream s = streamCreatorFn.apply(uri)) {
list = loadFromStream(uri, s, Charset.forName(charset)); list = loadFromStream(uri, s, Charset.forName(charset));
} catch (IOException e) { } catch (IOException e){
throw new RuntimeException("Error reading from stream for URI: " + uri); throw new RuntimeException("Error reading from stream for URI: " + uri);
} }

View File

@ -200,7 +200,7 @@ public class LineRecordReader extends BaseRecordReader {
//Here: we are reading a single line from the DataInputStream //Here: we are reading a single line from the DataInputStream
BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream)); BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
String line = br.readLine(); String line = br.readLine();
return Collections.singletonList((Writable) new Text(line)); return Collections.singletonList(new Text(line));
} }
protected Iterator<String> getIterator(int location) { protected Iterator<String> getIterator(int location) {
@ -265,7 +265,7 @@ public class LineRecordReader extends BaseRecordReader {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd); "Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd);
} }
list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, (List<Writable>) null)); list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, null));
if (rmd.getURI() != null) if (rmd.getURI() != null)
uris.add(rmd.getURI()); uris.add(rmd.getURI());
} }
@ -332,7 +332,7 @@ public class LineRecordReader extends BaseRecordReader {
throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI
+ ": has only " + currentLineIdx + " lines"); + ": has only " + currentLineIdx + " lines");
} }
t.setThird(Collections.<Writable>singletonList(new Text(line))); t.setThird(Collections.singletonList(new Text(line)));
} }
} else { } else {
//Not URI based: String split, etc //Not URI based: String split, etc
@ -347,7 +347,7 @@ public class LineRecordReader extends BaseRecordReader {
line = iterator.next(); line = iterator.next();
currentLineIdx++; currentLineIdx++;
} }
t.setThird(Collections.<Writable>singletonList(new Text(line))); t.setThird(Collections.singletonList(new Text(line)));
} }
closeIfRequired(iterator); closeIfRequired(iterator);
} }

View File

@ -43,7 +43,7 @@ public class CollectionSequenceRecordReader extends BaseRecordReader implements
/** /**
* *
* @param records Collection of sequences. For example, List<List<List<Writable>>> where the inner two lists * @param records Collection of sequences. For example, {@code List<List<List<Writable>>>} where the inner two lists
* are a sequence, and the outer list/collection is a list of sequences * are a sequence, and the outer list/collection is a list of sequences
*/ */
public CollectionSequenceRecordReader(Collection<? extends Collection<? extends Collection<Writable>>> records) { public CollectionSequenceRecordReader(Collection<? extends Collection<? extends Collection<Writable>>> records) {

View File

@ -45,9 +45,9 @@ public class CSVMultiSequenceRecordReader extends CSVRecordReader implements Seq
PAD PAD
} }
private String sequenceSeparatorRegex; private final String sequenceSeparatorRegex;
private Mode mode; private final Mode mode;
private Writable padValue; private final Writable padValue;
/** /**
* Create a sequence reader using the default value for skip lines (0), the default delimiter (',') and the default * Create a sequence reader using the default value for skip lines (0), the default delimiter (',') and the default

View File

@ -41,7 +41,7 @@ public class CSVNLinesSequenceRecordReader extends CSVRecordReader implements Se
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence"; public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
private int nLinesPerSequence; private int nLinesPerSequence;
private String delimiter; private final String delimiter;
/** /**
* No-arg constructor with the default number of lines per sequence (10) * No-arg constructor with the default number of lines per sequence (10)
@ -124,7 +124,7 @@ public class CSVNLinesSequenceRecordReader extends CSVRecordReader implements Se
"Invalid metadata; expected RecordMetaDataLineInterval instance; got: " + rmd); "Invalid metadata; expected RecordMetaDataLineInterval instance; got: " + rmd);
} }
list.add(new Triple<>(count++, (RecordMetaDataLineInterval) rmd, list.add(new Triple<>(count++, (RecordMetaDataLineInterval) rmd,
(List<List<Writable>>) new ArrayList<List<Writable>>())); new ArrayList<List<Writable>>()));
} }
//Sort by starting line number: //Sort by starting line number:

View File

@ -39,8 +39,8 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence"; public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
private int maxLinesPerSequence; private int maxLinesPerSequence;
private String delimiter; private final String delimiter;
private int stride; private final int stride;
private LinkedList<List<Writable>> queue; private LinkedList<List<Writable>> queue;
private boolean exhausted; private boolean exhausted;
@ -60,7 +60,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
/** /**
* @param maxLinesPerSequence Number of lines in each sequence, use default delimiter(,) between entries in the same line * @param maxLinesPerSequence Number of lines in each sequence, use default delimiter(,) between entries in the same line
* @param stride Number of lines between records (increment window > 1 line) * @param stride Number of lines between records (increment window &gt; 1 line)
*/ */
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride) { public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride) {
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER)); this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
@ -68,7 +68,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
/** /**
* @param maxLinesPerSequence Number of lines in each sequence, use default delimiter(,) between entries in the same line * @param maxLinesPerSequence Number of lines in each sequence, use default delimiter(,) between entries in the same line
* @param stride Number of lines between records (increment window > 1 line) * @param stride Number of lines between records (increment window &gt; 1 line)
*/ */
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride, String delimiter) { public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride, String delimiter) {
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER)); this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
@ -78,7 +78,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
* *
* @param maxLinesPerSequence Number of lines in each sequence * @param maxLinesPerSequence Number of lines in each sequence
* @param skipNumLines Number of lines to skip at the start of the file (only skipped once, not per sequence) * @param skipNumLines Number of lines to skip at the start of the file (only skipped once, not per sequence)
* @param stride Number of lines between records (increment window > 1 line) * @param stride Number of lines between records (increment window &gt; 1 line)
* @param delimiter Delimiter between entries in the same line, for example "," * @param delimiter Delimiter between entries in the same line, for example ","
*/ */
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int skipNumLines, int stride, String delimiter) { public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int skipNumLines, int stride, String delimiter) {

View File

@ -302,7 +302,7 @@ public class SerializableCSVParser implements Serializable {
} }
/** /**
* precondition: sb.length() > 0 * precondition: sb.length() &gt; 0
* *
* @param sb A sequence of characters to examine * @param sb A sequence of characters to examine
* @return true if every character in the sequence is whitespace * @return true if every character in the sequence is whitespace

View File

@ -114,8 +114,6 @@ public class InMemoryRecordReader implements RecordReader {
/** /**
* Reset record reader iterator * Reset record reader iterator
*
* @return
*/ */
@Override @Override
public void reset() { public void reset() {

View File

@ -195,8 +195,6 @@ public class InMemorySequenceRecordReader implements SequenceRecordReader {
/** /**
* Reset record reader iterator * Reset record reader iterator
*
* @return
*/ */
@Override @Override
public void reset() { public void reset() {

View File

@ -31,8 +31,8 @@ public class FieldSelection implements Serializable {
public static final Writable DEFAULT_MISSING_VALUE = new Text(""); public static final Writable DEFAULT_MISSING_VALUE = new Text("");
private List<String[]> fieldPaths; private final List<String[]> fieldPaths;
private List<Writable> valueIfMissing; private final List<Writable> valueIfMissing;
private FieldSelection(Builder builder) { private FieldSelection(Builder builder) {
this.fieldPaths = builder.fieldPaths; this.fieldPaths = builder.fieldPaths;
@ -53,8 +53,8 @@ public class FieldSelection implements Serializable {
public static class Builder { public static class Builder {
private List<String[]> fieldPaths = new ArrayList<>(); private final List<String[]> fieldPaths = new ArrayList<>();
private List<Writable> valueIfMissing = new ArrayList<>(); private final List<Writable> valueIfMissing = new ArrayList<>();
/** /**

View File

@ -29,8 +29,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
public class JacksonLineRecordReader extends LineRecordReader { public class JacksonLineRecordReader extends LineRecordReader {
private FieldSelection selection; private final FieldSelection selection;
private ObjectMapper mapper; private final ObjectMapper mapper;
public JacksonLineRecordReader(FieldSelection selection, ObjectMapper mapper) { public JacksonLineRecordReader(FieldSelection selection, ObjectMapper mapper) {
this.selection = selection; this.selection = selection;

View File

@ -39,8 +39,8 @@ import java.util.NoSuchElementException;
public class JacksonLineSequenceRecordReader extends FileRecordReader implements SequenceRecordReader { public class JacksonLineSequenceRecordReader extends FileRecordReader implements SequenceRecordReader {
private FieldSelection selection; private final FieldSelection selection;
private ObjectMapper mapper; private final ObjectMapper mapper;
/** /**
* *

View File

@ -45,12 +45,12 @@ public class JacksonRecordReader extends BaseRecordReader {
private static final TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {}; private static final TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {};
private FieldSelection selection; private final FieldSelection selection;
private ObjectMapper mapper; private final ObjectMapper mapper;
private boolean shuffle; private final boolean shuffle;
private long rngSeed; private final long rngSeed;
private PathLabelGenerator labelGenerator; private final PathLabelGenerator labelGenerator;
private int labelPosition; private final int labelPosition;
private InputSplit is; private InputSplit is;
private Random r; private Random r;
@Getter @Setter @Getter @Setter

View File

@ -35,7 +35,7 @@ import java.util.List;
public class MatlabRecordReader extends FileRecordReader { public class MatlabRecordReader extends FileRecordReader {
private List<List<Writable>> records = new ArrayList<>(); private final List<List<Writable>> records = new ArrayList<>();
private Iterator<List<Writable>> currIter; private Iterator<List<Writable>> currIter;
@Override @Override

View File

@ -96,8 +96,6 @@ public class SVMLightRecordReader extends LineRecordReader {
* Set configuration. * Set configuration.
* *
* @param conf DataVec configuration * @param conf DataVec configuration
* @throws IOException
* @throws InterruptedException
*/ */
@Override @Override
public void setConf(Configuration conf) { public void setConf(Configuration conf) {
@ -181,7 +179,7 @@ public class SVMLightRecordReader extends LineRecordReader {
if (index < 0) if (index < 0)
throw new NumberFormatException(""); throw new NumberFormatException("");
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
String msg = String.format("Feature index must be positive integer (found %s)", featureTokens[i].toString()); String msg = String.format("Feature index must be positive integer (found %s)", featureTokens[i]);
throw new NumberFormatException(msg); throw new NumberFormatException(msg);
} }
@ -218,7 +216,7 @@ public class SVMLightRecordReader extends LineRecordReader {
if (index < 0) if (index < 0)
throw new NumberFormatException(""); throw new NumberFormatException("");
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
String msg = String.format("Multilabel index must be positive integer (found %s)", labelTokens[i].toString()); String msg = String.format("Multilabel index must be positive integer (found %s)", labelTokens[i]);
throw new NumberFormatException(msg); throw new NumberFormatException(msg);
} }

View File

@ -41,11 +41,11 @@ import java.util.regex.Pattern;
public class RegexLineRecordReader extends LineRecordReader { public class RegexLineRecordReader extends LineRecordReader {
public final static String SKIP_NUM_LINES = NAME_SPACE + ".skipnumlines"; public final static String SKIP_NUM_LINES = NAME_SPACE + ".skipnumlines";
private String regex; private final String regex;
private int skipNumLines; private int skipNumLines;
private Pattern pattern; private final Pattern pattern;
private int numLinesSkipped; private int numLinesSkipped;
private int currLine = 0; private final int currLine = 0;
public RegexLineRecordReader(String regex, int skipNumLines) { public RegexLineRecordReader(String regex, int skipNumLines) {
this.regex = regex; this.regex = regex;

View File

@ -61,11 +61,11 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
public static final Logger LOG = LoggerFactory.getLogger(RegexSequenceRecordReader.class); public static final Logger LOG = LoggerFactory.getLogger(RegexSequenceRecordReader.class);
private String regex; private final String regex;
private int skipNumLines; private int skipNumLines;
private Pattern pattern; private final Pattern pattern;
private transient Charset charset; private transient Charset charset;
private LineErrorHandling errorHandling; private final LineErrorHandling errorHandling;
public RegexSequenceRecordReader(String regex, int skipNumLines) { public RegexSequenceRecordReader(String regex, int skipNumLines) {
this(regex, skipNumLines, DEFAULT_CHARSET, DEFAULT_ERROR_HANDLING); this(regex, skipNumLines, DEFAULT_CHARSET, DEFAULT_ERROR_HANDLING);
@ -92,7 +92,7 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
@Override @Override
public List<List<Writable>> sequenceRecord(URI uri, DataInputStream dataInputStream) throws IOException { public List<List<Writable>> sequenceRecord(URI uri, DataInputStream dataInputStream) throws IOException {
String fileContents = IOUtils.toString(new BufferedInputStream(dataInputStream), charset.name()); String fileContents = IOUtils.toString(new BufferedInputStream(dataInputStream), charset);
return loadSequence(fileContents, uri); return loadSequence(fileContents, uri);
} }

View File

@ -145,8 +145,6 @@ public class TransformProcessRecordReader implements RecordReader {
/** /**
* Reset record reader iterator * Reset record reader iterator
*
* @return
*/ */
@Override @Override
public void reset() { public void reset() {

View File

@ -195,8 +195,6 @@ public class TransformProcessSequenceRecordReader implements SequenceRecordReade
/** /**
* Reset record reader iterator * Reset record reader iterator
*
* @return
*/ */
@Override @Override
public void reset() { public void reset() {

View File

@ -94,7 +94,7 @@ public class SVMLightRecordWriter extends FileRecordWriter {
@Override @Override
public PartitionMetaData write(List<Writable> record) throws IOException { public PartitionMetaData write(List<Writable> record) throws IOException {
if (!record.isEmpty()) { if (!record.isEmpty()) {
List<Writable> recordList = record instanceof List ? (List<Writable>) record : new ArrayList<>(record); List<Writable> recordList = record instanceof List ? record : new ArrayList<>(record);
/* Infer label columns, if necessary. The default is /* Infer label columns, if necessary. The default is
* to assume that last column is a label and that the * to assume that last column is a label and that the
@ -198,7 +198,7 @@ public class SVMLightRecordWriter extends FileRecordWriter {
} }
// Remove extra label delimiter at beginning // Remove extra label delimiter at beginning
String line = result.substring(1).toString(); String line = result.substring(1);
out.write(line.getBytes()); out.write(line.getBytes());
out.write(NEW_LINE.getBytes()); out.write(NEW_LINE.getBytes());

View File

@ -124,9 +124,7 @@ public abstract class BaseInputSplit implements InputSplit {
for (int i = 0; i < weights.length; i++) { for (int i = 0; i < weights.length; i++) {
List<URI> uris = new ArrayList<>(); List<URI> uris = new ArrayList<>();
for (int j = partitions[i]; j < partitions[i + 1]; j++) { uris.addAll(Arrays.asList(paths).subList(partitions[i], partitions[i + 1]));
uris.add(paths[j]);
}
splits[i] = new CollectionInputSplit(uris); splits[i] = new CollectionInputSplit(uris);
} }
return splits; return splits;

View File

@ -138,7 +138,7 @@ public class FileSplit extends BaseInputSplit {
return addNewLocation(new File(rootDir, UUID.randomUUID().toString()).toURI().toString()); return addNewLocation(new File(rootDir, UUID.randomUUID().toString()).toURI().toString());
else { else {
//add a file in the same directory as the file with the same extension as the original file //add a file in the same directory as the file with the same extension as the original file
return addNewLocation(new File(rootDir.getParent(), UUID.randomUUID().toString() + "." + FilenameUtils.getExtension(rootDir.getAbsolutePath())).toURI().toString()); return addNewLocation(new File(rootDir.getParent(), UUID.randomUUID() + "." + FilenameUtils.getExtension(rootDir.getAbsolutePath())).toURI().toString());
} }
} }

View File

@ -31,7 +31,7 @@ import java.util.Iterator;
public class InputStreamInputSplit implements InputSplit { public class InputStreamInputSplit implements InputSplit {
private InputStream is; private InputStream is;
private URI[] location; private final URI[] location;
/** /**
* Instantiate with the given * Instantiate with the given
@ -130,7 +130,7 @@ public class InputStreamInputSplit implements InputSplit {
public Iterator<String> locationsPathIterator() { public Iterator<String> locationsPathIterator() {
if(location.length >= 1) if(location.length >= 1)
return Collections.singletonList(location[0].getPath()).iterator(); return Collections.singletonList(location[0].getPath()).iterator();
return Arrays.asList("").iterator(); return Collections.singletonList("").iterator();
} }
@Override @Override

View File

@ -33,7 +33,7 @@ import java.util.List;
* has delimited data of some kind. * has delimited data of some kind.
*/ */
public class ListStringSplit implements InputSplit { public class ListStringSplit implements InputSplit {
private List<List<String>> data; private final List<List<String>> data;
public ListStringSplit(List<List<String>> data) { public ListStringSplit(List<List<String>> data) {

View File

@ -43,12 +43,12 @@ public class NumberedFileInputSplit implements InputSplit {
* the index of the file, possibly zero-padded to x digits if the pattern is in the form %0xd. * the index of the file, possibly zero-padded to x digits if the pattern is in the form %0xd.
* @param minIdxInclusive Minimum index/number (starting number in sequence of files, inclusive) * @param minIdxInclusive Minimum index/number (starting number in sequence of files, inclusive)
* @param maxIdxInclusive Maximum index/number (last number in sequence of files, inclusive) * @param maxIdxInclusive Maximum index/number (last number in sequence of files, inclusive)
* @see {NumberedFileInputSplitTest} *
*/ */
public NumberedFileInputSplit(String baseString, int minIdxInclusive, int maxIdxInclusive) { public NumberedFileInputSplit(String baseString, int minIdxInclusive, int maxIdxInclusive) {
Matcher m = p.matcher(baseString); Matcher m = p.matcher(baseString);
if (baseString == null || !m.find()) { if (baseString == null || !m.find()) {
throw new IllegalArgumentException("Base String must match this regular expression: " + p.toString()); throw new IllegalArgumentException("Base String must match this regular expression: " + p);
} }
this.baseString = baseString; this.baseString = baseString;
this.minIdx = minIdxInclusive; this.minIdx = minIdxInclusive;

View File

@ -31,7 +31,7 @@ import java.util.Iterator;
* @author Adam Gibson * @author Adam Gibson
*/ */
public class StringSplit implements InputSplit { public class StringSplit implements InputSplit {
private String data; private final String data;
public StringSplit(String data) { public StringSplit(String data) {
this.data = data; this.data = data;

View File

@ -449,7 +449,7 @@ public class TransformProcess implements Serializable {
/** /**
* Infer the categories for the given record reader for a particular column * Infer the categories for the given record reader for a particular column
* Note that each "column index" is a column in the context of: * Note that each "column index" is a column in the context of:
* List<Writable> record = ...; * {@code List<Writable> record = ...;}
* record.get(columnIndex); * record.get(columnIndex);
* *
* Note that anything passed in as a column will be automatically converted to a * Note that anything passed in as a column will be automatically converted to a
@ -483,7 +483,7 @@ public class TransformProcess implements Serializable {
* if you have more than one column you plan on inferring categories for) * if you have more than one column you plan on inferring categories for)
* *
* Note that each "column index" is a column in the context of: * Note that each "column index" is a column in the context of:
* List<Writable> record = ...; * {@code List<Writable> record = ...;}
* record.get(columnIndex); * record.get(columnIndex);
* *
* *
@ -607,8 +607,8 @@ public class TransformProcess implements Serializable {
*/ */
public static class Builder { public static class Builder {
private List<DataAction> actionList = new ArrayList<>(); private final List<DataAction> actionList = new ArrayList<>();
private Schema initialSchema; private final Schema initialSchema;
public Builder(Schema initialSchema) { public Builder(Schema initialSchema) {
this.initialSchema = initialSchema; this.initialSchema = initialSchema;
@ -1274,7 +1274,7 @@ public class TransformProcess implements Serializable {
* not be modified. * not be modified.
* *
* @param columnName Name of the column in which to do replacement * @param columnName Name of the column in which to do replacement
* @param mapping Map of oldValues -> newValues * @param mapping Map of oldValues -&gt; newValues
*/ */
public Builder stringMapTransform(String columnName, Map<String, String> mapping) { public Builder stringMapTransform(String columnName, Map<String, String> mapping) {
return transform(new StringMapTransform(columnName, mapping)); return transform(new StringMapTransform(columnName, mapping));
@ -1358,7 +1358,8 @@ public class TransformProcess implements Serializable {
* Keys in the map are the regular expressions; the Values in the map are their String replacements. * Keys in the map are the regular expressions; the Values in the map are their String replacements.
* For example: * For example:
* <blockquote> * <blockquote>
* <table cellpadding="2"> * <table>
* <caption></caption>
* <tr> * <tr>
* <th>Original</th> * <th>Original</th>
* <th>Regex</th> * <th>Regex</th>
@ -1378,7 +1379,7 @@ public class TransformProcess implements Serializable {
* <td>BoneConeTone</td> * <td>BoneConeTone</td>
* </tr> * </tr>
* <tr> * <tr>
* <td>'&nbsp&nbsp4.25&nbsp'</td> * <td>'&nbsp;&nbsp;4.25&nbsp;'</td>
* <td>^\\s+|\\s+$</td> * <td>^\\s+|\\s+$</td>
* <td></td> * <td></td>
* <td>'4.25'</td> * <td>'4.25'</td>

View File

@ -55,7 +55,7 @@ public class NDArrayAnalysis implements ColumnAnalysis {
public String toString() { public String toString() {
Map<Integer, Long> sortedCountsByRank = new LinkedHashMap<>(); Map<Integer, Long> sortedCountsByRank = new LinkedHashMap<>();
List<Integer> keys = List<Integer> keys =
new ArrayList<>(countsByRank == null ? Collections.<Integer>emptySet() : countsByRank.keySet()); new ArrayList<>(countsByRank == null ? Collections.emptySet() : countsByRank.keySet());
Collections.sort(keys); Collections.sort(keys);
for (Integer i : keys) { for (Integer i : keys) {
sortedCountsByRank.put(i, countsByRank.get(i)); sortedCountsByRank.put(i, countsByRank.get(i));

View File

@ -101,8 +101,8 @@ public class IntegerAnalysisCounter implements AnalysisCounter<IntegerAnalysisCo
countNegative++; countNegative++;
} }
digest.add((double) value); digest.add(value);
counter.add((double) value); counter.add(value);
return this; return this;
} }

View File

@ -38,7 +38,7 @@ public class NDArrayAnalysisCounter implements AnalysisCounter<NDArrayAnalysisCo
private long minLength = Long.MAX_VALUE; private long minLength = Long.MAX_VALUE;
private long maxLength = -1; private long maxLength = -1;
private long totalNDArrayValues; private long totalNDArrayValues;
private Map<Integer, Long> countsByRank = new HashMap<>(); private final Map<Integer, Long> countsByRank = new HashMap<>();
private double minValue = Double.MAX_VALUE; private double minValue = Double.MAX_VALUE;
private double maxValue = -Double.MAX_VALUE; private double maxValue = -Double.MAX_VALUE;

View File

@ -83,7 +83,7 @@ public class StringAnalysisCounter implements AnalysisCounter<StringAnalysisCoun
countMaxLength = 1; countMaxLength = 1;
} }
counter.add((double) length); counter.add(length);
return this; return this;
} }

View File

@ -27,9 +27,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class CategoricalHistogramCounter implements HistogramCounter { public class CategoricalHistogramCounter implements HistogramCounter {
private HashMap<String, Integer> counts = new HashMap<>(); private final HashMap<String, Integer> counts = new HashMap<>();
private List<String> stateNames; private final List<String> stateNames;
public CategoricalHistogramCounter(List<String> stateNames) { public CategoricalHistogramCounter(List<String> stateNames) {
this.stateNames = stateNames; this.stateNames = stateNames;

View File

@ -34,8 +34,8 @@ import java.io.ObjectInputStream;
public class TDigestDeserializer extends JsonDeserializer<TDigest> { public class TDigestDeserializer extends JsonDeserializer<TDigest> {
@Override @Override
public TDigest deserialize(JsonParser jp, DeserializationContext d) throws IOException, JsonProcessingException { public TDigest deserialize(JsonParser jp, DeserializationContext d) throws IOException {
JsonNode node = (JsonNode)jp.getCodec().readTree(jp); JsonNode node = jp.getCodec().readTree(jp);
String field = node.get("digest").asText(); String field = node.get("digest").asText();
Base64 b = new Base64(); Base64 b = new Base64();
byte[] bytes = b.decode(field); byte[] bytes = b.decode(field);

View File

@ -33,7 +33,7 @@ import java.io.ObjectOutputStream;
public class TDigestSerializer extends JsonSerializer<TDigest> { public class TDigestSerializer extends JsonSerializer<TDigest> {
@Override @Override
public void serialize(TDigest td, JsonGenerator j, SerializerProvider sp) throws IOException, JsonProcessingException { public void serialize(TDigest td, JsonGenerator j, SerializerProvider sp) throws IOException {
try(ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)){ try(ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)){
oos.writeObject(td); oos.writeObject(td);
oos.close(); oos.close();

View File

@ -29,7 +29,7 @@ import org.datavec.api.writable.Writable;
public class BytesQualityAnalysisState implements QualityAnalysisState<BytesQualityAnalysisState> { public class BytesQualityAnalysisState implements QualityAnalysisState<BytesQualityAnalysisState> {
@Getter @Getter
private BytesQuality bytesQuality; private final BytesQuality bytesQuality;
public BytesQualityAnalysisState() { public BytesQualityAnalysisState() {
this.bytesQuality = new BytesQuality(); this.bytesQuality = new BytesQuality();

View File

@ -31,8 +31,8 @@ public class CategoricalQualityAnalysisState implements QualityAnalysisState<Cat
@Getter @Getter
private CategoricalQuality categoricalQuality; private CategoricalQuality categoricalQuality;
private CategoricalQualityAddFunction addFunction; private final CategoricalQualityAddFunction addFunction;
private CategoricalQualityMergeFunction mergeFunction; private final CategoricalQualityMergeFunction mergeFunction;
public CategoricalQualityAnalysisState(CategoricalMetaData integerMetaData) { public CategoricalQualityAnalysisState(CategoricalMetaData integerMetaData) {
this.categoricalQuality = new CategoricalQuality(); this.categoricalQuality = new CategoricalQuality();

View File

@ -31,8 +31,8 @@ public class IntegerQualityAnalysisState implements QualityAnalysisState<Integer
@Getter @Getter
private IntegerQuality integerQuality; private IntegerQuality integerQuality;
private IntegerQualityAddFunction addFunction; private final IntegerQualityAddFunction addFunction;
private IntegerQualityMergeFunction mergeFunction; private final IntegerQualityMergeFunction mergeFunction;
public IntegerQualityAnalysisState(IntegerMetaData integerMetaData) { public IntegerQualityAnalysisState(IntegerMetaData integerMetaData) {
this.integerQuality = new IntegerQuality(0, 0, 0, 0, 0); this.integerQuality = new IntegerQuality(0, 0, 0, 0, 0);

View File

@ -31,8 +31,8 @@ public class LongQualityAnalysisState implements QualityAnalysisState<LongQualit
@Getter @Getter
private LongQuality longQuality; private LongQuality longQuality;
private LongQualityAddFunction addFunction; private final LongQualityAddFunction addFunction;
private LongQualityMergeFunction mergeFunction; private final LongQualityMergeFunction mergeFunction;
public LongQualityAnalysisState(LongMetaData longMetaData) { public LongQualityAnalysisState(LongMetaData longMetaData) {
this.longQuality = new LongQuality(); this.longQuality = new LongQuality();

View File

@ -31,8 +31,8 @@ public class RealQualityAnalysisState implements QualityAnalysisState<RealQualit
@Getter @Getter
private DoubleQuality realQuality; private DoubleQuality realQuality;
private RealQualityAddFunction addFunction; private final RealQualityAddFunction addFunction;
private RealQualityMergeFunction mergeFunction; private final RealQualityMergeFunction mergeFunction;
public RealQualityAnalysisState(DoubleMetaData realMetaData) { public RealQualityAnalysisState(DoubleMetaData realMetaData) {
this.realQuality = new DoubleQuality(); this.realQuality = new DoubleQuality();

View File

@ -31,8 +31,8 @@ public class StringQualityAnalysisState implements QualityAnalysisState<StringQu
@Getter @Getter
private StringQuality stringQuality; private StringQuality stringQuality;
private StringQualityAddFunction addFunction; private final StringQualityAddFunction addFunction;
private StringQualityMergeFunction mergeFunction; private final StringQualityMergeFunction mergeFunction;
public StringQualityAnalysisState(StringMetaData stringMetaData) { public StringQualityAnalysisState(StringMetaData stringMetaData) {
this.stringQuality = new StringQuality(); this.stringQuality = new StringQuality();

View File

@ -31,8 +31,8 @@ public class TimeQualityAnalysisState implements QualityAnalysisState<TimeQualit
@Getter @Getter
private TimeQuality timeQuality; private TimeQuality timeQuality;
private TimeQualityAddFunction addFunction; private final TimeQualityAddFunction addFunction;
private TimeQualityMergeFunction mergeFunction; private final TimeQualityMergeFunction mergeFunction;
public TimeQualityAnalysisState(TimeMetaData timeMetaData) { public TimeQualityAnalysisState(TimeMetaData timeMetaData) {
this.timeQuality = new TimeQuality(); this.timeQuality = new TimeQuality();

View File

@ -46,12 +46,11 @@ public class SequenceLengthAnalysis implements Serializable {
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); String sb = "SequenceLengthAnalysis(" + "totalNumSequences=" + totalNumSequences +
sb.append("SequenceLengthAnalysis(").append("totalNumSequences=").append(totalNumSequences) ",minSeqLength=" + minSeqLength + ",maxSeqLength=" + maxSeqLength +
.append(",minSeqLength=").append(minSeqLength).append(",maxSeqLength=").append(maxSeqLength) ",countZeroLength=" + countZeroLength + ",countOneLength=" +
.append(",countZeroLength=").append(countZeroLength).append(",countOneLength=") countOneLength + ",meanLength=" + meanLength + ")";
.append(countOneLength).append(",meanLength=").append(meanLength).append(")"); return sb;
return sb.toString();
} }
} }

View File

@ -92,7 +92,7 @@ public abstract class BaseColumnCondition implements ColumnCondition {
return false; return false;
case NoSequenceMode: case NoSequenceMode:
throw new IllegalStateException( throw new IllegalStateException(
"Column condition " + toString() + " does not support sequence execution"); "Column condition " + this + " does not support sequence execution");
default: default:
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode); throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
} }
@ -116,7 +116,7 @@ public abstract class BaseColumnCondition implements ColumnCondition {
return false; return false;
case NoSequenceMode: case NoSequenceMode:
throw new IllegalStateException( throw new IllegalStateException(
"Column condition " + toString() + " does not support sequence execution"); "Column condition " + this + " does not support sequence execution");
default: default:
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode); throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
} }

View File

@ -42,7 +42,7 @@ public class DoubleColumnCondition extends BaseColumnCondition {
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE} * Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
* *
* @param columnName Column to check for the condition * @param columnName Column to check for the condition
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public DoubleColumnCondition(String columnName, ConditionOp op, double value) { public DoubleColumnCondition(String columnName, ConditionOp op, double value) {
@ -54,7 +54,7 @@ public class DoubleColumnCondition extends BaseColumnCondition {
* *
* @param column Column to check for the condition * @param column Column to check for the condition
* @param sequenceConditionMode Mode for handling sequence data * @param sequenceConditionMode Mode for handling sequence data
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public DoubleColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, public DoubleColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,

View File

@ -42,7 +42,7 @@ public class FloatColumnCondition extends BaseColumnCondition {
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE} * Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
* *
* @param columnName Column to check for the condition * @param columnName Column to check for the condition
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public FloatColumnCondition(String columnName, ConditionOp op, float value) { public FloatColumnCondition(String columnName, ConditionOp op, float value) {
@ -54,7 +54,7 @@ public class FloatColumnCondition extends BaseColumnCondition {
* *
* @param column Column to check for the condition * @param column Column to check for the condition
* @param sequenceConditionMode Mode for handling sequence data * @param sequenceConditionMode Mode for handling sequence data
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public FloatColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, public FloatColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,

View File

@ -42,7 +42,7 @@ public class IntegerColumnCondition extends BaseColumnCondition {
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE} * Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
* *
* @param columnName Column to check for the condition * @param columnName Column to check for the condition
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public IntegerColumnCondition(String columnName, ConditionOp op, int value) { public IntegerColumnCondition(String columnName, ConditionOp op, int value) {
@ -54,7 +54,7 @@ public class IntegerColumnCondition extends BaseColumnCondition {
* *
* @param column Column to check for the condition * @param column Column to check for the condition
* @param sequenceConditionMode Mode for handling sequence data * @param sequenceConditionMode Mode for handling sequence data
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public IntegerColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, public IntegerColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,

View File

@ -42,7 +42,7 @@ public class LongColumnCondition extends BaseColumnCondition {
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE} * Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
* *
* @param columnName Column to check for the condition * @param columnName Column to check for the condition
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public LongColumnCondition(String columnName, ConditionOp op, long value) { public LongColumnCondition(String columnName, ConditionOp op, long value) {
@ -54,7 +54,7 @@ public class LongColumnCondition extends BaseColumnCondition {
* *
* @param column Column to check for the condition * @param column Column to check for the condition
* @param sequenceConditionMode Mode for handling sequence data * @param sequenceConditionMode Mode for handling sequence data
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Value to use in the condition * @param value Value to use in the condition
*/ */
public LongColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) { public LongColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {

View File

@ -42,7 +42,7 @@ public class TimeColumnCondition extends BaseColumnCondition {
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE} * Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
* *
* @param columnName Column to check for the condition * @param columnName Column to check for the condition
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Time value (in epoch millisecond format) to use in the condition * @param value Time value (in epoch millisecond format) to use in the condition
*/ */
public TimeColumnCondition(String columnName, ConditionOp op, long value) { public TimeColumnCondition(String columnName, ConditionOp op, long value) {
@ -54,7 +54,7 @@ public class TimeColumnCondition extends BaseColumnCondition {
* *
* @param column Column to check for the condition * @param column Column to check for the condition
* @param sequenceConditionMode Mode for handling sequence data * @param sequenceConditionMode Mode for handling sequence data
* @param op Operation (<, >=, !=, etc) * @param op Operation {@code (<, >=, !=, etc)}
* @param value Time value (in epoch millisecond format) to use in the condition * @param value Time value (in epoch millisecond format) to use in the condition
*/ */
public TimeColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) { public TimeColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {

View File

@ -111,24 +111,18 @@ public class FilterInvalidValues implements Filter {
private boolean filterColumn(List<?> row, int i) { private boolean filterColumn(List<?> row, int i) {
ColumnMetaData meta = schema.getMetaData(i); ColumnMetaData meta = schema.getMetaData(i);
if (row.get(i) instanceof Float) { if (row.get(i) instanceof Float) {
if (!meta.isValid(new FloatWritable((Float) row.get(i)))) return !meta.isValid(new FloatWritable((Float) row.get(i)));
return true;
} else if (row.get(i) instanceof Double) { } else if (row.get(i) instanceof Double) {
if (!meta.isValid(new DoubleWritable((Double) row.get(i)))) return !meta.isValid(new DoubleWritable((Double) row.get(i)));
return true;
} else if (row.get(i) instanceof String) { } else if (row.get(i) instanceof String) {
if (!meta.isValid(new Text(((String) row.get(i)).toString()))) return !meta.isValid(new Text(((String) row.get(i))));
return true;
} else if (row.get(i) instanceof Integer) { } else if (row.get(i) instanceof Integer) {
if (!meta.isValid(new IntWritable((Integer) row.get(i)))) return !meta.isValid(new IntWritable((Integer) row.get(i)));
return true;
} else if (row.get(i) instanceof Long) { } else if (row.get(i) instanceof Long) {
if (!meta.isValid(new LongWritable((Long) row.get(i)))) return !meta.isValid(new LongWritable((Long) row.get(i)));
return true;
} else if (row.get(i) instanceof Boolean) { } else if (row.get(i) instanceof Boolean) {
if (!meta.isValid(new BooleanWritable((Boolean) row.get(i)))) return !meta.isValid(new BooleanWritable((Boolean) row.get(i)));
return true;
} }
return false; return false;
} }

View File

@ -96,7 +96,7 @@ public class Join implements Serializable {
public static class Builder { public static class Builder {
private JoinType joinType; private final JoinType joinType;
private Schema leftSchema; private Schema leftSchema;
private Schema rightSchema; private Schema rightSchema;
private String[] joinColumnsLeft; private String[] joinColumnsLeft;

View File

@ -84,9 +84,8 @@ public class BinaryMetaData extends BaseColumnMetaData {
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); String sb = "BinaryMetaData(name=\"" + name + "\"," +
sb.append("BinaryMetaData(name=\"").append(name).append("\","); ")";
sb.append(")"); return sb;
return sb.toString();
} }
} }

View File

@ -84,9 +84,8 @@ public class BooleanMetaData extends BaseColumnMetaData {
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); String sb = "BooleanMetaData(name=\"" + name + "\"," +
sb.append("BooleanMetaData(name=\"").append(name).append("\","); ")";
sb.append(")"); return sb;
return sb.toString();
} }
} }

View File

@ -84,10 +84,7 @@ public class DoubleMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && d < minAllowedValue) if (minAllowedValue != null && d < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && d > maxAllowedValue) return maxAllowedValue == null || !(d > maxAllowedValue);
return false;
return true;
} }
/** /**
@ -115,10 +112,7 @@ public class DoubleMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && d < minAllowedValue) if (minAllowedValue != null && d < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && d > maxAllowedValue) return maxAllowedValue == null || !(d > maxAllowedValue);
return false;
return true;
} }
@Override @Override

View File

@ -84,10 +84,7 @@ public class FloatMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && d < minAllowedValue) if (minAllowedValue != null && d < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && d > maxAllowedValue) return maxAllowedValue == null || d <= maxAllowedValue;
return false;
return true;
} }
/** /**
@ -115,10 +112,7 @@ public class FloatMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && d < minAllowedValue) if (minAllowedValue != null && d < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && d > maxAllowedValue) return maxAllowedValue == null || d <= maxAllowedValue;
return false;
return true;
} }
@Override @Override

View File

@ -65,9 +65,7 @@ public class IntegerMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && value < minAllowedValue) if (minAllowedValue != null && value < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && value > maxAllowedValue) return maxAllowedValue == null || value <= maxAllowedValue;
return false;
return true;
} }
/** /**
@ -90,9 +88,7 @@ public class IntegerMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && value < minAllowedValue) if (minAllowedValue != null && value < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && value > maxAllowedValue) return maxAllowedValue == null || value <= maxAllowedValue;
return false;
return true;
} }
@Override @Override

View File

@ -66,10 +66,7 @@ public class LongMetaData extends BaseColumnMetaData {
} }
if (minAllowedValue != null && value < minAllowedValue) if (minAllowedValue != null && value < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && value > maxAllowedValue) return maxAllowedValue == null || value <= maxAllowedValue;
return false;
return true;
} }
/** /**
@ -92,10 +89,7 @@ public class LongMetaData extends BaseColumnMetaData {
if (minAllowedValue != null && value < minAllowedValue) if (minAllowedValue != null && value < minAllowedValue)
return false; return false;
if (maxAllowedValue != null && value > maxAllowedValue) return maxAllowedValue == null || value <= maxAllowedValue;
return false;
return true;
} }
@Override @Override

View File

@ -97,9 +97,9 @@ public class AggregatorImpls {
} else if (a instanceof Float || b instanceof Float) { } else if (a instanceof Float || b instanceof Float) {
return new Float(a.floatValue() + b.floatValue()); return new Float(a.floatValue() + b.floatValue());
} else if (a instanceof Long || b instanceof Long) { } else if (a instanceof Long || b instanceof Long) {
return new Long(a.longValue() + b.longValue()); return Long.valueOf(a.longValue() + b.longValue());
} else { } else {
return new Integer(a.intValue() + b.intValue()); return Integer.valueOf(a.intValue() + b.intValue());
} }
} }
@ -146,9 +146,9 @@ public class AggregatorImpls {
} else if (a instanceof Float || b instanceof Float) { } else if (a instanceof Float || b instanceof Float) {
return new Float(a.floatValue() * b.floatValue()); return new Float(a.floatValue() * b.floatValue());
} else if (a instanceof Long || b instanceof Long) { } else if (a instanceof Long || b instanceof Long) {
return new Long(a.longValue() * b.longValue()); return Long.valueOf(a.longValue() * b.longValue());
} else { } else {
return new Integer(a.intValue() * b.intValue()); return Integer.valueOf(a.intValue() * b.intValue());
} }
} }
@ -347,7 +347,7 @@ public class AggregatorImpls {
* of the square root of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction. * of the square root of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
* *
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a> * See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
* This is computed with Welford's method for increased numerical stability & aggregability. * This is computed with Welford's method for increased numerical stability &amp; aggregability.
*/ */
public static class AggregableStdDev<T extends Number> implements IAggregableReduceOp<T, Writable> { public static class AggregableStdDev<T extends Number> implements IAggregableReduceOp<T, Writable> {
@ -402,7 +402,7 @@ public class AggregatorImpls {
* of the square root of the arithmetic mean of squared differences to the mean. * of the square root of the arithmetic mean of squared differences to the mean.
* *
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a> * See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
* This is computed with Welford's method for increased numerical stability & aggregability. * This is computed with Welford's method for increased numerical stability &amp; aggregability.
*/ */
public static class AggregableUncorrectedStdDev<T extends Number> extends AggregableStdDev<T> { public static class AggregableUncorrectedStdDev<T extends Number> extends AggregableStdDev<T> {
@ -418,7 +418,7 @@ public class AggregatorImpls {
* of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction. * of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
* *
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a> * See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
* This is computed with Welford's method for increased numerical stability & aggregability. * This is computed with Welford's method for increased numerical stability &amp; aggregability.
*/ */
public static class AggregableVariance<T extends Number> implements IAggregableReduceOp<T, Writable> { public static class AggregableVariance<T extends Number> implements IAggregableReduceOp<T, Writable> {
@ -474,7 +474,7 @@ public class AggregatorImpls {
* of the arithmetic mean of squared differences to the mean. * of the arithmetic mean of squared differences to the mean.
* *
* See <a href="https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance">https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance</a> * See <a href="https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance">https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance</a>
* This is computed with Welford's method for increased numerical stability & aggregability. * This is computed with Welford's method for increased numerical stability &amp; aggregability.
*/ */
public static class AggregablePopulationVariance<T extends Number> extends AggregableVariance<T> { public static class AggregablePopulationVariance<T extends Number> extends AggregableVariance<T> {
@ -491,7 +491,7 @@ public class AggregatorImpls {
* <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>. * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
* *
* The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting
* a nonzero `sp > p` in HyperLogLogPlus(p, sp) would trigger sparse * a nonzero `sp &gt; p` in HyperLogLogPlus(p, sp) would trigger sparse
* representation of registers, which may reduce the memory consumption * representation of registers, which may reduce the memory consumption
* and increase accuracy when the cardinality is small. * and increase accuracy when the cardinality is small.
* @param <T> * @param <T>
@ -501,7 +501,7 @@ public class AggregatorImpls {
private float p = 0.05f; private float p = 0.05f;
@Getter @Getter
private HyperLogLogPlus hll = new HyperLogLogPlus((int) Math.ceil(2.0 * Math.log(1.054 / p) / Math.log(2)), 0); private final HyperLogLogPlus hll = new HyperLogLogPlus((int) Math.ceil(2.0 * Math.log(1.054 / p) / Math.log(2)), 0);
public AggregableCountUnique(float precision) { public AggregableCountUnique(float precision) {
this.p = precision; this.p = precision;

View File

@ -36,7 +36,7 @@ public class DispatchWithConditionOp<U> extends DispatchOp<Writable, U>
@Getter @Getter
@NonNull @NonNull
private List<Condition> conditions; private final List<Condition> conditions;
public DispatchWithConditionOp(List<IAggregableReduceOp<Writable, List<U>>> ops, List<Condition> conds) { public DispatchWithConditionOp(List<IAggregableReduceOp<Writable, List<U>>> ops, List<Condition> conds) {

View File

@ -37,14 +37,13 @@ public interface AggregableColumnReduction extends Serializable, ColumnOp {
* and NOT the single row * and NOT the single row
* (as is usually the case for {@code List<Writable>} instances * (as is usually the case for {@code List<Writable>} instances
* *
* @param columnData The Writable objects for a column
* @return Writable containing the reduced data * @return Writable containing the reduced data
*/ */
IAggregableReduceOp<Writable, List<Writable>> reduceOp(); IAggregableReduceOp<Writable, List<Writable>> reduceOp();
/** /**
* Post-reduce: what is the name of the column? * Post-reduce: what is the name of the column?
* For example, "myColumn" -> "mean(myColumn)" * For example, "myColumn" -&gt; "mean(myColumn)"
* *
* @param columnInputName Name of the column before reduction * @param columnInputName Name of the column before reduction
* @return Name of the column after the reduction * @return Name of the column after the reduction

View File

@ -43,7 +43,7 @@ public interface ColumnReduction extends Serializable, ColumnOp {
/** /**
* Post-reduce: what is the name of the column? * Post-reduce: what is the name of the column?
* For example, "myColumn" -> "mean(myColumn)" * For example, "myColumn" -&gt; "mean(myColumn)"
* *
* @param columnInputName Name of the column before reduction * @param columnInputName Name of the column before reduction
* @return Name of the column after the reduction * @return Name of the column after the reduction

View File

@ -291,11 +291,11 @@ public class Reducer implements IAssociativeReducer {
public static class Builder { public static class Builder {
private ReduceOp defaultOp; private final ReduceOp defaultOp;
private Map<String, List<ReduceOp>> opMap = new HashMap<>(); private final Map<String, List<ReduceOp>> opMap = new HashMap<>();
private Map<String, AggregableColumnReduction> customReductions = new HashMap<>(); private final Map<String, AggregableColumnReduction> customReductions = new HashMap<>();
private Map<String, ConditionalReduction> conditionalReductions = new HashMap<>(); private final Map<String, ConditionalReduction> conditionalReductions = new HashMap<>();
private Set<String> ignoreInvalidInColumns = new HashSet<>(); private final Set<String> ignoreInvalidInColumns = new HashSet<>();
private String[] keyColumns; private String[] keyColumns;
@ -480,7 +480,6 @@ public class Reducer implements IAssociativeReducer {
* ignored/excluded. * ignored/excluded.
* *
* @param column Name of the column to execute the conditional reduction on * @param column Name of the column to execute the conditional reduction on
* @param outputName Name of the column, after the reduction has been executed
* @param reductions Reductions to execute * @param reductions Reductions to execute
* @param condition Condition to use in the reductions * @param condition Condition to use in the reductions
*/ */
@ -500,7 +499,6 @@ public class Reducer implements IAssociativeReducer {
* *
* @param column Name of the column to execute the conditional reduction on * @param column Name of the column to execute the conditional reduction on
* @param outputName Name of the column, after the reduction has been executed * @param outputName Name of the column, after the reduction has been executed
* @param reductions Reductions to execute
* @param condition Condition to use in the reductions * @param condition Condition to use in the reductions
*/ */
public Builder conditionalReduction(String column, String outputName, ReduceOp reduction, Condition condition) { public Builder conditionalReduction(String column, String outputName, ReduceOp reduction, Condition condition) {

View File

@ -69,7 +69,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
@Override @Override
public List<ColumnMetaData> getColumnOutputMetaData(List<String> newColumnName, ColumnMetaData columnInputMeta) { public List<ColumnMetaData> getColumnOutputMetaData(List<String> newColumnName, ColumnMetaData columnInputMeta) {
return Collections.<ColumnMetaData>singletonList(new StringMetaData(newColumnName.get(0))); return Collections.singletonList(new StringMetaData(newColumnName.get(0)));
} }
@Override @Override
@ -111,7 +111,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
public static class AverageCoordinateReduceOp implements IAggregableReduceOp<Writable, List<Writable>> { public static class AverageCoordinateReduceOp implements IAggregableReduceOp<Writable, List<Writable>> {
private static final double PI_180 = Math.PI / 180.0; private static final double PI_180 = Math.PI / 180.0;
private String delim; private final String delim;
private double sumx; private double sumx;
private double sumy; private double sumy;
@ -186,7 +186,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN"); Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");
String str = latDeg + delim + longDeg; String str = latDeg + delim + longDeg;
return Collections.<Writable>singletonList(new Text(str)); return Collections.singletonList(new Text(str));
} }
} }
} }

View File

@ -24,7 +24,7 @@ import org.datavec.api.writable.Writable;
public class TypeConversion { public class TypeConversion {
private static TypeConversion SINGLETON = new TypeConversion(); private static final TypeConversion SINGLETON = new TypeConversion();
private TypeConversion() {} private TypeConversion() {}

View File

@ -44,7 +44,7 @@ public class SplitMaxLengthSequence implements SequenceSplit {
/** /**
* @param maxSequenceLength max length of sequences * @param maxSequenceLength max length of sequences
* @param equalSplits if true: split larger sequences into equal sized subsequences. If false: split into * @param equalSplits if true: split larger sequences into equal sized subsequences. If false: split into
* n maxSequenceLength sequences, and (if necessary) 1 with 1 <= length < maxSequenceLength * n maxSequenceLength sequences, and (if necessary) 1 with 1 &lt;= length &lt; maxSequenceLength
*/ */
public SplitMaxLengthSequence(@JsonProperty("maxSequenceLength") int maxSequenceLength, public SplitMaxLengthSequence(@JsonProperty("maxSequenceLength") int maxSequenceLength,
@JsonProperty("equalSplits") boolean equalSplits) { @JsonProperty("equalSplits") boolean equalSplits) {

View File

@ -295,7 +295,7 @@ public abstract class BaseSerializer {
/** /**
* Deserialize an IStringReducer List serialized using {@link #serializeReducerList(List)}, or * Deserialize an IStringReducer List serialized using {@link #serializeReducerList(List)}, or
* an array serialized using {@link #serialize(IReducer[])} * an array serialized using {@code #serialize(IReducer[])}
* *
* @param str String representation (YAML/JSON) of the IStringReducer list * @param str String representation (YAML/JSON) of the IStringReducer list
* @return {@code List<IStringReducer>} * @return {@code List<IStringReducer>}

View File

@ -34,8 +34,8 @@ import com.fasterxml.jackson.datatype.joda.JodaModule;
@Slf4j @Slf4j
public class JsonMappers { public class JsonMappers {
private static ObjectMapper jsonMapper; private static final ObjectMapper jsonMapper;
private static ObjectMapper yamlMapper; private static final ObjectMapper yamlMapper;
private static ObjectMapper legacyMapper; //For 1.0.0-alpha and earlier TransformProcess etc private static ObjectMapper legacyMapper; //For 1.0.0-alpha and earlier TransformProcess etc
static { static {

View File

@ -24,7 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
public class JsonSerializer extends BaseSerializer { public class JsonSerializer extends BaseSerializer {
private ObjectMapper om; private final ObjectMapper om;
public JsonSerializer() { public JsonSerializer() {
this.om = JsonMappers.getMapper(); this.om = JsonMappers.getMapper();

View File

@ -37,7 +37,7 @@ public class ListWrappers {
@Getter @Getter
public static class TransformList { public static class TransformList {
private List<Transform> list; private final List<Transform> list;
public TransformList(@JsonProperty("list") List<Transform> list) { public TransformList(@JsonProperty("list") List<Transform> list) {
this.list = list; this.list = list;
@ -46,7 +46,7 @@ public class ListWrappers {
@Getter @Getter
public static class FilterList { public static class FilterList {
private List<Filter> list; private final List<Filter> list;
public FilterList(@JsonProperty("list") List<Filter> list) { public FilterList(@JsonProperty("list") List<Filter> list) {
this.list = list; this.list = list;
@ -55,7 +55,7 @@ public class ListWrappers {
@Getter @Getter
public static class ConditionList { public static class ConditionList {
private List<Condition> list; private final List<Condition> list;
public ConditionList(@JsonProperty("list") List<Condition> list) { public ConditionList(@JsonProperty("list") List<Condition> list) {
this.list = list; this.list = list;
@ -64,7 +64,7 @@ public class ListWrappers {
@Getter @Getter
public static class ReducerList { public static class ReducerList {
private List<IAssociativeReducer> list; private final List<IAssociativeReducer> list;
public ReducerList(@JsonProperty("list") List<IAssociativeReducer> list) { public ReducerList(@JsonProperty("list") List<IAssociativeReducer> list) {
this.list = list; this.list = list;
@ -73,7 +73,7 @@ public class ListWrappers {
@Getter @Getter
public static class SequenceComparatorList { public static class SequenceComparatorList {
private List<SequenceComparator> list; private final List<SequenceComparator> list;
public SequenceComparatorList(@JsonProperty("list") List<SequenceComparator> list) { public SequenceComparatorList(@JsonProperty("list") List<SequenceComparator> list) {
this.list = list; this.list = list;
@ -82,7 +82,7 @@ public class ListWrappers {
@Getter @Getter
public static class DataActionList { public static class DataActionList {
private List<DataAction> list; private final List<DataAction> list;
public DataActionList(@JsonProperty("list") List<DataAction> list) { public DataActionList(@JsonProperty("list") List<DataAction> list) {
this.list = list; this.list = list;

View File

@ -24,7 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
public class YamlSerializer extends BaseSerializer { public class YamlSerializer extends BaseSerializer {
private ObjectMapper om; private final ObjectMapper om;
public YamlSerializer() { public YamlSerializer() {
this.om = JsonMappers.getMapperYaml(); this.om = JsonMappers.getMapperYaml();

View File

@ -177,10 +177,10 @@ public class StringReducer implements IStringReducer {
public static class Builder { public static class Builder {
private StringReduceOp defaultOp; private final StringReduceOp defaultOp;
private Map<String, StringReduceOp> opMap = new HashMap<>(); private final Map<String, StringReduceOp> opMap = new HashMap<>();
private Map<String, ColumnReduction> customReductions = new HashMap<>(); private final Map<String, ColumnReduction> customReductions = new HashMap<>();
private Set<String> ignoreInvalidInColumns = new HashSet<>(); private final Set<String> ignoreInvalidInColumns = new HashSet<>();
private String outputColumnName; private String outputColumnName;
private List<String> inputColumns; private List<String> inputColumns;

View File

@ -80,7 +80,7 @@ public abstract class BaseColumnTransform extends BaseTransform implements Colum
if (writables.size() != inputSchema.numColumns()) { if (writables.size() != inputSchema.numColumns()) {
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size() throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns() + ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
+ "). Transform = " + toString()); + "). Transform = " + this);
} }
int n = writables.size(); int n = writables.size();
List<Writable> out = new ArrayList<>(n); List<Writable> out = new ArrayList<>(n);

View File

@ -96,7 +96,7 @@ public class CategoricalToIntegerTransform extends BaseTransform {
if (writables.size() != inputSchema.numColumns()) { if (writables.size() != inputSchema.numColumns()) {
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size() throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns() + ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
+ "). Transform = " + toString()); + "). Transform = " + this);
} }
int idx = getColumnIdx(); int idx = getColumnIdx();

View File

@ -123,7 +123,7 @@ public class CategoricalToOneHotTransform extends BaseTransform {
if (writables.size() != inputSchema.numColumns()) { if (writables.size() != inputSchema.numColumns()) {
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size() throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns() + ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
+ "). Transform = " + toString()); + "). Transform = " + this);
} }
int idx = getColumnIdx(); int idx = getColumnIdx();

View File

@ -89,7 +89,7 @@ public class IntegerToCategoricalTransform extends BaseColumnTransform {
IntegerToCategoricalTransform o2 = (IntegerToCategoricalTransform) o; IntegerToCategoricalTransform o2 = (IntegerToCategoricalTransform) o;
return map != null ? map.equals(o2.map) : o2.map == null; return Objects.equals(map, o2.map);
} }

Some files were not shown because too many files have changed in this diff Show More