Fix javadoc and cleanup
parent
5c98c5e1ed
commit
07c052d822
|
@ -139,7 +139,6 @@ public class BrianTest /*extends BaseDL4JTest*/ {
|
|||
//.setExecutorEnv("spark.executor.cores", "2")
|
||||
//.setExecutorEnv("spark.executor.memory", "2g")
|
||||
//.set("spark.submit.deployMode", "client")
|
||||
;
|
||||
|
||||
/*
|
||||
SparkSession spark = SparkSession
|
||||
|
@ -240,7 +239,7 @@ public class BrianTest /*extends BaseDL4JTest*/ {
|
|||
*/
|
||||
TransformProcess tp = new TransformProcess.Builder(inputSchema)
|
||||
.removeAllColumnsExceptFor("country_code", "lat", "lon")
|
||||
.stringToCategorical("country_code", Arrays.asList(new String[] {"GR", "FR", "DE", "CH"}))
|
||||
.stringToCategorical("country_code", Arrays.asList("GR", "FR", "DE", "CH"))
|
||||
.filter(new FilterInvalidValues())
|
||||
.categoricalToOneHot("country_code")
|
||||
.build();
|
||||
|
|
|
@ -225,7 +225,7 @@ public class BrianTest2 /*extends BaseDL4JTest*/ {
|
|||
*/
|
||||
TransformProcess tp = new TransformProcess.Builder(inputSchema)
|
||||
.removeAllColumnsExceptFor("country_code", "lat", "lon")
|
||||
.stringToCategorical("country_code", Arrays.asList(new String[] {"GR", "FR", "DE", "CH"}))
|
||||
.stringToCategorical("country_code", Arrays.asList("GR", "FR", "DE", "CH"))
|
||||
.filter(new FilterInvalidValues())
|
||||
.categoricalToOneHot("country_code")
|
||||
.build();
|
||||
|
|
|
@ -91,10 +91,10 @@ public class IntegrationTestRunner {
|
|||
|
||||
public static final double MAX_REL_ERROR_SCORES = 1e-4;
|
||||
|
||||
private static List<Class<?>> layerClasses = new ArrayList<>();
|
||||
private static List<Class<?>> preprocClasses = new ArrayList<>();
|
||||
private static List<Class<?>> graphVertexClasses = new ArrayList<>();
|
||||
private static List<Class<?>> evaluationClasses = new ArrayList<>();
|
||||
private static final List<Class<?>> layerClasses = new ArrayList<>();
|
||||
private static final List<Class<?>> preprocClasses = new ArrayList<>();
|
||||
private static final List<Class<?>> graphVertexClasses = new ArrayList<>();
|
||||
private static final List<Class<?>> evaluationClasses = new ArrayList<>();
|
||||
|
||||
private static Map<Class<?>, Integer> layerConfClassesSeen = new HashMap<>();
|
||||
private static Map<Class<?>, Integer> preprocessorConfClassesSeen = new HashMap<>();
|
||||
|
|
|
@ -67,8 +67,8 @@ public class CNN1DTestCases {
|
|||
testOverfitting = false;
|
||||
}
|
||||
|
||||
int miniBatchSize = 16;
|
||||
int exampleLength = 128;
|
||||
final int miniBatchSize = 16;
|
||||
final int exampleLength = 128;
|
||||
|
||||
@Override
|
||||
public ModelType modelType() {
|
||||
|
|
|
@ -271,11 +271,11 @@ public class CNN2DTestCases {
|
|||
public static TestCase getYoloHouseNumbers() {
|
||||
return new TestCase() {
|
||||
|
||||
private int width = 416;
|
||||
private int height = 416;
|
||||
private int nChannels = 3;
|
||||
private int gridWidth = 13;
|
||||
private int gridHeight = 13;
|
||||
private final int width = 416;
|
||||
private final int height = 416;
|
||||
private final int nChannels = 3;
|
||||
private final int gridWidth = 13;
|
||||
private final int gridHeight = 13;
|
||||
|
||||
{
|
||||
testName = "YOLOHouseNumbers";
|
||||
|
|
|
@ -108,7 +108,7 @@ public class CNN3DTestCases {
|
|||
public MultiDataSet getGradientsTestData() throws Exception {
|
||||
Nd4j.getRandom().setSeed(12345);
|
||||
//NCDHW format
|
||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
||||
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
||||
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
|
||||
}
|
||||
|
@ -135,6 +135,6 @@ public class CNN3DTestCases {
|
|||
}
|
||||
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -93,8 +93,8 @@ public class RNNTestCases {
|
|||
minAbsErrorParamsPostTraining = 2e-3;
|
||||
}
|
||||
|
||||
private int miniBatchSize = 32;
|
||||
private int exampleLength = 200;
|
||||
private final int miniBatchSize = 32;
|
||||
private final int exampleLength = 200;
|
||||
|
||||
|
||||
@Override
|
||||
|
|
|
@ -31,23 +31,24 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.util.*;
|
||||
|
||||
public class CharacterIterator implements DataSetIterator {
|
||||
//Valid characters
|
||||
private char[] validCharacters;
|
||||
private final char[] validCharacters;
|
||||
//Maps each character to an index in the input/output
|
||||
private Map<Character, Integer> charToIdxMap;
|
||||
private final Map<Character, Integer> charToIdxMap;
|
||||
//All characters of the input file (after filtering to only those that are valid)
|
||||
private char[] fileCharacters;
|
||||
private final char[] fileCharacters;
|
||||
//Length of each example/minibatch (number of characters)
|
||||
private int exampleLength;
|
||||
private final int exampleLength;
|
||||
//Size of each minibatch (number of examples)
|
||||
private int miniBatchSize;
|
||||
private Random rng;
|
||||
private final int miniBatchSize;
|
||||
private final Random rng;
|
||||
//Offsets for the start of each example
|
||||
private LinkedList<Integer> exampleStartOffsets = new LinkedList<>();
|
||||
private final LinkedList<Integer> exampleStartOffsets = new LinkedList<>();
|
||||
|
||||
/**
|
||||
* @param textFilePath Path to text file to use for generating samples
|
||||
|
@ -299,7 +300,7 @@ public class CharacterIterator implements DataSetIterator {
|
|||
if (!f.exists()) throw new IOException("File does not exist: " + fileLocation); //Download problem?
|
||||
|
||||
char[] validCharacters = CharacterIterator.getMinimalCharacterSet(); //Which characters are allowed? Others will be removed
|
||||
return new CharacterIterator(fileLocation, Charset.forName("UTF-8"),
|
||||
return new CharacterIterator(fileLocation, StandardCharsets.UTF_8,
|
||||
miniBatchSize, sequenceLength, validCharacters, new Random(12345));
|
||||
}
|
||||
|
||||
|
|
|
@ -305,7 +305,7 @@ public class SameDiffCNNCases {
|
|||
// [minibatch,8,1,1,1]
|
||||
|
||||
|
||||
int channels_height_width_depth = 8 * 1 * 1 * 1;
|
||||
int channels_height_width_depth = 8;
|
||||
|
||||
SDVariable layer1_reshaped = layer1.reshape(-1, channels_height_width_depth);
|
||||
|
||||
|
@ -331,7 +331,7 @@ public class SameDiffCNNCases {
|
|||
public Map<String,INDArray> getGradientsTestDataSameDiff() throws Exception {
|
||||
Nd4j.getRandom().setSeed(12345);
|
||||
//NCDHW format
|
||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
||||
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
||||
|
||||
Map<String, INDArray> map = new HashMap<>();
|
||||
|
@ -357,7 +357,7 @@ public class SameDiffCNNCases {
|
|||
Nd4j.getRandom().setSeed(12345);
|
||||
|
||||
List<Map<String, INDArray>> list = new ArrayList<>();
|
||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
||||
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||
|
||||
list.add(Collections.singletonMap("in", arr));
|
||||
|
||||
|
@ -368,7 +368,7 @@ public class SameDiffCNNCases {
|
|||
public MultiDataSet getGradientsTestData() throws Exception {
|
||||
Nd4j.getRandom().setSeed(12345);
|
||||
//NCDHW format
|
||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
||||
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
||||
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
|
||||
}
|
||||
|
|
|
@ -130,3 +130,19 @@ echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
|
|||
|
||||
-P\<xxx>\
|
||||
CAVIS_AVX_EXTENSION = {avx2 | avx512}, default is avx2
|
||||
|
||||
# Zeppelin Spark dependencies #
|
||||
3
|
||||
|
||||
|
||||
To add the dependency to the language models, use the following format in the Dependencies section of the Spark Interpreter configuration (Interpreters -> Spark -> Edit -> Dependencies):
|
||||
|
||||
groupId:artifactId:packaging:classifier:version
|
||||
|
||||
In your case it should work with
|
||||
|
||||
edu.stanford.nlp:stanford-corenlp:jar:models:3.8.0
|
||||
|
||||
|
||||
Native cpu code under linux needs libc6-dev
|
||||
/lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.29' not found
|
|
@ -266,7 +266,7 @@ public class Configuration implements Iterable<Map.Entry<String, String>>, Writa
|
|||
reloadConfiguration();
|
||||
}
|
||||
|
||||
private static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
|
||||
private static final Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
|
||||
|
||||
private String substituteVars(String expr) {
|
||||
if (expr == null) {
|
||||
|
@ -555,7 +555,7 @@ public class Configuration implements Iterable<Map.Entry<String, String>>, Writa
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the value of the <code>name</code> property as a <ocde>Pattern</code>.
|
||||
* Get the value of the <code>name</code> property as a {@code Pattern}.
|
||||
* If no such property is specified, or if the specified value is not a valid
|
||||
* <code>Pattern</code>, then <code>DefaultValue</code> is returned.
|
||||
*
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.datavec.api.records.writer.RecordWriter;
|
|||
|
||||
public interface OutputFormat {
|
||||
|
||||
public static final String OUTPUT_PATH = "org.nd4j.outputpath";
|
||||
String OUTPUT_PATH = "org.nd4j.outputpath";
|
||||
|
||||
/**
|
||||
* Create a record writer
|
||||
|
|
|
@ -34,7 +34,7 @@ public abstract class BinaryComparable implements Comparable<BinaryComparable> {
|
|||
|
||||
/**
|
||||
* Compare bytes from {@link #getBytes()}.
|
||||
* @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int)
|
||||
* {@code org.apache.hadoop.io.WritableComparator#compareBytes(byte[], int, int, byte[], int, int)}
|
||||
*/
|
||||
public int compareTo(BinaryComparable other) {
|
||||
if (this == other)
|
||||
|
@ -63,7 +63,7 @@ public abstract class BinaryComparable implements Comparable<BinaryComparable> {
|
|||
|
||||
/**
|
||||
* Return a hash of the bytes returned from {@link #getBytes()}.
|
||||
* @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)
|
||||
* {@code org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)}
|
||||
*/
|
||||
public int hashCode() {
|
||||
return WritableComparator.hashBytes(getBytes(), getLength());
|
||||
|
|
|
@ -50,7 +50,7 @@ public class DataInputBuffer extends DataInputStream {
|
|||
}
|
||||
}
|
||||
|
||||
private Buffer buffer;
|
||||
private final Buffer buffer;
|
||||
|
||||
/** Constructs a new empty buffer. */
|
||||
public DataInputBuffer() {
|
||||
|
|
|
@ -44,7 +44,7 @@ public class DataOutputBuffer extends DataOutputStream {
|
|||
public void write(DataInput in, int len) throws IOException {
|
||||
int newcount = count + len;
|
||||
if (newcount > buf.length) {
|
||||
byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)];
|
||||
byte[] newbuf = new byte[Math.max(buf.length << 1, newcount)];
|
||||
System.arraycopy(buf, 0, newbuf, 0, count);
|
||||
buf = newbuf;
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ public class DataOutputBuffer extends DataOutputStream {
|
|||
}
|
||||
}
|
||||
|
||||
private Buffer buffer;
|
||||
private final Buffer buffer;
|
||||
|
||||
/** Constructs a new empty buffer. */
|
||||
public DataOutputBuffer() {
|
||||
|
|
|
@ -25,6 +25,6 @@ import java.util.Comparator;
|
|||
|
||||
public interface RawComparator<T> extends Comparator<T> {
|
||||
|
||||
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
|
||||
int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
|
||||
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ import java.util.HashMap;
|
|||
|
||||
public class WritableComparator implements RawComparator {
|
||||
|
||||
private static HashMap<Class, WritableComparator> comparators = new HashMap<>(); // registry
|
||||
private static final HashMap<Class, WritableComparator> comparators = new HashMap<>(); // registry
|
||||
|
||||
/** Get a comparator for a {@link WritableComparable} implementation. */
|
||||
public static synchronized WritableComparator get(Class<? extends WritableComparable> c) {
|
||||
|
|
|
@ -229,7 +229,7 @@ public final class WritableUtils {
|
|||
|
||||
/**
|
||||
* Serializes an integer to a binary stream with zero-compressed encoding.
|
||||
* For -120 <= i <= 127, only one byte is used with the actual value.
|
||||
* For -120 <= i <= 127, only one byte is used with the actual value.
|
||||
* For other values of i, the first byte value indicates whether the
|
||||
* integer is positive or negative, and the number of bytes that follow.
|
||||
* If the first byte value v is between -121 and -124, the following integer
|
||||
|
@ -248,7 +248,7 @@ public final class WritableUtils {
|
|||
|
||||
/**
|
||||
* Serializes a long to a binary stream with zero-compressed encoding.
|
||||
* For -112 <= i <= 127, only one byte is used with the actual value.
|
||||
* For -112 <= i <= 127, only one byte is used with the actual value.
|
||||
* For other values of i, the first byte value indicates whether the
|
||||
* long is positive or negative, and the number of bytes that follow.
|
||||
* If the first byte value v is between -113 and -120, the following long
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.datavec.api.writable.Writable;
|
|||
import java.util.List;
|
||||
|
||||
public class LabelWriterConverter implements WritableConverter {
|
||||
private List<String> labels;
|
||||
private final List<String> labels;
|
||||
|
||||
public LabelWriterConverter(List<String> labels) {
|
||||
this.labels = labels;
|
||||
|
|
|
@ -35,7 +35,7 @@ public interface PathLabelGenerator extends Serializable {
|
|||
* If true: infer the set of possible label classes, and convert these to integer indexes. When true, the
|
||||
* returned Writables should be text writables.<br>
|
||||
* <br>
|
||||
* For regression use cases (or PathLabelGenerator classification instances that do their own label -> integer
|
||||
* For regression use cases (or PathLabelGenerator classification instances that do their own label -> integer
|
||||
* assignment), this should return false.
|
||||
*
|
||||
* @return whether label classes should be inferred
|
||||
|
|
|
@ -35,7 +35,7 @@ public class SerializationFactory extends Configured {
|
|||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SerializationFactory.class.getName());
|
||||
|
||||
private List<Serialization<?>> serializations = new ArrayList<>();
|
||||
private final List<Serialization<?>> serializations = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
|
@ -47,7 +47,7 @@ public class SerializationFactory extends Configured {
|
|||
public SerializationFactory(Configuration conf) {
|
||||
super(conf);
|
||||
for (String serializerName : conf.getStrings("io.serializations",
|
||||
new String[] {"org.apache.hadoop.io.serializer.WritableSerialization"})) {
|
||||
"org.apache.hadoop.io.serializer.WritableSerialization")) {
|
||||
add(conf, serializerName);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -113,7 +113,7 @@ public class Buffer implements Comparable, Cloneable {
|
|||
|
||||
/**
|
||||
* Change the capacity of the backing storage.
|
||||
* The data is preserved if newCapacity >= getCount().
|
||||
* The data is preserved if newCapacity >= getCount().
|
||||
* @param newCapacity The new capacity in bytes.
|
||||
*/
|
||||
public void setCapacity(int newCapacity) {
|
||||
|
|
|
@ -209,9 +209,7 @@ public class IOUtils {
|
|||
* @return
|
||||
*/
|
||||
static String toCSVBuffer(Buffer buf) {
|
||||
StringBuilder sb = new StringBuilder("#");
|
||||
sb.append(buf.toString());
|
||||
return sb.toString();
|
||||
return "#" + buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -441,7 +439,7 @@ public class IOUtils {
|
|||
|
||||
/**
|
||||
* Serializes a long to a binary stream with zero-compressed encoding.
|
||||
* For -112 <= i <= 127, only one byte is used with the actual value.
|
||||
* For -112 <= i <= 127, only one byte is used with the actual value.
|
||||
* For other values of i, the first byte value indicates whether the
|
||||
* long is positive or negative, and the number of bytes that follow.
|
||||
* If the first byte value v is between -113 and -120, the following long
|
||||
|
|
|
@ -99,8 +99,6 @@ public interface RecordReader extends AutoCloseable, Serializable, Configurable
|
|||
|
||||
/**
|
||||
* Reset record reader iterator
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
void reset();
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ import java.util.List;
|
|||
*/
|
||||
public class ComposableRecordReader extends BaseRecordReader {
|
||||
|
||||
private RecordReader[] readers;
|
||||
private final RecordReader[] readers;
|
||||
|
||||
public ComposableRecordReader(RecordReader... readers) {
|
||||
this.readers = readers;
|
||||
|
|
|
@ -35,7 +35,7 @@ import java.util.List;
|
|||
|
||||
public class ConcatenatingRecordReader extends BaseRecordReader {
|
||||
|
||||
private RecordReader[] readers;
|
||||
private final RecordReader[] readers;
|
||||
|
||||
public ConcatenatingRecordReader(RecordReader... readers) {
|
||||
this.readers = readers;
|
||||
|
|
|
@ -23,14 +23,14 @@ package org.datavec.api.records.reader.impl;
|
|||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.datavec.api.conf.Configuration;
|
||||
import org.datavec.api.Record;
|
||||
import org.datavec.api.records.Record;
|
||||
import org.datavec.api.records.metadata.RecordMetaData;
|
||||
import org.datavec.api.records.metadata.RecordMetaDataURI;
|
||||
import org.datavec.api.records.reader.BaseRecordReader;
|
||||
import org.datavec.api.split.InputSplit;
|
||||
import org.datavec.api.writable.IntWritable;
|
||||
import org.datavec.api.writable.Text;
|
||||
import org.datavec.api.Writable;
|
||||
import org.datavec.api.writable.Writable;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URI;
|
||||
|
@ -40,6 +40,8 @@ import java.util.*;
|
|||
|
||||
/**
|
||||
* File reader/writer
|
||||
*
|
||||
* @author Adam Gibson
|
||||
*/
|
||||
public class FileRecordReader extends BaseRecordReader {
|
||||
|
||||
|
@ -48,12 +50,10 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
protected URI currentUri;
|
||||
protected List<String> labels;
|
||||
protected boolean appendLabel = false;
|
||||
@Getter
|
||||
@Setter
|
||||
@Getter @Setter
|
||||
protected String charset = StandardCharsets.UTF_8.name(); //Using String as StandardCharsets.UTF_8 is not serializable
|
||||
|
||||
public FileRecordReader() {
|
||||
}
|
||||
public FileRecordReader() {}
|
||||
|
||||
@Override
|
||||
public void initialize(InputSplit split) throws IOException, InterruptedException {
|
||||
|
@ -68,9 +68,9 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
URI[] locations = split.locations();
|
||||
if (locations.length > 0) {
|
||||
Set<String> labels = new HashSet<>();
|
||||
for (URI u : locations) {
|
||||
for(URI u : locations){
|
||||
String[] pathSplit = u.toString().split("[/\\\\]");
|
||||
labels.add(pathSplit[pathSplit.length - 2]);
|
||||
labels.add(pathSplit[pathSplit.length-2]);
|
||||
}
|
||||
this.labels = new ArrayList<>(labels);
|
||||
Collections.sort(this.labels);
|
||||
|
@ -80,8 +80,7 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void initialize(Configuration conf, InputSplit split)
|
||||
throws IOException, InterruptedException {
|
||||
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
|
||||
appendLabel = conf.getBoolean(APPEND_LABEL, true);
|
||||
doInitialize(split);
|
||||
this.inputSplit = split;
|
||||
|
@ -96,7 +95,7 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
private List<Writable> loadFromStream(URI uri, InputStream next, Charset charset) {
|
||||
List<Writable> ret = new ArrayList<>();
|
||||
try {
|
||||
if (!(next instanceof BufferedInputStream)) {
|
||||
if(!(next instanceof BufferedInputStream)){
|
||||
next = new BufferedInputStream(next);
|
||||
}
|
||||
String s = org.apache.commons.io.IOUtils.toString(next, charset);
|
||||
|
@ -112,21 +111,21 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
}
|
||||
|
||||
/**
|
||||
* Return the current label. The index of the current file's parent directory in the label list
|
||||
*
|
||||
* Return the current label.
|
||||
* The index of the current file's parent directory
|
||||
* in the label list
|
||||
* @return The index of the current file's parent directory
|
||||
*/
|
||||
public int getCurrentLabel() {
|
||||
return getLabel(currentUri);
|
||||
}
|
||||
|
||||
public int getLabel(URI uri) {
|
||||
public int getLabel(URI uri){
|
||||
String s = uri.toString();
|
||||
int lastIdx = Math.max(s.lastIndexOf('/'),
|
||||
s.lastIndexOf('\\')); //Note: if neither are found, -1 is fine here
|
||||
int lastIdx = Math.max(s.lastIndexOf('/'), s.lastIndexOf('\\')); //Note: if neither are found, -1 is fine here
|
||||
String sub = s.substring(0, lastIdx);
|
||||
int secondLastIdx = Math.max(sub.lastIndexOf('/'), sub.lastIndexOf('\\'));
|
||||
String name = s.substring(secondLastIdx + 1, lastIdx);
|
||||
String name = s.substring(secondLastIdx+1, lastIdx);
|
||||
return labels.indexOf(name);
|
||||
}
|
||||
|
||||
|
@ -168,12 +167,10 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
if (inputSplit == null) {
|
||||
if (inputSplit == null)
|
||||
throw new UnsupportedOperationException("Cannot reset without first initializing");
|
||||
}
|
||||
try {
|
||||
doInitialize(inputSplit);
|
||||
} catch (Exception e) {
|
||||
|
@ -183,7 +180,7 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
|
||||
@Override
|
||||
public boolean resetSupported() {
|
||||
if (inputSplit != null) {
|
||||
if(inputSplit != null){
|
||||
return inputSplit.resetSupported();
|
||||
}
|
||||
return false; //reset() throws exception on reset() if inputSplit is null
|
||||
|
@ -208,14 +205,13 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
invokeListeners(next);
|
||||
|
||||
List<Writable> ret;
|
||||
try (InputStream s = streamCreatorFn.apply(next)) {
|
||||
try(InputStream s = streamCreatorFn.apply(next)) {
|
||||
ret = loadFromStream(next, s, Charset.forName(charset));
|
||||
} catch (IOException e) {
|
||||
} catch (IOException e){
|
||||
throw new RuntimeException("Error reading from stream for URI: " + next);
|
||||
}
|
||||
|
||||
return new org.datavec.api.records.impl.Record(ret,
|
||||
new RecordMetaDataURI(next, FileRecordReader.class));
|
||||
return new org.datavec.api.records.impl.Record(ret,new RecordMetaDataURI(next, FileRecordReader.class));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -231,9 +227,9 @@ public class FileRecordReader extends BaseRecordReader {
|
|||
URI uri = meta.getURI();
|
||||
|
||||
List<Writable> list;
|
||||
try (InputStream s = streamCreatorFn.apply(uri)) {
|
||||
try(InputStream s = streamCreatorFn.apply(uri)) {
|
||||
list = loadFromStream(uri, s, Charset.forName(charset));
|
||||
} catch (IOException e) {
|
||||
} catch (IOException e){
|
||||
throw new RuntimeException("Error reading from stream for URI: " + uri);
|
||||
}
|
||||
|
||||
|
|
|
@ -200,7 +200,7 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
//Here: we are reading a single line from the DataInputStream
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
|
||||
String line = br.readLine();
|
||||
return Collections.singletonList((Writable) new Text(line));
|
||||
return Collections.singletonList(new Text(line));
|
||||
}
|
||||
|
||||
protected Iterator<String> getIterator(int location) {
|
||||
|
@ -265,7 +265,7 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
throw new IllegalArgumentException(
|
||||
"Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd);
|
||||
}
|
||||
list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, (List<Writable>) null));
|
||||
list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, null));
|
||||
if (rmd.getURI() != null)
|
||||
uris.add(rmd.getURI());
|
||||
}
|
||||
|
@ -332,7 +332,7 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI
|
||||
+ ": has only " + currentLineIdx + " lines");
|
||||
}
|
||||
t.setThird(Collections.<Writable>singletonList(new Text(line)));
|
||||
t.setThird(Collections.singletonList(new Text(line)));
|
||||
}
|
||||
} else {
|
||||
//Not URI based: String split, etc
|
||||
|
@ -347,7 +347,7 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
line = iterator.next();
|
||||
currentLineIdx++;
|
||||
}
|
||||
t.setThird(Collections.<Writable>singletonList(new Text(line)));
|
||||
t.setThird(Collections.singletonList(new Text(line)));
|
||||
}
|
||||
closeIfRequired(iterator);
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ public class CollectionSequenceRecordReader extends BaseRecordReader implements
|
|||
|
||||
/**
|
||||
*
|
||||
* @param records Collection of sequences. For example, List<List<List<Writable>>> where the inner two lists
|
||||
* @param records Collection of sequences. For example, {@code List<List<List<Writable>>>} where the inner two lists
|
||||
* are a sequence, and the outer list/collection is a list of sequences
|
||||
*/
|
||||
public CollectionSequenceRecordReader(Collection<? extends Collection<? extends Collection<Writable>>> records) {
|
||||
|
|
|
@ -45,9 +45,9 @@ public class CSVMultiSequenceRecordReader extends CSVRecordReader implements Seq
|
|||
PAD
|
||||
}
|
||||
|
||||
private String sequenceSeparatorRegex;
|
||||
private Mode mode;
|
||||
private Writable padValue;
|
||||
private final String sequenceSeparatorRegex;
|
||||
private final Mode mode;
|
||||
private final Writable padValue;
|
||||
|
||||
/**
|
||||
* Create a sequence reader using the default value for skip lines (0), the default delimiter (',') and the default
|
||||
|
|
|
@ -41,7 +41,7 @@ public class CSVNLinesSequenceRecordReader extends CSVRecordReader implements Se
|
|||
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
|
||||
|
||||
private int nLinesPerSequence;
|
||||
private String delimiter;
|
||||
private final String delimiter;
|
||||
|
||||
/**
|
||||
* No-arg constructor with the default number of lines per sequence (10)
|
||||
|
@ -124,7 +124,7 @@ public class CSVNLinesSequenceRecordReader extends CSVRecordReader implements Se
|
|||
"Invalid metadata; expected RecordMetaDataLineInterval instance; got: " + rmd);
|
||||
}
|
||||
list.add(new Triple<>(count++, (RecordMetaDataLineInterval) rmd,
|
||||
(List<List<Writable>>) new ArrayList<List<Writable>>()));
|
||||
new ArrayList<List<Writable>>()));
|
||||
}
|
||||
|
||||
//Sort by starting line number:
|
||||
|
|
|
@ -39,8 +39,8 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
|||
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
|
||||
|
||||
private int maxLinesPerSequence;
|
||||
private String delimiter;
|
||||
private int stride;
|
||||
private final String delimiter;
|
||||
private final int stride;
|
||||
private LinkedList<List<Writable>> queue;
|
||||
private boolean exhausted;
|
||||
|
||||
|
@ -60,7 +60,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
|||
|
||||
/**
|
||||
* @param maxLinesPerSequence Number of lines in each sequence, use default delimiter (,) between entries in the same line
|
||||
* @param stride Number of lines between records (increment window > 1 line)
|
||||
* @param stride Number of lines between records (increment window > 1 line)
|
||||
*/
|
||||
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride) {
|
||||
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
|
||||
|
@ -68,7 +68,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
|||
|
||||
/**
|
||||
* @param maxLinesPerSequence Number of lines in each sequence, use default delimiter (,) between entries in the same line
|
||||
* @param stride Number of lines between records (increment window > 1 line)
|
||||
* @param stride Number of lines between records (increment window > 1 line)
|
||||
*/
|
||||
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride, String delimiter) {
|
||||
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
|
||||
|
@ -78,7 +78,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
|||
*
|
||||
* @param maxLinesPerSequence Number of lines in each sequences
|
||||
* @param skipNumLines Number of lines to skip at the start of the file (only skipped once, not per sequence)
|
||||
* @param stride Number of lines between records (increment window > 1 line)
|
||||
* @param stride Number of lines between records (increment window > 1 line)
|
||||
* @param delimiter Delimiter between entries in the same line, for example ","
|
||||
*/
|
||||
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int skipNumLines, int stride, String delimiter) {
|
||||
|
|
|
@ -302,7 +302,7 @@ public class SerializableCSVParser implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* precondition: sb.length() > 0
|
||||
* precondition: sb.length() > 0
|
||||
*
|
||||
* @param sb A sequence of characters to examine
|
||||
* @return true if every character in the sequence is whitespace
|
||||
|
|
|
@ -114,8 +114,6 @@ public class InMemoryRecordReader implements RecordReader {
|
|||
|
||||
/**
|
||||
* Reset record reader iterator
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public void reset() {
|
||||
|
|
|
@ -195,8 +195,6 @@ public class InMemorySequenceRecordReader implements SequenceRecordReader {
|
|||
|
||||
/**
|
||||
* Reset record reader iterator
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public void reset() {
|
||||
|
|
|
@ -31,8 +31,8 @@ public class FieldSelection implements Serializable {
|
|||
|
||||
public static final Writable DEFAULT_MISSING_VALUE = new Text("");
|
||||
|
||||
private List<String[]> fieldPaths;
|
||||
private List<Writable> valueIfMissing;
|
||||
private final List<String[]> fieldPaths;
|
||||
private final List<Writable> valueIfMissing;
|
||||
|
||||
private FieldSelection(Builder builder) {
|
||||
this.fieldPaths = builder.fieldPaths;
|
||||
|
@ -53,8 +53,8 @@ public class FieldSelection implements Serializable {
|
|||
|
||||
public static class Builder {
|
||||
|
||||
private List<String[]> fieldPaths = new ArrayList<>();
|
||||
private List<Writable> valueIfMissing = new ArrayList<>();
|
||||
private final List<String[]> fieldPaths = new ArrayList<>();
|
||||
private final List<Writable> valueIfMissing = new ArrayList<>();
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
@ -29,8 +29,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
|
||||
public class JacksonLineRecordReader extends LineRecordReader {
|
||||
|
||||
private FieldSelection selection;
|
||||
private ObjectMapper mapper;
|
||||
private final FieldSelection selection;
|
||||
private final ObjectMapper mapper;
|
||||
|
||||
public JacksonLineRecordReader(FieldSelection selection, ObjectMapper mapper) {
|
||||
this.selection = selection;
|
||||
|
|
|
@ -39,8 +39,8 @@ import java.util.NoSuchElementException;
|
|||
|
||||
public class JacksonLineSequenceRecordReader extends FileRecordReader implements SequenceRecordReader {
|
||||
|
||||
private FieldSelection selection;
|
||||
private ObjectMapper mapper;
|
||||
private final FieldSelection selection;
|
||||
private final ObjectMapper mapper;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
|
@ -45,12 +45,12 @@ public class JacksonRecordReader extends BaseRecordReader {
|
|||
|
||||
private static final TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {};
|
||||
|
||||
private FieldSelection selection;
|
||||
private ObjectMapper mapper;
|
||||
private boolean shuffle;
|
||||
private long rngSeed;
|
||||
private PathLabelGenerator labelGenerator;
|
||||
private int labelPosition;
|
||||
private final FieldSelection selection;
|
||||
private final ObjectMapper mapper;
|
||||
private final boolean shuffle;
|
||||
private final long rngSeed;
|
||||
private final PathLabelGenerator labelGenerator;
|
||||
private final int labelPosition;
|
||||
private InputSplit is;
|
||||
private Random r;
|
||||
@Getter @Setter
|
||||
|
|
|
@ -35,7 +35,7 @@ import java.util.List;
|
|||
|
||||
public class MatlabRecordReader extends FileRecordReader {
|
||||
|
||||
private List<List<Writable>> records = new ArrayList<>();
|
||||
private final List<List<Writable>> records = new ArrayList<>();
|
||||
private Iterator<List<Writable>> currIter;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -96,8 +96,6 @@ public class SVMLightRecordReader extends LineRecordReader {
|
|||
* Set configuration.
|
||||
*
|
||||
* @param conf DataVec configuration
|
||||
* @throws IOException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
|
@ -181,7 +179,7 @@ public class SVMLightRecordReader extends LineRecordReader {
|
|||
if (index < 0)
|
||||
throw new NumberFormatException("");
|
||||
} catch (NumberFormatException e) {
|
||||
String msg = String.format("Feature index must be positive integer (found %s)", featureTokens[i].toString());
|
||||
String msg = String.format("Feature index must be positive integer (found %s)", featureTokens[i]);
|
||||
throw new NumberFormatException(msg);
|
||||
}
|
||||
|
||||
|
@ -218,7 +216,7 @@ public class SVMLightRecordReader extends LineRecordReader {
|
|||
if (index < 0)
|
||||
throw new NumberFormatException("");
|
||||
} catch (NumberFormatException e) {
|
||||
String msg = String.format("Multilabel index must be positive integer (found %s)", labelTokens[i].toString());
|
||||
String msg = String.format("Multilabel index must be positive integer (found %s)", labelTokens[i]);
|
||||
throw new NumberFormatException(msg);
|
||||
}
|
||||
|
||||
|
|
|
@ -41,11 +41,11 @@ import java.util.regex.Pattern;
|
|||
public class RegexLineRecordReader extends LineRecordReader {
|
||||
public final static String SKIP_NUM_LINES = NAME_SPACE + ".skipnumlines";
|
||||
|
||||
private String regex;
|
||||
private final String regex;
|
||||
private int skipNumLines;
|
||||
private Pattern pattern;
|
||||
private final Pattern pattern;
|
||||
private int numLinesSkipped;
|
||||
private int currLine = 0;
|
||||
private final int currLine = 0;
|
||||
|
||||
public RegexLineRecordReader(String regex, int skipNumLines) {
|
||||
this.regex = regex;
|
||||
|
|
|
@ -61,11 +61,11 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
|
|||
|
||||
public static final Logger LOG = LoggerFactory.getLogger(RegexSequenceRecordReader.class);
|
||||
|
||||
private String regex;
|
||||
private final String regex;
|
||||
private int skipNumLines;
|
||||
private Pattern pattern;
|
||||
private final Pattern pattern;
|
||||
private transient Charset charset;
|
||||
private LineErrorHandling errorHandling;
|
||||
private final LineErrorHandling errorHandling;
|
||||
|
||||
public RegexSequenceRecordReader(String regex, int skipNumLines) {
|
||||
this(regex, skipNumLines, DEFAULT_CHARSET, DEFAULT_ERROR_HANDLING);
|
||||
|
@ -92,7 +92,7 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
|
|||
|
||||
@Override
|
||||
public List<List<Writable>> sequenceRecord(URI uri, DataInputStream dataInputStream) throws IOException {
|
||||
String fileContents = IOUtils.toString(new BufferedInputStream(dataInputStream), charset.name());
|
||||
String fileContents = IOUtils.toString(new BufferedInputStream(dataInputStream), charset);
|
||||
return loadSequence(fileContents, uri);
|
||||
}
|
||||
|
||||
|
|
|
@ -145,8 +145,6 @@ public class TransformProcessRecordReader implements RecordReader {
|
|||
|
||||
/**
|
||||
* Reset record reader iterator
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public void reset() {
|
||||
|
|
|
@ -195,8 +195,6 @@ public class TransformProcessSequenceRecordReader implements SequenceRecordReade
|
|||
|
||||
/**
|
||||
* Reset record reader iterator
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public void reset() {
|
||||
|
|
|
@ -94,7 +94,7 @@ public class SVMLightRecordWriter extends FileRecordWriter {
|
|||
@Override
|
||||
public PartitionMetaData write(List<Writable> record) throws IOException {
|
||||
if (!record.isEmpty()) {
|
||||
List<Writable> recordList = record instanceof List ? (List<Writable>) record : new ArrayList<>(record);
|
||||
List<Writable> recordList = record instanceof List ? record : new ArrayList<>(record);
|
||||
|
||||
/* Infer label columns, if necessary. The default is
|
||||
* to assume that last column is a label and that the
|
||||
|
@ -198,7 +198,7 @@ public class SVMLightRecordWriter extends FileRecordWriter {
|
|||
}
|
||||
|
||||
// Remove extra label delimiter at beginning
|
||||
String line = result.substring(1).toString();
|
||||
String line = result.substring(1);
|
||||
out.write(line.getBytes());
|
||||
out.write(NEW_LINE.getBytes());
|
||||
|
||||
|
|
|
@ -124,9 +124,7 @@ public abstract class BaseInputSplit implements InputSplit {
|
|||
|
||||
for (int i = 0; i < weights.length; i++) {
|
||||
List<URI> uris = new ArrayList<>();
|
||||
for (int j = partitions[i]; j < partitions[i + 1]; j++) {
|
||||
uris.add(paths[j]);
|
||||
}
|
||||
uris.addAll(Arrays.asList(paths).subList(partitions[i], partitions[i + 1]));
|
||||
splits[i] = new CollectionInputSplit(uris);
|
||||
}
|
||||
return splits;
|
||||
|
|
|
@ -138,7 +138,7 @@ public class FileSplit extends BaseInputSplit {
|
|||
return addNewLocation(new File(rootDir, UUID.randomUUID().toString()).toURI().toString());
|
||||
else {
|
||||
//add a file in the same directory as the file with the same extension as the original file
|
||||
return addNewLocation(new File(rootDir.getParent(), UUID.randomUUID().toString() + "." + FilenameUtils.getExtension(rootDir.getAbsolutePath())).toURI().toString());
|
||||
return addNewLocation(new File(rootDir.getParent(), UUID.randomUUID() + "." + FilenameUtils.getExtension(rootDir.getAbsolutePath())).toURI().toString());
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ import java.util.Iterator;
|
|||
|
||||
public class InputStreamInputSplit implements InputSplit {
|
||||
private InputStream is;
|
||||
private URI[] location;
|
||||
private final URI[] location;
|
||||
|
||||
/**
|
||||
* Instantiate with the given
|
||||
|
@ -130,7 +130,7 @@ public class InputStreamInputSplit implements InputSplit {
|
|||
public Iterator<String> locationsPathIterator() {
|
||||
if(location.length >= 1)
|
||||
return Collections.singletonList(location[0].getPath()).iterator();
|
||||
return Arrays.asList("").iterator();
|
||||
return Collections.singletonList("").iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -33,7 +33,7 @@ import java.util.List;
|
|||
* has delimited data of some kind.
|
||||
*/
|
||||
public class ListStringSplit implements InputSplit {
|
||||
private List<List<String>> data;
|
||||
private final List<List<String>> data;
|
||||
|
||||
|
||||
public ListStringSplit(List<List<String>> data) {
|
||||
|
|
|
@ -43,12 +43,12 @@ public class NumberedFileInputSplit implements InputSplit {
|
|||
* the index of the file, possibly zero-padded to x digits if the pattern is in the form %0xd.
|
||||
* @param minIdxInclusive Minimum index/number (starting number in sequence of files, inclusive)
|
||||
* @param maxIdxInclusive Maximum index/number (last number in sequence of files, inclusive)
|
||||
* @see {NumberedFileInputSplitTest}
|
||||
*
|
||||
*/
|
||||
public NumberedFileInputSplit(String baseString, int minIdxInclusive, int maxIdxInclusive) {
|
||||
Matcher m = p.matcher(baseString);
|
||||
if (baseString == null || !m.find()) {
|
||||
throw new IllegalArgumentException("Base String must match this regular expression: " + p.toString());
|
||||
throw new IllegalArgumentException("Base String must match this regular expression: " + p);
|
||||
}
|
||||
this.baseString = baseString;
|
||||
this.minIdx = minIdxInclusive;
|
||||
|
|
|
@ -31,7 +31,7 @@ import java.util.Iterator;
|
|||
* @author Adam Gibson
|
||||
*/
|
||||
public class StringSplit implements InputSplit {
|
||||
private String data;
|
||||
private final String data;
|
||||
|
||||
public StringSplit(String data) {
|
||||
this.data = data;
|
||||
|
|
|
@ -449,7 +449,7 @@ public class TransformProcess implements Serializable {
|
|||
/**
|
||||
* Infer the categories for the given record reader for a particular column
|
||||
* Note that each "column index" is a column in the context of:
|
||||
* List<Writable> record = ...;
|
||||
* {@code List<Writable> record = ...;}
|
||||
* record.get(columnIndex);
|
||||
*
|
||||
* Note that anything passed in as a column will be automatically converted to a
|
||||
|
@ -483,7 +483,7 @@ public class TransformProcess implements Serializable {
|
|||
* if you have more than one column you plan on inferring categories for)
|
||||
*
|
||||
* Note that each "column index" is a column in the context of:
|
||||
* List<Writable> record = ...;
|
||||
* {@code List<Writable> record = ...;}
|
||||
* record.get(columnIndex);
|
||||
*
|
||||
*
|
||||
|
@ -607,8 +607,8 @@ public class TransformProcess implements Serializable {
|
|||
*/
|
||||
public static class Builder {
|
||||
|
||||
private List<DataAction> actionList = new ArrayList<>();
|
||||
private Schema initialSchema;
|
||||
private final List<DataAction> actionList = new ArrayList<>();
|
||||
private final Schema initialSchema;
|
||||
|
||||
public Builder(Schema initialSchema) {
|
||||
this.initialSchema = initialSchema;
|
||||
|
@ -1274,7 +1274,7 @@ public class TransformProcess implements Serializable {
|
|||
* not be modified.
|
||||
*
|
||||
* @param columnName Name of the column in which to do replacement
|
||||
* @param mapping Map of oldValues -> newValues
|
||||
* @param mapping Map of oldValues -> newValues
|
||||
*/
|
||||
public Builder stringMapTransform(String columnName, Map<String, String> mapping) {
|
||||
return transform(new StringMapTransform(columnName, mapping));
|
||||
|
@ -1358,7 +1358,8 @@ public class TransformProcess implements Serializable {
|
|||
* Keys in the map are the regular expressions; the Values in the map are their String replacements.
|
||||
* For example:
|
||||
* <blockquote>
|
||||
* <table cellpadding="2">
|
||||
* <table>
|
||||
* <caption></caption>
|
||||
* <tr>
|
||||
* <th>Original</th>
|
||||
* <th>Regex</th>
|
||||
|
@ -1378,7 +1379,7 @@ public class TransformProcess implements Serializable {
|
|||
* <td>BoneConeTone</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>'  4.25 '</td>
|
||||
* <td>' 4.25 '</td>
|
||||
* <td>^\\s+|\\s+$</td>
|
||||
* <td></td>
|
||||
* <td>'4.25'</td>
|
||||
|
|
|
@ -55,7 +55,7 @@ public class NDArrayAnalysis implements ColumnAnalysis {
|
|||
public String toString() {
|
||||
Map<Integer, Long> sortedCountsByRank = new LinkedHashMap<>();
|
||||
List<Integer> keys =
|
||||
new ArrayList<>(countsByRank == null ? Collections.<Integer>emptySet() : countsByRank.keySet());
|
||||
new ArrayList<>(countsByRank == null ? Collections.emptySet() : countsByRank.keySet());
|
||||
Collections.sort(keys);
|
||||
for (Integer i : keys) {
|
||||
sortedCountsByRank.put(i, countsByRank.get(i));
|
||||
|
|
|
@ -101,8 +101,8 @@ public class IntegerAnalysisCounter implements AnalysisCounter<IntegerAnalysisCo
|
|||
countNegative++;
|
||||
}
|
||||
|
||||
digest.add((double) value);
|
||||
counter.add((double) value);
|
||||
digest.add(value);
|
||||
counter.add(value);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ public class NDArrayAnalysisCounter implements AnalysisCounter<NDArrayAnalysisCo
|
|||
private long minLength = Long.MAX_VALUE;
|
||||
private long maxLength = -1;
|
||||
private long totalNDArrayValues;
|
||||
private Map<Integer, Long> countsByRank = new HashMap<>();
|
||||
private final Map<Integer, Long> countsByRank = new HashMap<>();
|
||||
private double minValue = Double.MAX_VALUE;
|
||||
private double maxValue = -Double.MAX_VALUE;
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ public class StringAnalysisCounter implements AnalysisCounter<StringAnalysisCoun
|
|||
countMaxLength = 1;
|
||||
}
|
||||
|
||||
counter.add((double) length);
|
||||
counter.add(length);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -27,9 +27,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
public class CategoricalHistogramCounter implements HistogramCounter {
|
||||
private HashMap<String, Integer> counts = new HashMap<>();
|
||||
private final HashMap<String, Integer> counts = new HashMap<>();
|
||||
|
||||
private List<String> stateNames;
|
||||
private final List<String> stateNames;
|
||||
|
||||
public CategoricalHistogramCounter(List<String> stateNames) {
|
||||
this.stateNames = stateNames;
|
||||
|
|
|
@ -34,8 +34,8 @@ import java.io.ObjectInputStream;
|
|||
|
||||
public class TDigestDeserializer extends JsonDeserializer<TDigest> {
|
||||
@Override
|
||||
public TDigest deserialize(JsonParser jp, DeserializationContext d) throws IOException, JsonProcessingException {
|
||||
JsonNode node = (JsonNode)jp.getCodec().readTree(jp);
|
||||
public TDigest deserialize(JsonParser jp, DeserializationContext d) throws IOException {
|
||||
JsonNode node = jp.getCodec().readTree(jp);
|
||||
String field = node.get("digest").asText();
|
||||
Base64 b = new Base64();
|
||||
byte[] bytes = b.decode(field);
|
||||
|
|
|
@ -33,7 +33,7 @@ import java.io.ObjectOutputStream;
|
|||
|
||||
public class TDigestSerializer extends JsonSerializer<TDigest> {
|
||||
@Override
|
||||
public void serialize(TDigest td, JsonGenerator j, SerializerProvider sp) throws IOException, JsonProcessingException {
|
||||
public void serialize(TDigest td, JsonGenerator j, SerializerProvider sp) throws IOException {
|
||||
try(ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)){
|
||||
oos.writeObject(td);
|
||||
oos.close();
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.datavec.api.writable.Writable;
|
|||
public class BytesQualityAnalysisState implements QualityAnalysisState<BytesQualityAnalysisState> {
|
||||
|
||||
@Getter
|
||||
private BytesQuality bytesQuality;
|
||||
private final BytesQuality bytesQuality;
|
||||
|
||||
public BytesQualityAnalysisState() {
|
||||
this.bytesQuality = new BytesQuality();
|
||||
|
|
|
@ -31,8 +31,8 @@ public class CategoricalQualityAnalysisState implements QualityAnalysisState<Cat
|
|||
|
||||
@Getter
|
||||
private CategoricalQuality categoricalQuality;
|
||||
private CategoricalQualityAddFunction addFunction;
|
||||
private CategoricalQualityMergeFunction mergeFunction;
|
||||
private final CategoricalQualityAddFunction addFunction;
|
||||
private final CategoricalQualityMergeFunction mergeFunction;
|
||||
|
||||
public CategoricalQualityAnalysisState(CategoricalMetaData integerMetaData) {
|
||||
this.categoricalQuality = new CategoricalQuality();
|
||||
|
|
|
@ -31,8 +31,8 @@ public class IntegerQualityAnalysisState implements QualityAnalysisState<Integer
|
|||
|
||||
@Getter
|
||||
private IntegerQuality integerQuality;
|
||||
private IntegerQualityAddFunction addFunction;
|
||||
private IntegerQualityMergeFunction mergeFunction;
|
||||
private final IntegerQualityAddFunction addFunction;
|
||||
private final IntegerQualityMergeFunction mergeFunction;
|
||||
|
||||
public IntegerQualityAnalysisState(IntegerMetaData integerMetaData) {
|
||||
this.integerQuality = new IntegerQuality(0, 0, 0, 0, 0);
|
||||
|
|
|
@ -31,8 +31,8 @@ public class LongQualityAnalysisState implements QualityAnalysisState<LongQualit
|
|||
|
||||
@Getter
|
||||
private LongQuality longQuality;
|
||||
private LongQualityAddFunction addFunction;
|
||||
private LongQualityMergeFunction mergeFunction;
|
||||
private final LongQualityAddFunction addFunction;
|
||||
private final LongQualityMergeFunction mergeFunction;
|
||||
|
||||
public LongQualityAnalysisState(LongMetaData longMetaData) {
|
||||
this.longQuality = new LongQuality();
|
||||
|
|
|
@ -31,8 +31,8 @@ public class RealQualityAnalysisState implements QualityAnalysisState<RealQualit
|
|||
|
||||
@Getter
|
||||
private DoubleQuality realQuality;
|
||||
private RealQualityAddFunction addFunction;
|
||||
private RealQualityMergeFunction mergeFunction;
|
||||
private final RealQualityAddFunction addFunction;
|
||||
private final RealQualityMergeFunction mergeFunction;
|
||||
|
||||
public RealQualityAnalysisState(DoubleMetaData realMetaData) {
|
||||
this.realQuality = new DoubleQuality();
|
||||
|
|
|
@ -31,8 +31,8 @@ public class StringQualityAnalysisState implements QualityAnalysisState<StringQu
|
|||
|
||||
@Getter
|
||||
private StringQuality stringQuality;
|
||||
private StringQualityAddFunction addFunction;
|
||||
private StringQualityMergeFunction mergeFunction;
|
||||
private final StringQualityAddFunction addFunction;
|
||||
private final StringQualityMergeFunction mergeFunction;
|
||||
|
||||
public StringQualityAnalysisState(StringMetaData stringMetaData) {
|
||||
this.stringQuality = new StringQuality();
|
||||
|
|
|
@ -31,8 +31,8 @@ public class TimeQualityAnalysisState implements QualityAnalysisState<TimeQualit
|
|||
|
||||
@Getter
|
||||
private TimeQuality timeQuality;
|
||||
private TimeQualityAddFunction addFunction;
|
||||
private TimeQualityMergeFunction mergeFunction;
|
||||
private final TimeQualityAddFunction addFunction;
|
||||
private final TimeQualityMergeFunction mergeFunction;
|
||||
|
||||
public TimeQualityAnalysisState(TimeMetaData timeMetaData) {
|
||||
this.timeQuality = new TimeQuality();
|
||||
|
|
|
@ -46,12 +46,11 @@ public class SequenceLengthAnalysis implements Serializable {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("SequenceLengthAnalysis(").append("totalNumSequences=").append(totalNumSequences)
|
||||
.append(",minSeqLength=").append(minSeqLength).append(",maxSeqLength=").append(maxSeqLength)
|
||||
.append(",countZeroLength=").append(countZeroLength).append(",countOneLength=")
|
||||
.append(countOneLength).append(",meanLength=").append(meanLength).append(")");
|
||||
return sb.toString();
|
||||
String sb = "SequenceLengthAnalysis(" + "totalNumSequences=" + totalNumSequences +
|
||||
",minSeqLength=" + minSeqLength + ",maxSeqLength=" + maxSeqLength +
|
||||
",countZeroLength=" + countZeroLength + ",countOneLength=" +
|
||||
countOneLength + ",meanLength=" + meanLength + ")";
|
||||
return sb;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -92,7 +92,7 @@ public abstract class BaseColumnCondition implements ColumnCondition {
|
|||
return false;
|
||||
case NoSequenceMode:
|
||||
throw new IllegalStateException(
|
||||
"Column condition " + toString() + " does not support sequence execution");
|
||||
"Column condition " + this + " does not support sequence execution");
|
||||
default:
|
||||
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ public abstract class BaseColumnCondition implements ColumnCondition {
|
|||
return false;
|
||||
case NoSequenceMode:
|
||||
throw new IllegalStateException(
|
||||
"Column condition " + toString() + " does not support sequence execution");
|
||||
"Column condition " + this + " does not support sequence execution");
|
||||
default:
|
||||
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ public class DoubleColumnCondition extends BaseColumnCondition {
|
|||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||
*
|
||||
* @param columnName Column to check for the condition
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public DoubleColumnCondition(String columnName, ConditionOp op, double value) {
|
||||
|
@ -54,7 +54,7 @@ public class DoubleColumnCondition extends BaseColumnCondition {
|
|||
*
|
||||
* @param column Column to check for the condition
|
||||
* @param sequenceConditionMode Mode for handling sequence data
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public DoubleColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
||||
|
|
|
@ -42,7 +42,7 @@ public class FloatColumnCondition extends BaseColumnCondition {
|
|||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||
*
|
||||
* @param columnName Column to check for the condition
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public FloatColumnCondition(String columnName, ConditionOp op, float value) {
|
||||
|
@ -54,7 +54,7 @@ public class FloatColumnCondition extends BaseColumnCondition {
|
|||
*
|
||||
* @param column Column to check for the condition
|
||||
* @param sequenceConditionMode Mode for handling sequence data
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public FloatColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
||||
|
|
|
@ -42,7 +42,7 @@ public class IntegerColumnCondition extends BaseColumnCondition {
|
|||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||
*
|
||||
* @param columnName Column to check for the condition
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public IntegerColumnCondition(String columnName, ConditionOp op, int value) {
|
||||
|
@ -54,7 +54,7 @@ public class IntegerColumnCondition extends BaseColumnCondition {
|
|||
*
|
||||
* @param column Column to check for the condition
|
||||
* @param sequenceConditionMode Mode for handling sequence data
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public IntegerColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
||||
|
|
|
@ -42,7 +42,7 @@ public class LongColumnCondition extends BaseColumnCondition {
|
|||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||
*
|
||||
* @param columnName Column to check for the condition
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public LongColumnCondition(String columnName, ConditionOp op, long value) {
|
||||
|
@ -54,7 +54,7 @@ public class LongColumnCondition extends BaseColumnCondition {
|
|||
*
|
||||
* @param column Column to check for the condition
|
||||
* @param sequenceConditionMode Mode for handling sequence data
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Value to use in the condition
|
||||
*/
|
||||
public LongColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {
|
||||
|
|
|
@ -42,7 +42,7 @@ public class TimeColumnCondition extends BaseColumnCondition {
|
|||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||
*
|
||||
* @param columnName Column to check for the condition
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Time value (in epoch millisecond format) to use in the condition
|
||||
*/
|
||||
public TimeColumnCondition(String columnName, ConditionOp op, long value) {
|
||||
|
@ -54,7 +54,7 @@ public class TimeColumnCondition extends BaseColumnCondition {
|
|||
*
|
||||
* @param column Column to check for the condition
|
||||
* @param sequenceConditionMode Mode for handling sequence data
|
||||
* @param op Operation (<, >=, !=, etc)
|
||||
* @param op Operation {@code (<, >=, !=, etc)}
|
||||
* @param value Time value (in epoch millisecond format) to use in the condition
|
||||
*/
|
||||
public TimeColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {
|
||||
|
|
|
@ -111,24 +111,18 @@ public class FilterInvalidValues implements Filter {
|
|||
private boolean filterColumn(List<?> row, int i) {
|
||||
ColumnMetaData meta = schema.getMetaData(i);
|
||||
if (row.get(i) instanceof Float) {
|
||||
if (!meta.isValid(new FloatWritable((Float) row.get(i))))
|
||||
return true;
|
||||
return !meta.isValid(new FloatWritable((Float) row.get(i)));
|
||||
} else if (row.get(i) instanceof Double) {
|
||||
if (!meta.isValid(new DoubleWritable((Double) row.get(i))))
|
||||
return true;
|
||||
return !meta.isValid(new DoubleWritable((Double) row.get(i)));
|
||||
} else if (row.get(i) instanceof String) {
|
||||
if (!meta.isValid(new Text(((String) row.get(i)).toString())))
|
||||
return true;
|
||||
return !meta.isValid(new Text(((String) row.get(i))));
|
||||
} else if (row.get(i) instanceof Integer) {
|
||||
if (!meta.isValid(new IntWritable((Integer) row.get(i))))
|
||||
return true;
|
||||
return !meta.isValid(new IntWritable((Integer) row.get(i)));
|
||||
|
||||
} else if (row.get(i) instanceof Long) {
|
||||
if (!meta.isValid(new LongWritable((Long) row.get(i))))
|
||||
return true;
|
||||
return !meta.isValid(new LongWritable((Long) row.get(i)));
|
||||
} else if (row.get(i) instanceof Boolean) {
|
||||
if (!meta.isValid(new BooleanWritable((Boolean) row.get(i))))
|
||||
return true;
|
||||
return !meta.isValid(new BooleanWritable((Boolean) row.get(i)));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -96,7 +96,7 @@ public class Join implements Serializable {
|
|||
|
||||
public static class Builder {
|
||||
|
||||
private JoinType joinType;
|
||||
private final JoinType joinType;
|
||||
private Schema leftSchema;
|
||||
private Schema rightSchema;
|
||||
private String[] joinColumnsLeft;
|
||||
|
|
|
@ -84,9 +84,8 @@ public class BinaryMetaData extends BaseColumnMetaData {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("BinaryMetaData(name=\"").append(name).append("\",");
|
||||
sb.append(")");
|
||||
return sb.toString();
|
||||
String sb = "BinaryMetaData(name=\"" + name + "\"," +
|
||||
")";
|
||||
return sb;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,9 +84,8 @@ public class BooleanMetaData extends BaseColumnMetaData {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("BooleanMetaData(name=\"").append(name).append("\",");
|
||||
sb.append(")");
|
||||
return sb.toString();
|
||||
String sb = "BooleanMetaData(name=\"" + name + "\"," +
|
||||
")";
|
||||
return sb;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,10 +84,7 @@ public class DoubleMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && d < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return maxAllowedValue == null || !(d > maxAllowedValue);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -115,10 +112,7 @@ public class DoubleMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && d < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return maxAllowedValue == null || !(d > maxAllowedValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -84,10 +84,7 @@ public class FloatMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && d < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return maxAllowedValue == null || d <= maxAllowedValue;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -115,10 +112,7 @@ public class FloatMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && d < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return maxAllowedValue == null || d <= maxAllowedValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -65,9 +65,7 @@ public class IntegerMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && value < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
||||
return false;
|
||||
return true;
|
||||
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -90,9 +88,7 @@ public class IntegerMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && value < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
||||
return false;
|
||||
return true;
|
||||
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -66,10 +66,7 @@ public class LongMetaData extends BaseColumnMetaData {
|
|||
}
|
||||
if (minAllowedValue != null && value < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -92,10 +89,7 @@ public class LongMetaData extends BaseColumnMetaData {
|
|||
|
||||
if (minAllowedValue != null && value < minAllowedValue)
|
||||
return false;
|
||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -97,9 +97,9 @@ public class AggregatorImpls {
|
|||
} else if (a instanceof Float || b instanceof Float) {
|
||||
return new Float(a.floatValue() + b.floatValue());
|
||||
} else if (a instanceof Long || b instanceof Long) {
|
||||
return new Long(a.longValue() + b.longValue());
|
||||
return Long.valueOf(a.longValue() + b.longValue());
|
||||
} else {
|
||||
return new Integer(a.intValue() + b.intValue());
|
||||
return Integer.valueOf(a.intValue() + b.intValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -146,9 +146,9 @@ public class AggregatorImpls {
|
|||
} else if (a instanceof Float || b instanceof Float) {
|
||||
return new Float(a.floatValue() * b.floatValue());
|
||||
} else if (a instanceof Long || b instanceof Long) {
|
||||
return new Long(a.longValue() * b.longValue());
|
||||
return Long.valueOf(a.longValue() * b.longValue());
|
||||
} else {
|
||||
return new Integer(a.intValue() * b.intValue());
|
||||
return Integer.valueOf(a.intValue() * b.intValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -347,7 +347,7 @@ public class AggregatorImpls {
|
|||
* of the square root of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
|
||||
*
|
||||
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
*/
|
||||
public static class AggregableStdDev<T extends Number> implements IAggregableReduceOp<T, Writable> {
|
||||
|
||||
|
@ -402,7 +402,7 @@ public class AggregatorImpls {
|
|||
* of the square root of the arithmetic mean of squared differences to the mean.
|
||||
*
|
||||
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
*/
|
||||
public static class AggregableUncorrectedStdDev<T extends Number> extends AggregableStdDev<T> {
|
||||
|
||||
|
@ -418,7 +418,7 @@ public class AggregatorImpls {
|
|||
* of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
|
||||
*
|
||||
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
*/
|
||||
public static class AggregableVariance<T extends Number> implements IAggregableReduceOp<T, Writable> {
|
||||
|
||||
|
@ -474,7 +474,7 @@ public class AggregatorImpls {
|
|||
* of the arithmetic mean of squared differences to the mean.
|
||||
*
|
||||
* See <a href="https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance">https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance</a>
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||
*/
|
||||
public static class AggregablePopulationVariance<T extends Number> extends AggregableVariance<T> {
|
||||
|
||||
|
@ -491,7 +491,7 @@ public class AggregatorImpls {
|
|||
* <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
|
||||
*
|
||||
* The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting
|
||||
* a nonzero `sp > p` in HyperLogLogPlus(p, sp) would trigger sparse
|
||||
* a nonzero `sp > p` in HyperLogLogPlus(p, sp) would trigger sparse
|
||||
* representation of registers, which may reduce the memory consumption
|
||||
* and increase accuracy when the cardinality is small.
|
||||
* @param <T>
|
||||
|
@ -501,7 +501,7 @@ public class AggregatorImpls {
|
|||
|
||||
private float p = 0.05f;
|
||||
@Getter
|
||||
private HyperLogLogPlus hll = new HyperLogLogPlus((int) Math.ceil(2.0 * Math.log(1.054 / p) / Math.log(2)), 0);
|
||||
private final HyperLogLogPlus hll = new HyperLogLogPlus((int) Math.ceil(2.0 * Math.log(1.054 / p) / Math.log(2)), 0);
|
||||
|
||||
public AggregableCountUnique(float precision) {
|
||||
this.p = precision;
|
||||
|
|
|
@ -36,7 +36,7 @@ public class DispatchWithConditionOp<U> extends DispatchOp<Writable, U>
|
|||
|
||||
@Getter
|
||||
@NonNull
|
||||
private List<Condition> conditions;
|
||||
private final List<Condition> conditions;
|
||||
|
||||
|
||||
public DispatchWithConditionOp(List<IAggregableReduceOp<Writable, List<U>>> ops, List<Condition> conds) {
|
||||
|
|
|
@ -37,14 +37,13 @@ public interface AggregableColumnReduction extends Serializable, ColumnOp {
|
|||
* and NOT the single row
|
||||
* (as is usually the case for {@code List<Writable>} instances)
|
||||
*
|
||||
* @param columnData The Writable objects for a column
|
||||
* @return Writable containing the reduced data
|
||||
*/
|
||||
IAggregableReduceOp<Writable, List<Writable>> reduceOp();
|
||||
|
||||
/**
|
||||
* Post-reduce: what is the name of the column?
|
||||
* For example, "myColumn" -> "mean(myColumn)"
|
||||
* For example, "myColumn" -> "mean(myColumn)"
|
||||
*
|
||||
* @param columnInputName Name of the column before reduction
|
||||
* @return Name of the column after the reduction
|
||||
|
|
|
@ -43,7 +43,7 @@ public interface ColumnReduction extends Serializable, ColumnOp {
|
|||
|
||||
/**
|
||||
* Post-reduce: what is the name of the column?
|
||||
* For example, "myColumn" -> "mean(myColumn)"
|
||||
* For example, "myColumn" -> "mean(myColumn)"
|
||||
*
|
||||
* @param columnInputName Name of the column before reduction
|
||||
* @return Name of the column after the reduction
|
||||
|
|
|
@ -291,11 +291,11 @@ public class Reducer implements IAssociativeReducer {
|
|||
|
||||
public static class Builder {
|
||||
|
||||
private ReduceOp defaultOp;
|
||||
private Map<String, List<ReduceOp>> opMap = new HashMap<>();
|
||||
private Map<String, AggregableColumnReduction> customReductions = new HashMap<>();
|
||||
private Map<String, ConditionalReduction> conditionalReductions = new HashMap<>();
|
||||
private Set<String> ignoreInvalidInColumns = new HashSet<>();
|
||||
private final ReduceOp defaultOp;
|
||||
private final Map<String, List<ReduceOp>> opMap = new HashMap<>();
|
||||
private final Map<String, AggregableColumnReduction> customReductions = new HashMap<>();
|
||||
private final Map<String, ConditionalReduction> conditionalReductions = new HashMap<>();
|
||||
private final Set<String> ignoreInvalidInColumns = new HashSet<>();
|
||||
private String[] keyColumns;
|
||||
|
||||
|
||||
|
@ -480,7 +480,6 @@ public class Reducer implements IAssociativeReducer {
|
|||
* ignored/excluded.
|
||||
*
|
||||
* @param column Name of the column to execute the conditional reduction on
|
||||
* @param outputName Name of the column, after the reduction has been executed
|
||||
* @param reductions Reductions to execute
|
||||
* @param condition Condition to use in the reductions
|
||||
*/
|
||||
|
@ -500,7 +499,6 @@ public class Reducer implements IAssociativeReducer {
|
|||
*
|
||||
* @param column Name of the column to execute the conditional reduction on
|
||||
* @param outputName Name of the column, after the reduction has been executed
|
||||
* @param reductions Reductions to execute
|
||||
* @param condition Condition to use in the reductions
|
||||
*/
|
||||
public Builder conditionalReduction(String column, String outputName, ReduceOp reduction, Condition condition) {
|
||||
|
|
|
@ -69,7 +69,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
|
|||
|
||||
@Override
|
||||
public List<ColumnMetaData> getColumnOutputMetaData(List<String> newColumnName, ColumnMetaData columnInputMeta) {
|
||||
return Collections.<ColumnMetaData>singletonList(new StringMetaData(newColumnName.get(0)));
|
||||
return Collections.singletonList(new StringMetaData(newColumnName.get(0)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -111,7 +111,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
|
|||
public static class AverageCoordinateReduceOp implements IAggregableReduceOp<Writable, List<Writable>> {
|
||||
private static final double PI_180 = Math.PI / 180.0;
|
||||
|
||||
private String delim;
|
||||
private final String delim;
|
||||
|
||||
private double sumx;
|
||||
private double sumy;
|
||||
|
@ -186,7 +186,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
|
|||
Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");
|
||||
|
||||
String str = latDeg + delim + longDeg;
|
||||
return Collections.<Writable>singletonList(new Text(str));
|
||||
return Collections.singletonList(new Text(str));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.datavec.api.writable.Writable;
|
|||
|
||||
public class TypeConversion {
|
||||
|
||||
private static TypeConversion SINGLETON = new TypeConversion();
|
||||
private static final TypeConversion SINGLETON = new TypeConversion();
|
||||
|
||||
private TypeConversion() {}
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ public class SplitMaxLengthSequence implements SequenceSplit {
|
|||
/**
|
||||
* @param maxSequenceLength max length of sequences
|
||||
* @param equalSplits if true: split larger sequences into equal sized subsequences. If false: split into
|
||||
* n maxSequenceLength sequences, and (if necessary) 1 with 1 <= length < maxSequenceLength
|
||||
* n maxSequenceLength sequences, and (if necessary) 1 with 1 <= length < maxSequenceLength
|
||||
*/
|
||||
public SplitMaxLengthSequence(@JsonProperty("maxSequenceLength") int maxSequenceLength,
|
||||
@JsonProperty("equalSplits") boolean equalSplits) {
|
||||
|
|
|
@ -295,7 +295,7 @@ public abstract class BaseSerializer {
|
|||
|
||||
/**
|
||||
* Deserialize an IStringReducer List serialized using {@link #serializeReducerList(List)}, or
|
||||
* an array serialized using {@link #serialize(IReducer[])}
|
||||
* an array serialized using {@code #serialize(IReducer[])}
|
||||
*
|
||||
* @param str String representation (YAML/JSON) of the IStringReducer list
|
||||
* @return {@code List<IStringReducer>}
|
||||
|
|
|
@ -34,8 +34,8 @@ import com.fasterxml.jackson.datatype.joda.JodaModule;
|
|||
@Slf4j
|
||||
public class JsonMappers {
|
||||
|
||||
private static ObjectMapper jsonMapper;
|
||||
private static ObjectMapper yamlMapper;
|
||||
private static final ObjectMapper jsonMapper;
|
||||
private static final ObjectMapper yamlMapper;
|
||||
private static ObjectMapper legacyMapper; //For 1.0.0-alpha and earlier TransformProcess etc
|
||||
|
||||
static {
|
||||
|
|
|
@ -24,7 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
|
||||
public class JsonSerializer extends BaseSerializer {
|
||||
|
||||
private ObjectMapper om;
|
||||
private final ObjectMapper om;
|
||||
|
||||
public JsonSerializer() {
|
||||
this.om = JsonMappers.getMapper();
|
||||
|
|
|
@ -37,7 +37,7 @@ public class ListWrappers {
|
|||
|
||||
@Getter
|
||||
public static class TransformList {
|
||||
private List<Transform> list;
|
||||
private final List<Transform> list;
|
||||
|
||||
public TransformList(@JsonProperty("list") List<Transform> list) {
|
||||
this.list = list;
|
||||
|
@ -46,7 +46,7 @@ public class ListWrappers {
|
|||
|
||||
@Getter
|
||||
public static class FilterList {
|
||||
private List<Filter> list;
|
||||
private final List<Filter> list;
|
||||
|
||||
public FilterList(@JsonProperty("list") List<Filter> list) {
|
||||
this.list = list;
|
||||
|
@ -55,7 +55,7 @@ public class ListWrappers {
|
|||
|
||||
@Getter
|
||||
public static class ConditionList {
|
||||
private List<Condition> list;
|
||||
private final List<Condition> list;
|
||||
|
||||
public ConditionList(@JsonProperty("list") List<Condition> list) {
|
||||
this.list = list;
|
||||
|
@ -64,7 +64,7 @@ public class ListWrappers {
|
|||
|
||||
@Getter
|
||||
public static class ReducerList {
|
||||
private List<IAssociativeReducer> list;
|
||||
private final List<IAssociativeReducer> list;
|
||||
|
||||
public ReducerList(@JsonProperty("list") List<IAssociativeReducer> list) {
|
||||
this.list = list;
|
||||
|
@ -73,7 +73,7 @@ public class ListWrappers {
|
|||
|
||||
@Getter
|
||||
public static class SequenceComparatorList {
|
||||
private List<SequenceComparator> list;
|
||||
private final List<SequenceComparator> list;
|
||||
|
||||
public SequenceComparatorList(@JsonProperty("list") List<SequenceComparator> list) {
|
||||
this.list = list;
|
||||
|
@ -82,7 +82,7 @@ public class ListWrappers {
|
|||
|
||||
@Getter
|
||||
public static class DataActionList {
|
||||
private List<DataAction> list;
|
||||
private final List<DataAction> list;
|
||||
|
||||
public DataActionList(@JsonProperty("list") List<DataAction> list) {
|
||||
this.list = list;
|
||||
|
|
|
@ -24,7 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
|
||||
public class YamlSerializer extends BaseSerializer {
|
||||
|
||||
private ObjectMapper om;
|
||||
private final ObjectMapper om;
|
||||
|
||||
public YamlSerializer() {
|
||||
this.om = JsonMappers.getMapperYaml();
|
||||
|
|
|
@ -177,10 +177,10 @@ public class StringReducer implements IStringReducer {
|
|||
|
||||
public static class Builder {
|
||||
|
||||
private StringReduceOp defaultOp;
|
||||
private Map<String, StringReduceOp> opMap = new HashMap<>();
|
||||
private Map<String, ColumnReduction> customReductions = new HashMap<>();
|
||||
private Set<String> ignoreInvalidInColumns = new HashSet<>();
|
||||
private final StringReduceOp defaultOp;
|
||||
private final Map<String, StringReduceOp> opMap = new HashMap<>();
|
||||
private final Map<String, ColumnReduction> customReductions = new HashMap<>();
|
||||
private final Set<String> ignoreInvalidInColumns = new HashSet<>();
|
||||
private String outputColumnName;
|
||||
private List<String> inputColumns;
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ public abstract class BaseColumnTransform extends BaseTransform implements Colum
|
|||
if (writables.size() != inputSchema.numColumns()) {
|
||||
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
||||
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
||||
+ "). Transform = " + toString());
|
||||
+ "). Transform = " + this);
|
||||
}
|
||||
int n = writables.size();
|
||||
List<Writable> out = new ArrayList<>(n);
|
||||
|
|
|
@ -96,7 +96,7 @@ public class CategoricalToIntegerTransform extends BaseTransform {
|
|||
if (writables.size() != inputSchema.numColumns()) {
|
||||
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
||||
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
||||
+ "). Transform = " + toString());
|
||||
+ "). Transform = " + this);
|
||||
}
|
||||
int idx = getColumnIdx();
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ public class CategoricalToOneHotTransform extends BaseTransform {
|
|||
if (writables.size() != inputSchema.numColumns()) {
|
||||
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
||||
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
||||
+ "). Transform = " + toString());
|
||||
+ "). Transform = " + this);
|
||||
}
|
||||
int idx = getColumnIdx();
|
||||
|
||||
|
|
|
@ -89,7 +89,7 @@ public class IntegerToCategoricalTransform extends BaseColumnTransform {
|
|||
|
||||
IntegerToCategoricalTransform o2 = (IntegerToCategoricalTransform) o;
|
||||
|
||||
return map != null ? map.equals(o2.map) : o2.map == null;
|
||||
return Objects.equals(map, o2.map);
|
||||
|
||||
}
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue