Fix javadoc and cleanup
parent
5c98c5e1ed
commit
07c052d822
|
@ -139,7 +139,6 @@ public class BrianTest /*extends BaseDL4JTest*/ {
|
||||||
//.setExecutorEnv("spark.executor.cores", "2")
|
//.setExecutorEnv("spark.executor.cores", "2")
|
||||||
//.setExecutorEnv("spark.executor.memory", "2g")
|
//.setExecutorEnv("spark.executor.memory", "2g")
|
||||||
//.set("spark.submit.deployMode", "client")
|
//.set("spark.submit.deployMode", "client")
|
||||||
;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
SparkSession spark = SparkSession
|
SparkSession spark = SparkSession
|
||||||
|
@ -240,7 +239,7 @@ public class BrianTest /*extends BaseDL4JTest*/ {
|
||||||
*/
|
*/
|
||||||
TransformProcess tp = new TransformProcess.Builder(inputSchema)
|
TransformProcess tp = new TransformProcess.Builder(inputSchema)
|
||||||
.removeAllColumnsExceptFor("country_code", "lat", "lon")
|
.removeAllColumnsExceptFor("country_code", "lat", "lon")
|
||||||
.stringToCategorical("country_code", Arrays.asList(new String[] {"GR", "FR", "DE", "CH"}))
|
.stringToCategorical("country_code", Arrays.asList("GR", "FR", "DE", "CH"))
|
||||||
.filter(new FilterInvalidValues())
|
.filter(new FilterInvalidValues())
|
||||||
.categoricalToOneHot("country_code")
|
.categoricalToOneHot("country_code")
|
||||||
.build();
|
.build();
|
||||||
|
|
|
@ -225,7 +225,7 @@ public class BrianTest2 /*extends BaseDL4JTest*/ {
|
||||||
*/
|
*/
|
||||||
TransformProcess tp = new TransformProcess.Builder(inputSchema)
|
TransformProcess tp = new TransformProcess.Builder(inputSchema)
|
||||||
.removeAllColumnsExceptFor("country_code", "lat", "lon")
|
.removeAllColumnsExceptFor("country_code", "lat", "lon")
|
||||||
.stringToCategorical("country_code", Arrays.asList(new String[] {"GR", "FR", "DE", "CH"}))
|
.stringToCategorical("country_code", Arrays.asList("GR", "FR", "DE", "CH"))
|
||||||
.filter(new FilterInvalidValues())
|
.filter(new FilterInvalidValues())
|
||||||
.categoricalToOneHot("country_code")
|
.categoricalToOneHot("country_code")
|
||||||
.build();
|
.build();
|
||||||
|
|
|
@ -91,10 +91,10 @@ public class IntegrationTestRunner {
|
||||||
|
|
||||||
public static final double MAX_REL_ERROR_SCORES = 1e-4;
|
public static final double MAX_REL_ERROR_SCORES = 1e-4;
|
||||||
|
|
||||||
private static List<Class<?>> layerClasses = new ArrayList<>();
|
private static final List<Class<?>> layerClasses = new ArrayList<>();
|
||||||
private static List<Class<?>> preprocClasses = new ArrayList<>();
|
private static final List<Class<?>> preprocClasses = new ArrayList<>();
|
||||||
private static List<Class<?>> graphVertexClasses = new ArrayList<>();
|
private static final List<Class<?>> graphVertexClasses = new ArrayList<>();
|
||||||
private static List<Class<?>> evaluationClasses = new ArrayList<>();
|
private static final List<Class<?>> evaluationClasses = new ArrayList<>();
|
||||||
|
|
||||||
private static Map<Class<?>, Integer> layerConfClassesSeen = new HashMap<>();
|
private static Map<Class<?>, Integer> layerConfClassesSeen = new HashMap<>();
|
||||||
private static Map<Class<?>, Integer> preprocessorConfClassesSeen = new HashMap<>();
|
private static Map<Class<?>, Integer> preprocessorConfClassesSeen = new HashMap<>();
|
||||||
|
|
|
@ -67,8 +67,8 @@ public class CNN1DTestCases {
|
||||||
testOverfitting = false;
|
testOverfitting = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int miniBatchSize = 16;
|
final int miniBatchSize = 16;
|
||||||
int exampleLength = 128;
|
final int exampleLength = 128;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ModelType modelType() {
|
public ModelType modelType() {
|
||||||
|
|
|
@ -271,11 +271,11 @@ public class CNN2DTestCases {
|
||||||
public static TestCase getYoloHouseNumbers() {
|
public static TestCase getYoloHouseNumbers() {
|
||||||
return new TestCase() {
|
return new TestCase() {
|
||||||
|
|
||||||
private int width = 416;
|
private final int width = 416;
|
||||||
private int height = 416;
|
private final int height = 416;
|
||||||
private int nChannels = 3;
|
private final int nChannels = 3;
|
||||||
private int gridWidth = 13;
|
private final int gridWidth = 13;
|
||||||
private int gridHeight = 13;
|
private final int gridHeight = 13;
|
||||||
|
|
||||||
{
|
{
|
||||||
testName = "YOLOHouseNumbers";
|
testName = "YOLOHouseNumbers";
|
||||||
|
|
|
@ -108,7 +108,7 @@ public class CNN3DTestCases {
|
||||||
public MultiDataSet getGradientsTestData() throws Exception {
|
public MultiDataSet getGradientsTestData() throws Exception {
|
||||||
Nd4j.getRandom().setSeed(12345);
|
Nd4j.getRandom().setSeed(12345);
|
||||||
//NCDHW format
|
//NCDHW format
|
||||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||||
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
||||||
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
|
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
|
||||||
}
|
}
|
||||||
|
@ -135,6 +135,6 @@ public class CNN3DTestCases {
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
};
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,8 +93,8 @@ public class RNNTestCases {
|
||||||
minAbsErrorParamsPostTraining = 2e-3;
|
minAbsErrorParamsPostTraining = 2e-3;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int miniBatchSize = 32;
|
private final int miniBatchSize = 32;
|
||||||
private int exampleLength = 200;
|
private final int exampleLength = 200;
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -31,23 +31,24 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class CharacterIterator implements DataSetIterator {
|
public class CharacterIterator implements DataSetIterator {
|
||||||
//Valid characters
|
//Valid characters
|
||||||
private char[] validCharacters;
|
private final char[] validCharacters;
|
||||||
//Maps each character to an index ind the input/output
|
//Maps each character to an index ind the input/output
|
||||||
private Map<Character, Integer> charToIdxMap;
|
private final Map<Character, Integer> charToIdxMap;
|
||||||
//All characters of the input file (after filtering to only those that are valid
|
//All characters of the input file (after filtering to only those that are valid
|
||||||
private char[] fileCharacters;
|
private final char[] fileCharacters;
|
||||||
//Length of each example/minibatch (number of characters)
|
//Length of each example/minibatch (number of characters)
|
||||||
private int exampleLength;
|
private final int exampleLength;
|
||||||
//Size of each minibatch (number of examples)
|
//Size of each minibatch (number of examples)
|
||||||
private int miniBatchSize;
|
private final int miniBatchSize;
|
||||||
private Random rng;
|
private final Random rng;
|
||||||
//Offsets for the start of each example
|
//Offsets for the start of each example
|
||||||
private LinkedList<Integer> exampleStartOffsets = new LinkedList<>();
|
private final LinkedList<Integer> exampleStartOffsets = new LinkedList<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param textFilePath Path to text file to use for generating samples
|
* @param textFilePath Path to text file to use for generating samples
|
||||||
|
@ -299,7 +300,7 @@ public class CharacterIterator implements DataSetIterator {
|
||||||
if (!f.exists()) throw new IOException("File does not exist: " + fileLocation); //Download problem?
|
if (!f.exists()) throw new IOException("File does not exist: " + fileLocation); //Download problem?
|
||||||
|
|
||||||
char[] validCharacters = CharacterIterator.getMinimalCharacterSet(); //Which characters are allowed? Others will be removed
|
char[] validCharacters = CharacterIterator.getMinimalCharacterSet(); //Which characters are allowed? Others will be removed
|
||||||
return new CharacterIterator(fileLocation, Charset.forName("UTF-8"),
|
return new CharacterIterator(fileLocation, StandardCharsets.UTF_8,
|
||||||
miniBatchSize, sequenceLength, validCharacters, new Random(12345));
|
miniBatchSize, sequenceLength, validCharacters, new Random(12345));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -305,7 +305,7 @@ public class SameDiffCNNCases {
|
||||||
// [minibatch,8,1,1,1]
|
// [minibatch,8,1,1,1]
|
||||||
|
|
||||||
|
|
||||||
int channels_height_width_depth = 8 * 1 * 1 * 1;
|
int channels_height_width_depth = 8;
|
||||||
|
|
||||||
SDVariable layer1_reshaped = layer1.reshape(-1, channels_height_width_depth);
|
SDVariable layer1_reshaped = layer1.reshape(-1, channels_height_width_depth);
|
||||||
|
|
||||||
|
@ -331,7 +331,7 @@ public class SameDiffCNNCases {
|
||||||
public Map<String,INDArray> getGradientsTestDataSameDiff() throws Exception {
|
public Map<String,INDArray> getGradientsTestDataSameDiff() throws Exception {
|
||||||
Nd4j.getRandom().setSeed(12345);
|
Nd4j.getRandom().setSeed(12345);
|
||||||
//NCDHW format
|
//NCDHW format
|
||||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||||
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
||||||
|
|
||||||
Map<String, INDArray> map = new HashMap<>();
|
Map<String, INDArray> map = new HashMap<>();
|
||||||
|
@ -357,7 +357,7 @@ public class SameDiffCNNCases {
|
||||||
Nd4j.getRandom().setSeed(12345);
|
Nd4j.getRandom().setSeed(12345);
|
||||||
|
|
||||||
List<Map<String, INDArray>> list = new ArrayList<>();
|
List<Map<String, INDArray>> list = new ArrayList<>();
|
||||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||||
|
|
||||||
list.add(Collections.singletonMap("in", arr));
|
list.add(Collections.singletonMap("in", arr));
|
||||||
|
|
||||||
|
@ -368,7 +368,7 @@ public class SameDiffCNNCases {
|
||||||
public MultiDataSet getGradientsTestData() throws Exception {
|
public MultiDataSet getGradientsTestData() throws Exception {
|
||||||
Nd4j.getRandom().setSeed(12345);
|
Nd4j.getRandom().setSeed(12345);
|
||||||
//NCDHW format
|
//NCDHW format
|
||||||
INDArray arr = Nd4j.rand(new int[]{2, 3, 8, 8, 8});
|
INDArray arr = Nd4j.rand(2, 3, 8, 8, 8);
|
||||||
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
INDArray labels = org.deeplearning4j.integration.TestUtils.randomOneHot(2, 10);
|
||||||
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
|
return new org.nd4j.linalg.dataset.MultiDataSet(arr, labels);
|
||||||
}
|
}
|
||||||
|
|
|
@ -130,3 +130,19 @@ echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
|
||||||
|
|
||||||
-P\<xxx>\
|
-P\<xxx>\
|
||||||
CAVIS_AVX_EXTENSION = {avx2 | avx512}, default is avx2
|
CAVIS_AVX_EXTENSION = {avx2 | avx512}, default is avx2
|
||||||
|
|
||||||
|
# Zeppelin Spark dependencies #
|
||||||
|
3
|
||||||
|
|
||||||
|
|
||||||
|
To add the dependency to the language models, use the following format in the Dependencies section of the Spark Interpreter configuration (Interpreters -> Spark -> Edit -> Dependencies):
|
||||||
|
|
||||||
|
groupId:artifactId:packaging:classifier:version
|
||||||
|
|
||||||
|
In your case it should work with
|
||||||
|
|
||||||
|
edu.stanford.nlp:stanford-corenlp:jar:models:3.8.0
|
||||||
|
|
||||||
|
|
||||||
|
Native CPU code under Linux needs libc6-dev
|
||||||
|
/lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.29' not found
|
|
@ -266,7 +266,7 @@ public class Configuration implements Iterable<Map.Entry<String, String>>, Writa
|
||||||
reloadConfiguration();
|
reloadConfiguration();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
|
private static final Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
|
||||||
|
|
||||||
private String substituteVars(String expr) {
|
private String substituteVars(String expr) {
|
||||||
if (expr == null) {
|
if (expr == null) {
|
||||||
|
@ -555,7 +555,7 @@ public class Configuration implements Iterable<Map.Entry<String, String>>, Writa
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the value of the <code>name</code> property as a <ocde>Pattern</code>.
|
* Get the value of the <code>name</code> property as a {@code Pattern}.
|
||||||
* If no such property is specified, or if the specified value is not a valid
|
* If no such property is specified, or if the specified value is not a valid
|
||||||
* <code>Pattern</code>, then <code>DefaultValue</code> is returned.
|
* <code>Pattern</code>, then <code>DefaultValue</code> is returned.
|
||||||
*
|
*
|
||||||
|
|
|
@ -27,7 +27,7 @@ import org.datavec.api.records.writer.RecordWriter;
|
||||||
|
|
||||||
public interface OutputFormat {
|
public interface OutputFormat {
|
||||||
|
|
||||||
public static final String OUTPUT_PATH = "org.nd4j.outputpath";
|
String OUTPUT_PATH = "org.nd4j.outputpath";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a record writer
|
* Create a record writer
|
||||||
|
|
|
@ -34,7 +34,7 @@ public abstract class BinaryComparable implements Comparable<BinaryComparable> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare bytes from {#getBytes()}.
|
* Compare bytes from {#getBytes()}.
|
||||||
* @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int)
|
* {@code org.apache.hadoop.io.WritableComparator#compareBytes(byte[], int, int, byte[], int, int)}
|
||||||
*/
|
*/
|
||||||
public int compareTo(BinaryComparable other) {
|
public int compareTo(BinaryComparable other) {
|
||||||
if (this == other)
|
if (this == other)
|
||||||
|
@ -63,7 +63,7 @@ public abstract class BinaryComparable implements Comparable<BinaryComparable> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a hash of the bytes returned from {#getBytes()}.
|
* Return a hash of the bytes returned from {#getBytes()}.
|
||||||
* @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)
|
* {@code org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)}
|
||||||
*/
|
*/
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return WritableComparator.hashBytes(getBytes(), getLength());
|
return WritableComparator.hashBytes(getBytes(), getLength());
|
||||||
|
|
|
@ -50,7 +50,7 @@ public class DataInputBuffer extends DataInputStream {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Buffer buffer;
|
private final Buffer buffer;
|
||||||
|
|
||||||
/** Constructs a new empty buffer. */
|
/** Constructs a new empty buffer. */
|
||||||
public DataInputBuffer() {
|
public DataInputBuffer() {
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class DataOutputBuffer extends DataOutputStream {
|
||||||
public void write(DataInput in, int len) throws IOException {
|
public void write(DataInput in, int len) throws IOException {
|
||||||
int newcount = count + len;
|
int newcount = count + len;
|
||||||
if (newcount > buf.length) {
|
if (newcount > buf.length) {
|
||||||
byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)];
|
byte[] newbuf = new byte[Math.max(buf.length << 1, newcount)];
|
||||||
System.arraycopy(buf, 0, newbuf, 0, count);
|
System.arraycopy(buf, 0, newbuf, 0, count);
|
||||||
buf = newbuf;
|
buf = newbuf;
|
||||||
}
|
}
|
||||||
|
@ -53,7 +53,7 @@ public class DataOutputBuffer extends DataOutputStream {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Buffer buffer;
|
private final Buffer buffer;
|
||||||
|
|
||||||
/** Constructs a new empty buffer. */
|
/** Constructs a new empty buffer. */
|
||||||
public DataOutputBuffer() {
|
public DataOutputBuffer() {
|
||||||
|
|
|
@ -25,6 +25,6 @@ import java.util.Comparator;
|
||||||
|
|
||||||
public interface RawComparator<T> extends Comparator<T> {
|
public interface RawComparator<T> extends Comparator<T> {
|
||||||
|
|
||||||
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
|
int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ import java.util.HashMap;
|
||||||
|
|
||||||
public class WritableComparator implements RawComparator {
|
public class WritableComparator implements RawComparator {
|
||||||
|
|
||||||
private static HashMap<Class, WritableComparator> comparators = new HashMap<>(); // registry
|
private static final HashMap<Class, WritableComparator> comparators = new HashMap<>(); // registry
|
||||||
|
|
||||||
/** Get a comparator for a {@link WritableComparable} implementation. */
|
/** Get a comparator for a {@link WritableComparable} implementation. */
|
||||||
public static synchronized WritableComparator get(Class<? extends WritableComparable> c) {
|
public static synchronized WritableComparator get(Class<? extends WritableComparable> c) {
|
||||||
|
|
|
@ -229,7 +229,7 @@ public final class WritableUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializes an integer to a binary stream with zero-compressed encoding.
|
* Serializes an integer to a binary stream with zero-compressed encoding.
|
||||||
* For -120 <= i <= 127, only one byte is used with the actual value.
|
* For -120 <= i <= 127, only one byte is used with the actual value.
|
||||||
* For other values of i, the first byte value indicates whether the
|
* For other values of i, the first byte value indicates whether the
|
||||||
* integer is positive or negative, and the number of bytes that follow.
|
* integer is positive or negative, and the number of bytes that follow.
|
||||||
* If the first byte value v is between -121 and -124, the following integer
|
* If the first byte value v is between -121 and -124, the following integer
|
||||||
|
@ -248,7 +248,7 @@ public final class WritableUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializes a long to a binary stream with zero-compressed encoding.
|
* Serializes a long to a binary stream with zero-compressed encoding.
|
||||||
* For -112 <= i <= 127, only one byte is used with the actual value.
|
* For -112 <= i <= 127, only one byte is used with the actual value.
|
||||||
* For other values of i, the first byte value indicates whether the
|
* For other values of i, the first byte value indicates whether the
|
||||||
* long is positive or negative, and the number of bytes that follow.
|
* long is positive or negative, and the number of bytes that follow.
|
||||||
* If the first byte value v is between -113 and -120, the following long
|
* If the first byte value v is between -113 and -120, the following long
|
||||||
|
|
|
@ -27,7 +27,7 @@ import org.datavec.api.writable.Writable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class LabelWriterConverter implements WritableConverter {
|
public class LabelWriterConverter implements WritableConverter {
|
||||||
private List<String> labels;
|
private final List<String> labels;
|
||||||
|
|
||||||
public LabelWriterConverter(List<String> labels) {
|
public LabelWriterConverter(List<String> labels) {
|
||||||
this.labels = labels;
|
this.labels = labels;
|
||||||
|
|
|
@ -35,7 +35,7 @@ public interface PathLabelGenerator extends Serializable {
|
||||||
* If true: infer the set of possible label classes, and convert these to integer indexes. If when true, the
|
* If true: infer the set of possible label classes, and convert these to integer indexes. If when true, the
|
||||||
* returned Writables should be text writables.<br>
|
* returned Writables should be text writables.<br>
|
||||||
* <br>
|
* <br>
|
||||||
* For regression use cases (or PathLabelGenerator classification instances that do their own label -> integer
|
* For regression use cases (or PathLabelGenerator classification instances that do their own label -> integer
|
||||||
* assignment), this should return false.
|
* assignment), this should return false.
|
||||||
*
|
*
|
||||||
* @return whether label classes should be inferred
|
* @return whether label classes should be inferred
|
||||||
|
|
|
@ -35,7 +35,7 @@ public class SerializationFactory extends Configured {
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(SerializationFactory.class.getName());
|
private static final Logger LOG = LoggerFactory.getLogger(SerializationFactory.class.getName());
|
||||||
|
|
||||||
private List<Serialization<?>> serializations = new ArrayList<>();
|
private final List<Serialization<?>> serializations = new ArrayList<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -47,7 +47,7 @@ public class SerializationFactory extends Configured {
|
||||||
public SerializationFactory(Configuration conf) {
|
public SerializationFactory(Configuration conf) {
|
||||||
super(conf);
|
super(conf);
|
||||||
for (String serializerName : conf.getStrings("io.serializations",
|
for (String serializerName : conf.getStrings("io.serializations",
|
||||||
new String[] {"org.apache.hadoop.io.serializer.WritableSerialization"})) {
|
"org.apache.hadoop.io.serializer.WritableSerialization")) {
|
||||||
add(conf, serializerName);
|
add(conf, serializerName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,7 +113,7 @@ public class Buffer implements Comparable, Cloneable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Change the capacity of the backing storage.
|
* Change the capacity of the backing storage.
|
||||||
* The data is preserved if newCapacity >= getCount().
|
* The data is preserved if newCapacity >= getCount().
|
||||||
* @param newCapacity The new capacity in bytes.
|
* @param newCapacity The new capacity in bytes.
|
||||||
*/
|
*/
|
||||||
public void setCapacity(int newCapacity) {
|
public void setCapacity(int newCapacity) {
|
||||||
|
|
|
@ -209,9 +209,7 @@ public class IOUtils {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
static String toCSVBuffer(Buffer buf) {
|
static String toCSVBuffer(Buffer buf) {
|
||||||
StringBuilder sb = new StringBuilder("#");
|
return "#" + buf.toString();
|
||||||
sb.append(buf.toString());
|
|
||||||
return sb.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -441,7 +439,7 @@ public class IOUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializes a long to a binary stream with zero-compressed encoding.
|
* Serializes a long to a binary stream with zero-compressed encoding.
|
||||||
* For -112 <= i <= 127, only one byte is used with the actual value.
|
* For -112 <= i <= 127, only one byte is used with the actual value.
|
||||||
* For other values of i, the first byte value indicates whether the
|
* For other values of i, the first byte value indicates whether the
|
||||||
* long is positive or negative, and the number of bytes that follow.
|
* long is positive or negative, and the number of bytes that follow.
|
||||||
* If the first byte value v is between -113 and -120, the following long
|
* If the first byte value v is between -113 and -120, the following long
|
||||||
|
|
|
@ -99,8 +99,6 @@ public interface RecordReader extends AutoCloseable, Serializable, Configurable
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset record reader iterator
|
* Reset record reader iterator
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class ComposableRecordReader extends BaseRecordReader {
|
public class ComposableRecordReader extends BaseRecordReader {
|
||||||
|
|
||||||
private RecordReader[] readers;
|
private final RecordReader[] readers;
|
||||||
|
|
||||||
public ComposableRecordReader(RecordReader... readers) {
|
public ComposableRecordReader(RecordReader... readers) {
|
||||||
this.readers = readers;
|
this.readers = readers;
|
||||||
|
|
|
@ -35,7 +35,7 @@ import java.util.List;
|
||||||
|
|
||||||
public class ConcatenatingRecordReader extends BaseRecordReader {
|
public class ConcatenatingRecordReader extends BaseRecordReader {
|
||||||
|
|
||||||
private RecordReader[] readers;
|
private final RecordReader[] readers;
|
||||||
|
|
||||||
public ConcatenatingRecordReader(RecordReader... readers) {
|
public ConcatenatingRecordReader(RecordReader... readers) {
|
||||||
this.readers = readers;
|
this.readers = readers;
|
||||||
|
|
|
@ -23,14 +23,14 @@ package org.datavec.api.records.reader.impl;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import org.datavec.api.conf.Configuration;
|
import org.datavec.api.conf.Configuration;
|
||||||
import org.datavec.api.Record;
|
import org.datavec.api.records.Record;
|
||||||
import org.datavec.api.records.metadata.RecordMetaData;
|
import org.datavec.api.records.metadata.RecordMetaData;
|
||||||
import org.datavec.api.records.metadata.RecordMetaDataURI;
|
import org.datavec.api.records.metadata.RecordMetaDataURI;
|
||||||
import org.datavec.api.records.reader.BaseRecordReader;
|
import org.datavec.api.records.reader.BaseRecordReader;
|
||||||
import org.datavec.api.split.InputSplit;
|
import org.datavec.api.split.InputSplit;
|
||||||
import org.datavec.api.writable.IntWritable;
|
import org.datavec.api.writable.IntWritable;
|
||||||
import org.datavec.api.writable.Text;
|
import org.datavec.api.writable.Text;
|
||||||
import org.datavec.api.Writable;
|
import org.datavec.api.writable.Writable;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
@ -40,206 +40,202 @@ import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* File reader/writer
|
* File reader/writer
|
||||||
|
*
|
||||||
|
* @author Adam Gibson
|
||||||
*/
|
*/
|
||||||
public class FileRecordReader extends BaseRecordReader {
|
public class FileRecordReader extends BaseRecordReader {
|
||||||
|
|
||||||
protected Iterator<URI> locationsIterator;
|
protected Iterator<URI> locationsIterator;
|
||||||
protected Configuration conf;
|
protected Configuration conf;
|
||||||
protected URI currentUri;
|
protected URI currentUri;
|
||||||
protected List<String> labels;
|
protected List<String> labels;
|
||||||
protected boolean appendLabel = false;
|
protected boolean appendLabel = false;
|
||||||
@Getter
|
@Getter @Setter
|
||||||
@Setter
|
protected String charset = StandardCharsets.UTF_8.name(); //Using String as StandardCharsets.UTF_8 is not serializable
|
||||||
protected String charset = StandardCharsets.UTF_8.name(); //Using String as StandardCharsets.UTF_8 is not serializable
|
|
||||||
|
|
||||||
public FileRecordReader() {
|
public FileRecordReader() {}
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize(InputSplit split) throws IOException, InterruptedException {
|
public void initialize(InputSplit split) throws IOException, InterruptedException {
|
||||||
super.initialize(split);
|
super.initialize(split);
|
||||||
doInitialize(split);
|
doInitialize(split);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected void doInitialize(InputSplit split) {
|
protected void doInitialize(InputSplit split) {
|
||||||
|
|
||||||
if (labels == null && appendLabel) {
|
if (labels == null && appendLabel) {
|
||||||
URI[] locations = split.locations();
|
URI[] locations = split.locations();
|
||||||
if (locations.length > 0) {
|
if (locations.length > 0) {
|
||||||
Set<String> labels = new HashSet<>();
|
Set<String> labels = new HashSet<>();
|
||||||
for (URI u : locations) {
|
for(URI u : locations){
|
||||||
String[] pathSplit = u.toString().split("[/\\\\]");
|
String[] pathSplit = u.toString().split("[/\\\\]");
|
||||||
labels.add(pathSplit[pathSplit.length - 2]);
|
labels.add(pathSplit[pathSplit.length-2]);
|
||||||
|
}
|
||||||
|
this.labels = new ArrayList<>(labels);
|
||||||
|
Collections.sort(this.labels);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.labels = new ArrayList<>(labels);
|
locationsIterator = split.locationsIterator();
|
||||||
Collections.sort(this.labels);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
locationsIterator = split.locationsIterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void initialize(Configuration conf, InputSplit split)
|
|
||||||
throws IOException, InterruptedException {
|
|
||||||
appendLabel = conf.getBoolean(APPEND_LABEL, true);
|
|
||||||
doInitialize(split);
|
|
||||||
this.inputSplit = split;
|
|
||||||
this.conf = conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Writable> next() {
|
|
||||||
return nextRecord().getRecord();
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<Writable> loadFromStream(URI uri, InputStream next, Charset charset) {
|
|
||||||
List<Writable> ret = new ArrayList<>();
|
|
||||||
try {
|
|
||||||
if (!(next instanceof BufferedInputStream)) {
|
|
||||||
next = new BufferedInputStream(next);
|
|
||||||
}
|
|
||||||
String s = org.apache.commons.io.IOUtils.toString(next, charset);
|
|
||||||
ret.add(new Text(s));
|
|
||||||
if (appendLabel) {
|
|
||||||
int idx = getLabel(uri);
|
|
||||||
ret.add(new IntWritable(idx));
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new IllegalStateException("Error reading from input stream: " + uri);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the current label. The index of the current file's parent directory in the label list
|
|
||||||
*
|
|
||||||
* @return The index of the current file's parent directory
|
|
||||||
*/
|
|
||||||
public int getCurrentLabel() {
|
|
||||||
return getLabel(currentUri);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getLabel(URI uri) {
|
|
||||||
String s = uri.toString();
|
|
||||||
int lastIdx = Math.max(s.lastIndexOf('/'),
|
|
||||||
s.lastIndexOf('\\')); //Note: if neither are found, -1 is fine here
|
|
||||||
String sub = s.substring(0, lastIdx);
|
|
||||||
int secondLastIdx = Math.max(sub.lastIndexOf('/'), sub.lastIndexOf('\\'));
|
|
||||||
String name = s.substring(secondLastIdx + 1, lastIdx);
|
|
||||||
return labels.indexOf(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getLabels() {
|
|
||||||
return labels;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLabels(List<String> labels) {
|
|
||||||
this.labels = labels;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return locationsIterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setConf(Configuration conf) {
|
|
||||||
this.conf = conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Configuration getConf() {
|
|
||||||
return conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<List<Writable>> next(int num) {
|
|
||||||
List<List<Writable>> ret = new ArrayList<>(num);
|
|
||||||
int numBatches = 0;
|
|
||||||
while (hasNext() && numBatches < num) {
|
|
||||||
ret.add(next());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
@Override
|
||||||
}
|
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
|
||||||
|
appendLabel = conf.getBoolean(APPEND_LABEL, true);
|
||||||
@Override
|
doInitialize(split);
|
||||||
public void reset() {
|
this.inputSplit = split;
|
||||||
if (inputSplit == null) {
|
this.conf = conf;
|
||||||
throw new UnsupportedOperationException("Cannot reset without first initializing");
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
doInitialize(inputSplit);
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new RuntimeException("Error during LineRecordReader reset", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean resetSupported() {
|
|
||||||
if (inputSplit != null) {
|
|
||||||
return inputSplit.resetSupported();
|
|
||||||
}
|
|
||||||
return false; //reset() throws exception on reset() if inputSplit is null
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Writable> record(URI uri, DataInputStream dataInputStream) throws IOException {
|
|
||||||
invokeListeners(uri);
|
|
||||||
//Here: reading the entire file to a Text writable
|
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
String line;
|
|
||||||
while ((line = br.readLine()) != null) {
|
|
||||||
sb.append(line).append("\n");
|
|
||||||
}
|
|
||||||
return Collections.singletonList(new Text(sb.toString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Record nextRecord() {
|
|
||||||
URI next = locationsIterator.next();
|
|
||||||
invokeListeners(next);
|
|
||||||
|
|
||||||
List<Writable> ret;
|
|
||||||
try (InputStream s = streamCreatorFn.apply(next)) {
|
|
||||||
ret = loadFromStream(next, s, Charset.forName(charset));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Error reading from stream for URI: " + next);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return new org.datavec.api.records.impl.Record(ret,
|
@Override
|
||||||
new RecordMetaDataURI(next, FileRecordReader.class));
|
public List<Writable> next() {
|
||||||
}
|
return nextRecord().getRecord();
|
||||||
|
|
||||||
@Override
|
|
||||||
public Record loadFromMetaData(RecordMetaData recordMetaData) throws IOException {
|
|
||||||
return loadFromMetaData(Collections.singletonList(recordMetaData)).get(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
|
|
||||||
List<Record> out = new ArrayList<>();
|
|
||||||
|
|
||||||
for (RecordMetaData meta : recordMetaDatas) {
|
|
||||||
URI uri = meta.getURI();
|
|
||||||
|
|
||||||
List<Writable> list;
|
|
||||||
try (InputStream s = streamCreatorFn.apply(uri)) {
|
|
||||||
list = loadFromStream(uri, s, Charset.forName(charset));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Error reading from stream for URI: " + uri);
|
|
||||||
}
|
|
||||||
|
|
||||||
out.add(new org.datavec.api.records.impl.Record(list, meta));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return out;
|
private List<Writable> loadFromStream(URI uri, InputStream next, Charset charset) {
|
||||||
}
|
List<Writable> ret = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
if(!(next instanceof BufferedInputStream)){
|
||||||
|
next = new BufferedInputStream(next);
|
||||||
|
}
|
||||||
|
String s = org.apache.commons.io.IOUtils.toString(next, charset);
|
||||||
|
ret.add(new Text(s));
|
||||||
|
if (appendLabel) {
|
||||||
|
int idx = getLabel(uri);
|
||||||
|
ret.add(new IntWritable(idx));
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new IllegalStateException("Error reading from input stream: " + uri);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the current label.
|
||||||
|
* The index of the current file's parent directory
|
||||||
|
* in the label list
|
||||||
|
* @return The index of the current file's parent directory
|
||||||
|
*/
|
||||||
|
public int getCurrentLabel() {
|
||||||
|
return getLabel(currentUri);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getLabel(URI uri){
|
||||||
|
String s = uri.toString();
|
||||||
|
int lastIdx = Math.max(s.lastIndexOf('/'), s.lastIndexOf('\\')); //Note: if neither are found, -1 is fine here
|
||||||
|
String sub = s.substring(0, lastIdx);
|
||||||
|
int secondLastIdx = Math.max(sub.lastIndexOf('/'), sub.lastIndexOf('\\'));
|
||||||
|
String name = s.substring(secondLastIdx+1, lastIdx);
|
||||||
|
return labels.indexOf(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getLabels() {
|
||||||
|
return labels;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLabels(List<String> labels) {
|
||||||
|
this.labels = labels;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
return locationsIterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setConf(Configuration conf) {
|
||||||
|
this.conf = conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Configuration getConf() {
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<List<Writable>> next(int num) {
|
||||||
|
List<List<Writable>> ret = new ArrayList<>(num);
|
||||||
|
int numBatches = 0;
|
||||||
|
while (hasNext() && numBatches < num) {
|
||||||
|
ret.add(next());
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public void reset() {
|
||||||
|
if (inputSplit == null)
|
||||||
|
throw new UnsupportedOperationException("Cannot reset without first initializing");
|
||||||
|
try {
|
||||||
|
doInitialize(inputSplit);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Error during LineRecordReader reset", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean resetSupported() {
|
||||||
|
if(inputSplit != null){
|
||||||
|
return inputSplit.resetSupported();
|
||||||
|
}
|
||||||
|
return false; //reset() throws exception on reset() if inputSplit is null
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Writable> record(URI uri, DataInputStream dataInputStream) throws IOException {
|
||||||
|
invokeListeners(uri);
|
||||||
|
//Here: reading the entire file to a Text writable
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
String line;
|
||||||
|
while ((line = br.readLine()) != null) {
|
||||||
|
sb.append(line).append("\n");
|
||||||
|
}
|
||||||
|
return Collections.singletonList(new Text(sb.toString()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Record nextRecord() {
|
||||||
|
URI next = locationsIterator.next();
|
||||||
|
invokeListeners(next);
|
||||||
|
|
||||||
|
List<Writable> ret;
|
||||||
|
try(InputStream s = streamCreatorFn.apply(next)) {
|
||||||
|
ret = loadFromStream(next, s, Charset.forName(charset));
|
||||||
|
} catch (IOException e){
|
||||||
|
throw new RuntimeException("Error reading from stream for URI: " + next);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new org.datavec.api.records.impl.Record(ret,new RecordMetaDataURI(next, FileRecordReader.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Record loadFromMetaData(RecordMetaData recordMetaData) throws IOException {
|
||||||
|
return loadFromMetaData(Collections.singletonList(recordMetaData)).get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
|
||||||
|
List<Record> out = new ArrayList<>();
|
||||||
|
|
||||||
|
for (RecordMetaData meta : recordMetaDatas) {
|
||||||
|
URI uri = meta.getURI();
|
||||||
|
|
||||||
|
List<Writable> list;
|
||||||
|
try(InputStream s = streamCreatorFn.apply(uri)) {
|
||||||
|
list = loadFromStream(uri, s, Charset.forName(charset));
|
||||||
|
} catch (IOException e){
|
||||||
|
throw new RuntimeException("Error reading from stream for URI: " + uri);
|
||||||
|
}
|
||||||
|
|
||||||
|
out.add(new org.datavec.api.records.impl.Record(list, meta));
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -200,7 +200,7 @@ public class LineRecordReader extends BaseRecordReader {
|
||||||
//Here: we are reading a single line from the DataInputStream
|
//Here: we are reading a single line from the DataInputStream
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
|
BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
|
||||||
String line = br.readLine();
|
String line = br.readLine();
|
||||||
return Collections.singletonList((Writable) new Text(line));
|
return Collections.singletonList(new Text(line));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Iterator<String> getIterator(int location) {
|
protected Iterator<String> getIterator(int location) {
|
||||||
|
@ -265,7 +265,7 @@ public class LineRecordReader extends BaseRecordReader {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd);
|
"Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd);
|
||||||
}
|
}
|
||||||
list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, (List<Writable>) null));
|
list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, null));
|
||||||
if (rmd.getURI() != null)
|
if (rmd.getURI() != null)
|
||||||
uris.add(rmd.getURI());
|
uris.add(rmd.getURI());
|
||||||
}
|
}
|
||||||
|
@ -332,7 +332,7 @@ public class LineRecordReader extends BaseRecordReader {
|
||||||
throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI
|
throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI
|
||||||
+ ": has only " + currentLineIdx + " lines");
|
+ ": has only " + currentLineIdx + " lines");
|
||||||
}
|
}
|
||||||
t.setThird(Collections.<Writable>singletonList(new Text(line)));
|
t.setThird(Collections.singletonList(new Text(line)));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
//Not URI based: String split, etc
|
//Not URI based: String split, etc
|
||||||
|
@ -347,7 +347,7 @@ public class LineRecordReader extends BaseRecordReader {
|
||||||
line = iterator.next();
|
line = iterator.next();
|
||||||
currentLineIdx++;
|
currentLineIdx++;
|
||||||
}
|
}
|
||||||
t.setThird(Collections.<Writable>singletonList(new Text(line)));
|
t.setThird(Collections.singletonList(new Text(line)));
|
||||||
}
|
}
|
||||||
closeIfRequired(iterator);
|
closeIfRequired(iterator);
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,7 @@ public class CollectionSequenceRecordReader extends BaseRecordReader implements
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param records Collection of sequences. For example, List<List<List<Writable>>> where the inner two lists
|
* @param records Collection of sequences. For example, {@code List<List<List<Writable>>>} where the inner two lists
|
||||||
* are a sequence, and the outer list/collection is a list of sequences
|
* are a sequence, and the outer list/collection is a list of sequences
|
||||||
*/
|
*/
|
||||||
public CollectionSequenceRecordReader(Collection<? extends Collection<? extends Collection<Writable>>> records) {
|
public CollectionSequenceRecordReader(Collection<? extends Collection<? extends Collection<Writable>>> records) {
|
||||||
|
|
|
@ -45,9 +45,9 @@ public class CSVMultiSequenceRecordReader extends CSVRecordReader implements Seq
|
||||||
PAD
|
PAD
|
||||||
}
|
}
|
||||||
|
|
||||||
private String sequenceSeparatorRegex;
|
private final String sequenceSeparatorRegex;
|
||||||
private Mode mode;
|
private final Mode mode;
|
||||||
private Writable padValue;
|
private final Writable padValue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a sequence reader using the default value for skip lines (0), the default delimiter (',') and the default
|
* Create a sequence reader using the default value for skip lines (0), the default delimiter (',') and the default
|
||||||
|
|
|
@ -41,7 +41,7 @@ public class CSVNLinesSequenceRecordReader extends CSVRecordReader implements Se
|
||||||
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
|
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
|
||||||
|
|
||||||
private int nLinesPerSequence;
|
private int nLinesPerSequence;
|
||||||
private String delimiter;
|
private final String delimiter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* No-arg constructor with the default number of lines per sequence (10)
|
* No-arg constructor with the default number of lines per sequence (10)
|
||||||
|
@ -124,7 +124,7 @@ public class CSVNLinesSequenceRecordReader extends CSVRecordReader implements Se
|
||||||
"Invalid metadata; expected RecordMetaDataLineInterval instance; got: " + rmd);
|
"Invalid metadata; expected RecordMetaDataLineInterval instance; got: " + rmd);
|
||||||
}
|
}
|
||||||
list.add(new Triple<>(count++, (RecordMetaDataLineInterval) rmd,
|
list.add(new Triple<>(count++, (RecordMetaDataLineInterval) rmd,
|
||||||
(List<List<Writable>>) new ArrayList<List<Writable>>()));
|
new ArrayList<List<Writable>>()));
|
||||||
}
|
}
|
||||||
|
|
||||||
//Sort by starting line number:
|
//Sort by starting line number:
|
||||||
|
|
|
@ -39,8 +39,8 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
||||||
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
|
public static final String LINES_PER_SEQUENCE = NAME_SPACE + ".nlinespersequence";
|
||||||
|
|
||||||
private int maxLinesPerSequence;
|
private int maxLinesPerSequence;
|
||||||
private String delimiter;
|
private final String delimiter;
|
||||||
private int stride;
|
private final int stride;
|
||||||
private LinkedList<List<Writable>> queue;
|
private LinkedList<List<Writable>> queue;
|
||||||
private boolean exhausted;
|
private boolean exhausted;
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param maxLinesPerSequence Number of lines in each sequence, use default delemiter(,) between entries in the same line
|
* @param maxLinesPerSequence Number of lines in each sequence, use default delemiter(,) between entries in the same line
|
||||||
* @param stride Number of lines between records (increment window > 1 line)
|
* @param stride Number of lines between records (increment window > 1 line)
|
||||||
*/
|
*/
|
||||||
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride) {
|
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride) {
|
||||||
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
|
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
|
||||||
|
@ -68,7 +68,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param maxLinesPerSequence Number of lines in each sequence, use default delemiter(,) between entries in the same line
|
* @param maxLinesPerSequence Number of lines in each sequence, use default delemiter(,) between entries in the same line
|
||||||
* @param stride Number of lines between records (increment window > 1 line)
|
* @param stride Number of lines between records (increment window > 1 line)
|
||||||
*/
|
*/
|
||||||
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride, String delimiter) {
|
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int stride, String delimiter) {
|
||||||
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
|
this(maxLinesPerSequence, 0, stride, String.valueOf(CSVRecordReader.DEFAULT_DELIMITER));
|
||||||
|
@ -78,7 +78,7 @@ public class CSVVariableSlidingWindowRecordReader extends CSVRecordReader implem
|
||||||
*
|
*
|
||||||
* @param maxLinesPerSequence Number of lines in each sequences
|
* @param maxLinesPerSequence Number of lines in each sequences
|
||||||
* @param skipNumLines Number of lines to skip at the start of the file (only skipped once, not per sequence)
|
* @param skipNumLines Number of lines to skip at the start of the file (only skipped once, not per sequence)
|
||||||
* @param stride Number of lines between records (increment window > 1 line)
|
* @param stride Number of lines between records (increment window > 1 line)
|
||||||
* @param delimiter Delimiter between entries in the same line, for example ","
|
* @param delimiter Delimiter between entries in the same line, for example ","
|
||||||
*/
|
*/
|
||||||
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int skipNumLines, int stride, String delimiter) {
|
public CSVVariableSlidingWindowRecordReader(int maxLinesPerSequence, int skipNumLines, int stride, String delimiter) {
|
||||||
|
|
|
@ -302,7 +302,7 @@ public class SerializableCSVParser implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* precondition: sb.length() > 0
|
* precondition: sb.length() > 0
|
||||||
*
|
*
|
||||||
* @param sb A sequence of characters to examine
|
* @param sb A sequence of characters to examine
|
||||||
* @return true if every character in the sequence is whitespace
|
* @return true if every character in the sequence is whitespace
|
||||||
|
|
|
@ -114,8 +114,6 @@ public class InMemoryRecordReader implements RecordReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset record reader iterator
|
* Reset record reader iterator
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
|
|
|
@ -195,8 +195,6 @@ public class InMemorySequenceRecordReader implements SequenceRecordReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset record reader iterator
|
* Reset record reader iterator
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class FieldSelection implements Serializable {
|
||||||
|
|
||||||
public static final Writable DEFAULT_MISSING_VALUE = new Text("");
|
public static final Writable DEFAULT_MISSING_VALUE = new Text("");
|
||||||
|
|
||||||
private List<String[]> fieldPaths;
|
private final List<String[]> fieldPaths;
|
||||||
private List<Writable> valueIfMissing;
|
private final List<Writable> valueIfMissing;
|
||||||
|
|
||||||
private FieldSelection(Builder builder) {
|
private FieldSelection(Builder builder) {
|
||||||
this.fieldPaths = builder.fieldPaths;
|
this.fieldPaths = builder.fieldPaths;
|
||||||
|
@ -53,8 +53,8 @@ public class FieldSelection implements Serializable {
|
||||||
|
|
||||||
public static class Builder {
|
public static class Builder {
|
||||||
|
|
||||||
private List<String[]> fieldPaths = new ArrayList<>();
|
private final List<String[]> fieldPaths = new ArrayList<>();
|
||||||
private List<Writable> valueIfMissing = new ArrayList<>();
|
private final List<Writable> valueIfMissing = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -29,8 +29,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
public class JacksonLineRecordReader extends LineRecordReader {
|
public class JacksonLineRecordReader extends LineRecordReader {
|
||||||
|
|
||||||
private FieldSelection selection;
|
private final FieldSelection selection;
|
||||||
private ObjectMapper mapper;
|
private final ObjectMapper mapper;
|
||||||
|
|
||||||
public JacksonLineRecordReader(FieldSelection selection, ObjectMapper mapper) {
|
public JacksonLineRecordReader(FieldSelection selection, ObjectMapper mapper) {
|
||||||
this.selection = selection;
|
this.selection = selection;
|
||||||
|
|
|
@ -39,8 +39,8 @@ import java.util.NoSuchElementException;
|
||||||
|
|
||||||
public class JacksonLineSequenceRecordReader extends FileRecordReader implements SequenceRecordReader {
|
public class JacksonLineSequenceRecordReader extends FileRecordReader implements SequenceRecordReader {
|
||||||
|
|
||||||
private FieldSelection selection;
|
private final FieldSelection selection;
|
||||||
private ObjectMapper mapper;
|
private final ObjectMapper mapper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
|
@ -45,12 +45,12 @@ public class JacksonRecordReader extends BaseRecordReader {
|
||||||
|
|
||||||
private static final TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {};
|
private static final TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {};
|
||||||
|
|
||||||
private FieldSelection selection;
|
private final FieldSelection selection;
|
||||||
private ObjectMapper mapper;
|
private final ObjectMapper mapper;
|
||||||
private boolean shuffle;
|
private final boolean shuffle;
|
||||||
private long rngSeed;
|
private final long rngSeed;
|
||||||
private PathLabelGenerator labelGenerator;
|
private final PathLabelGenerator labelGenerator;
|
||||||
private int labelPosition;
|
private final int labelPosition;
|
||||||
private InputSplit is;
|
private InputSplit is;
|
||||||
private Random r;
|
private Random r;
|
||||||
@Getter @Setter
|
@Getter @Setter
|
||||||
|
|
|
@ -35,7 +35,7 @@ import java.util.List;
|
||||||
|
|
||||||
public class MatlabRecordReader extends FileRecordReader {
|
public class MatlabRecordReader extends FileRecordReader {
|
||||||
|
|
||||||
private List<List<Writable>> records = new ArrayList<>();
|
private final List<List<Writable>> records = new ArrayList<>();
|
||||||
private Iterator<List<Writable>> currIter;
|
private Iterator<List<Writable>> currIter;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -96,8 +96,6 @@ public class SVMLightRecordReader extends LineRecordReader {
|
||||||
* Set configuration.
|
* Set configuration.
|
||||||
*
|
*
|
||||||
* @param conf DataVec configuration
|
* @param conf DataVec configuration
|
||||||
* @throws IOException
|
|
||||||
* @throws InterruptedException
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void setConf(Configuration conf) {
|
public void setConf(Configuration conf) {
|
||||||
|
@ -181,7 +179,7 @@ public class SVMLightRecordReader extends LineRecordReader {
|
||||||
if (index < 0)
|
if (index < 0)
|
||||||
throw new NumberFormatException("");
|
throw new NumberFormatException("");
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
String msg = String.format("Feature index must be positive integer (found %s)", featureTokens[i].toString());
|
String msg = String.format("Feature index must be positive integer (found %s)", featureTokens[i]);
|
||||||
throw new NumberFormatException(msg);
|
throw new NumberFormatException(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,7 +216,7 @@ public class SVMLightRecordReader extends LineRecordReader {
|
||||||
if (index < 0)
|
if (index < 0)
|
||||||
throw new NumberFormatException("");
|
throw new NumberFormatException("");
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
String msg = String.format("Multilabel index must be positive integer (found %s)", labelTokens[i].toString());
|
String msg = String.format("Multilabel index must be positive integer (found %s)", labelTokens[i]);
|
||||||
throw new NumberFormatException(msg);
|
throw new NumberFormatException(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,11 +41,11 @@ import java.util.regex.Pattern;
|
||||||
public class RegexLineRecordReader extends LineRecordReader {
|
public class RegexLineRecordReader extends LineRecordReader {
|
||||||
public final static String SKIP_NUM_LINES = NAME_SPACE + ".skipnumlines";
|
public final static String SKIP_NUM_LINES = NAME_SPACE + ".skipnumlines";
|
||||||
|
|
||||||
private String regex;
|
private final String regex;
|
||||||
private int skipNumLines;
|
private int skipNumLines;
|
||||||
private Pattern pattern;
|
private final Pattern pattern;
|
||||||
private int numLinesSkipped;
|
private int numLinesSkipped;
|
||||||
private int currLine = 0;
|
private final int currLine = 0;
|
||||||
|
|
||||||
public RegexLineRecordReader(String regex, int skipNumLines) {
|
public RegexLineRecordReader(String regex, int skipNumLines) {
|
||||||
this.regex = regex;
|
this.regex = regex;
|
||||||
|
|
|
@ -61,11 +61,11 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
|
||||||
|
|
||||||
public static final Logger LOG = LoggerFactory.getLogger(RegexSequenceRecordReader.class);
|
public static final Logger LOG = LoggerFactory.getLogger(RegexSequenceRecordReader.class);
|
||||||
|
|
||||||
private String regex;
|
private final String regex;
|
||||||
private int skipNumLines;
|
private int skipNumLines;
|
||||||
private Pattern pattern;
|
private final Pattern pattern;
|
||||||
private transient Charset charset;
|
private transient Charset charset;
|
||||||
private LineErrorHandling errorHandling;
|
private final LineErrorHandling errorHandling;
|
||||||
|
|
||||||
public RegexSequenceRecordReader(String regex, int skipNumLines) {
|
public RegexSequenceRecordReader(String regex, int skipNumLines) {
|
||||||
this(regex, skipNumLines, DEFAULT_CHARSET, DEFAULT_ERROR_HANDLING);
|
this(regex, skipNumLines, DEFAULT_CHARSET, DEFAULT_ERROR_HANDLING);
|
||||||
|
@ -92,7 +92,7 @@ public class RegexSequenceRecordReader extends FileRecordReader implements Seque
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<List<Writable>> sequenceRecord(URI uri, DataInputStream dataInputStream) throws IOException {
|
public List<List<Writable>> sequenceRecord(URI uri, DataInputStream dataInputStream) throws IOException {
|
||||||
String fileContents = IOUtils.toString(new BufferedInputStream(dataInputStream), charset.name());
|
String fileContents = IOUtils.toString(new BufferedInputStream(dataInputStream), charset);
|
||||||
return loadSequence(fileContents, uri);
|
return loadSequence(fileContents, uri);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -145,8 +145,6 @@ public class TransformProcessRecordReader implements RecordReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset record reader iterator
|
* Reset record reader iterator
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
|
|
|
@ -195,8 +195,6 @@ public class TransformProcessSequenceRecordReader implements SequenceRecordReade
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset record reader iterator
|
* Reset record reader iterator
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
|
|
|
@ -94,7 +94,7 @@ public class SVMLightRecordWriter extends FileRecordWriter {
|
||||||
@Override
|
@Override
|
||||||
public PartitionMetaData write(List<Writable> record) throws IOException {
|
public PartitionMetaData write(List<Writable> record) throws IOException {
|
||||||
if (!record.isEmpty()) {
|
if (!record.isEmpty()) {
|
||||||
List<Writable> recordList = record instanceof List ? (List<Writable>) record : new ArrayList<>(record);
|
List<Writable> recordList = record instanceof List ? record : new ArrayList<>(record);
|
||||||
|
|
||||||
/* Infer label columns, if necessary. The default is
|
/* Infer label columns, if necessary. The default is
|
||||||
* to assume that last column is a label and that the
|
* to assume that last column is a label and that the
|
||||||
|
@ -198,7 +198,7 @@ public class SVMLightRecordWriter extends FileRecordWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove extra label delimiter at beginning
|
// Remove extra label delimiter at beginning
|
||||||
String line = result.substring(1).toString();
|
String line = result.substring(1);
|
||||||
out.write(line.getBytes());
|
out.write(line.getBytes());
|
||||||
out.write(NEW_LINE.getBytes());
|
out.write(NEW_LINE.getBytes());
|
||||||
|
|
||||||
|
|
|
@ -124,9 +124,7 @@ public abstract class BaseInputSplit implements InputSplit {
|
||||||
|
|
||||||
for (int i = 0; i < weights.length; i++) {
|
for (int i = 0; i < weights.length; i++) {
|
||||||
List<URI> uris = new ArrayList<>();
|
List<URI> uris = new ArrayList<>();
|
||||||
for (int j = partitions[i]; j < partitions[i + 1]; j++) {
|
uris.addAll(Arrays.asList(paths).subList(partitions[i], partitions[i + 1]));
|
||||||
uris.add(paths[j]);
|
|
||||||
}
|
|
||||||
splits[i] = new CollectionInputSplit(uris);
|
splits[i] = new CollectionInputSplit(uris);
|
||||||
}
|
}
|
||||||
return splits;
|
return splits;
|
||||||
|
|
|
@ -138,7 +138,7 @@ public class FileSplit extends BaseInputSplit {
|
||||||
return addNewLocation(new File(rootDir, UUID.randomUUID().toString()).toURI().toString());
|
return addNewLocation(new File(rootDir, UUID.randomUUID().toString()).toURI().toString());
|
||||||
else {
|
else {
|
||||||
//add a file in the same directory as the file with the same extension as the original file
|
//add a file in the same directory as the file with the same extension as the original file
|
||||||
return addNewLocation(new File(rootDir.getParent(), UUID.randomUUID().toString() + "." + FilenameUtils.getExtension(rootDir.getAbsolutePath())).toURI().toString());
|
return addNewLocation(new File(rootDir.getParent(), UUID.randomUUID() + "." + FilenameUtils.getExtension(rootDir.getAbsolutePath())).toURI().toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ import java.util.Iterator;
|
||||||
|
|
||||||
public class InputStreamInputSplit implements InputSplit {
|
public class InputStreamInputSplit implements InputSplit {
|
||||||
private InputStream is;
|
private InputStream is;
|
||||||
private URI[] location;
|
private final URI[] location;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiate with the given
|
* Instantiate with the given
|
||||||
|
@ -130,7 +130,7 @@ public class InputStreamInputSplit implements InputSplit {
|
||||||
public Iterator<String> locationsPathIterator() {
|
public Iterator<String> locationsPathIterator() {
|
||||||
if(location.length >= 1)
|
if(location.length >= 1)
|
||||||
return Collections.singletonList(location[0].getPath()).iterator();
|
return Collections.singletonList(location[0].getPath()).iterator();
|
||||||
return Arrays.asList("").iterator();
|
return Collections.singletonList("").iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -33,7 +33,7 @@ import java.util.List;
|
||||||
* has delimited data of some kind.
|
* has delimited data of some kind.
|
||||||
*/
|
*/
|
||||||
public class ListStringSplit implements InputSplit {
|
public class ListStringSplit implements InputSplit {
|
||||||
private List<List<String>> data;
|
private final List<List<String>> data;
|
||||||
|
|
||||||
|
|
||||||
public ListStringSplit(List<List<String>> data) {
|
public ListStringSplit(List<List<String>> data) {
|
||||||
|
|
|
@ -43,12 +43,12 @@ public class NumberedFileInputSplit implements InputSplit {
|
||||||
* the index of the file, possibly zero-padded to x digits if the pattern is in the form %0xd.
|
* the index of the file, possibly zero-padded to x digits if the pattern is in the form %0xd.
|
||||||
* @param minIdxInclusive Minimum index/number (starting number in sequence of files, inclusive)
|
* @param minIdxInclusive Minimum index/number (starting number in sequence of files, inclusive)
|
||||||
* @param maxIdxInclusive Maximum index/number (last number in sequence of files, inclusive)
|
* @param maxIdxInclusive Maximum index/number (last number in sequence of files, inclusive)
|
||||||
* @see {NumberedFileInputSplitTest}
|
*
|
||||||
*/
|
*/
|
||||||
public NumberedFileInputSplit(String baseString, int minIdxInclusive, int maxIdxInclusive) {
|
public NumberedFileInputSplit(String baseString, int minIdxInclusive, int maxIdxInclusive) {
|
||||||
Matcher m = p.matcher(baseString);
|
Matcher m = p.matcher(baseString);
|
||||||
if (baseString == null || !m.find()) {
|
if (baseString == null || !m.find()) {
|
||||||
throw new IllegalArgumentException("Base String must match this regular expression: " + p.toString());
|
throw new IllegalArgumentException("Base String must match this regular expression: " + p);
|
||||||
}
|
}
|
||||||
this.baseString = baseString;
|
this.baseString = baseString;
|
||||||
this.minIdx = minIdxInclusive;
|
this.minIdx = minIdxInclusive;
|
||||||
|
|
|
@ -31,7 +31,7 @@ import java.util.Iterator;
|
||||||
* @author Adam Gibson
|
* @author Adam Gibson
|
||||||
*/
|
*/
|
||||||
public class StringSplit implements InputSplit {
|
public class StringSplit implements InputSplit {
|
||||||
private String data;
|
private final String data;
|
||||||
|
|
||||||
public StringSplit(String data) {
|
public StringSplit(String data) {
|
||||||
this.data = data;
|
this.data = data;
|
||||||
|
|
|
@ -449,7 +449,7 @@ public class TransformProcess implements Serializable {
|
||||||
/**
|
/**
|
||||||
* Infer the categories for the given record reader for a particular column
|
* Infer the categories for the given record reader for a particular column
|
||||||
* Note that each "column index" is a column in the context of:
|
* Note that each "column index" is a column in the context of:
|
||||||
* List<Writable> record = ...;
|
* {@code List<Writable> record = ...;}
|
||||||
* record.get(columnIndex);
|
* record.get(columnIndex);
|
||||||
*
|
*
|
||||||
* Note that anything passed in as a column will be automatically converted to a
|
* Note that anything passed in as a column will be automatically converted to a
|
||||||
|
@ -483,7 +483,7 @@ public class TransformProcess implements Serializable {
|
||||||
* if you have more than one column you plan on inferring categories for)
|
* if you have more than one column you plan on inferring categories for)
|
||||||
*
|
*
|
||||||
* Note that each "column index" is a column in the context of:
|
* Note that each "column index" is a column in the context of:
|
||||||
* List<Writable> record = ...;
|
* {@code List<Writable> record = ...;}
|
||||||
* record.get(columnIndex);
|
* record.get(columnIndex);
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
|
@ -607,8 +607,8 @@ public class TransformProcess implements Serializable {
|
||||||
*/
|
*/
|
||||||
public static class Builder {
|
public static class Builder {
|
||||||
|
|
||||||
private List<DataAction> actionList = new ArrayList<>();
|
private final List<DataAction> actionList = new ArrayList<>();
|
||||||
private Schema initialSchema;
|
private final Schema initialSchema;
|
||||||
|
|
||||||
public Builder(Schema initialSchema) {
|
public Builder(Schema initialSchema) {
|
||||||
this.initialSchema = initialSchema;
|
this.initialSchema = initialSchema;
|
||||||
|
@ -1274,7 +1274,7 @@ public class TransformProcess implements Serializable {
|
||||||
* not be modified.
|
* not be modified.
|
||||||
*
|
*
|
||||||
* @param columnName Name of the column in which to do replacement
|
* @param columnName Name of the column in which to do replacement
|
||||||
* @param mapping Map of oldValues -> newValues
|
* @param mapping Map of oldValues -> newValues
|
||||||
*/
|
*/
|
||||||
public Builder stringMapTransform(String columnName, Map<String, String> mapping) {
|
public Builder stringMapTransform(String columnName, Map<String, String> mapping) {
|
||||||
return transform(new StringMapTransform(columnName, mapping));
|
return transform(new StringMapTransform(columnName, mapping));
|
||||||
|
@ -1358,7 +1358,8 @@ public class TransformProcess implements Serializable {
|
||||||
* Keys in the map are the regular expressions; the Values in the map are their String replacements.
|
* Keys in the map are the regular expressions; the Values in the map are their String replacements.
|
||||||
* For example:
|
* For example:
|
||||||
* <blockquote>
|
* <blockquote>
|
||||||
* <table cellpadding="2">
|
* <table>
|
||||||
|
* <caption></caption>
|
||||||
* <tr>
|
* <tr>
|
||||||
* <th>Original</th>
|
* <th>Original</th>
|
||||||
* <th>Regex</th>
|
* <th>Regex</th>
|
||||||
|
@ -1378,7 +1379,7 @@ public class TransformProcess implements Serializable {
|
||||||
* <td>BoneConeTone</td>
|
* <td>BoneConeTone</td>
|
||||||
* </tr>
|
* </tr>
|
||||||
* <tr>
|
* <tr>
|
||||||
* <td>'  4.25 '</td>
|
* <td>' 4.25 '</td>
|
||||||
* <td>^\\s+|\\s+$</td>
|
* <td>^\\s+|\\s+$</td>
|
||||||
* <td></td>
|
* <td></td>
|
||||||
* <td>'4.25'</td>
|
* <td>'4.25'</td>
|
||||||
|
|
|
@ -55,7 +55,7 @@ public class NDArrayAnalysis implements ColumnAnalysis {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
Map<Integer, Long> sortedCountsByRank = new LinkedHashMap<>();
|
Map<Integer, Long> sortedCountsByRank = new LinkedHashMap<>();
|
||||||
List<Integer> keys =
|
List<Integer> keys =
|
||||||
new ArrayList<>(countsByRank == null ? Collections.<Integer>emptySet() : countsByRank.keySet());
|
new ArrayList<>(countsByRank == null ? Collections.emptySet() : countsByRank.keySet());
|
||||||
Collections.sort(keys);
|
Collections.sort(keys);
|
||||||
for (Integer i : keys) {
|
for (Integer i : keys) {
|
||||||
sortedCountsByRank.put(i, countsByRank.get(i));
|
sortedCountsByRank.put(i, countsByRank.get(i));
|
||||||
|
|
|
@ -101,8 +101,8 @@ public class IntegerAnalysisCounter implements AnalysisCounter<IntegerAnalysisCo
|
||||||
countNegative++;
|
countNegative++;
|
||||||
}
|
}
|
||||||
|
|
||||||
digest.add((double) value);
|
digest.add(value);
|
||||||
counter.add((double) value);
|
counter.add(value);
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class NDArrayAnalysisCounter implements AnalysisCounter<NDArrayAnalysisCo
|
||||||
private long minLength = Long.MAX_VALUE;
|
private long minLength = Long.MAX_VALUE;
|
||||||
private long maxLength = -1;
|
private long maxLength = -1;
|
||||||
private long totalNDArrayValues;
|
private long totalNDArrayValues;
|
||||||
private Map<Integer, Long> countsByRank = new HashMap<>();
|
private final Map<Integer, Long> countsByRank = new HashMap<>();
|
||||||
private double minValue = Double.MAX_VALUE;
|
private double minValue = Double.MAX_VALUE;
|
||||||
private double maxValue = -Double.MAX_VALUE;
|
private double maxValue = -Double.MAX_VALUE;
|
||||||
|
|
||||||
|
|
|
@ -83,7 +83,7 @@ public class StringAnalysisCounter implements AnalysisCounter<StringAnalysisCoun
|
||||||
countMaxLength = 1;
|
countMaxLength = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
counter.add((double) length);
|
counter.add(length);
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,9 +27,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
public class CategoricalHistogramCounter implements HistogramCounter {
|
public class CategoricalHistogramCounter implements HistogramCounter {
|
||||||
private HashMap<String, Integer> counts = new HashMap<>();
|
private final HashMap<String, Integer> counts = new HashMap<>();
|
||||||
|
|
||||||
private List<String> stateNames;
|
private final List<String> stateNames;
|
||||||
|
|
||||||
public CategoricalHistogramCounter(List<String> stateNames) {
|
public CategoricalHistogramCounter(List<String> stateNames) {
|
||||||
this.stateNames = stateNames;
|
this.stateNames = stateNames;
|
||||||
|
|
|
@ -34,8 +34,8 @@ import java.io.ObjectInputStream;
|
||||||
|
|
||||||
public class TDigestDeserializer extends JsonDeserializer<TDigest> {
|
public class TDigestDeserializer extends JsonDeserializer<TDigest> {
|
||||||
@Override
|
@Override
|
||||||
public TDigest deserialize(JsonParser jp, DeserializationContext d) throws IOException, JsonProcessingException {
|
public TDigest deserialize(JsonParser jp, DeserializationContext d) throws IOException {
|
||||||
JsonNode node = (JsonNode)jp.getCodec().readTree(jp);
|
JsonNode node = jp.getCodec().readTree(jp);
|
||||||
String field = node.get("digest").asText();
|
String field = node.get("digest").asText();
|
||||||
Base64 b = new Base64();
|
Base64 b = new Base64();
|
||||||
byte[] bytes = b.decode(field);
|
byte[] bytes = b.decode(field);
|
||||||
|
|
|
@ -33,7 +33,7 @@ import java.io.ObjectOutputStream;
|
||||||
|
|
||||||
public class TDigestSerializer extends JsonSerializer<TDigest> {
|
public class TDigestSerializer extends JsonSerializer<TDigest> {
|
||||||
@Override
|
@Override
|
||||||
public void serialize(TDigest td, JsonGenerator j, SerializerProvider sp) throws IOException, JsonProcessingException {
|
public void serialize(TDigest td, JsonGenerator j, SerializerProvider sp) throws IOException {
|
||||||
try(ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)){
|
try(ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)){
|
||||||
oos.writeObject(td);
|
oos.writeObject(td);
|
||||||
oos.close();
|
oos.close();
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.datavec.api.writable.Writable;
|
||||||
public class BytesQualityAnalysisState implements QualityAnalysisState<BytesQualityAnalysisState> {
|
public class BytesQualityAnalysisState implements QualityAnalysisState<BytesQualityAnalysisState> {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private BytesQuality bytesQuality;
|
private final BytesQuality bytesQuality;
|
||||||
|
|
||||||
public BytesQualityAnalysisState() {
|
public BytesQualityAnalysisState() {
|
||||||
this.bytesQuality = new BytesQuality();
|
this.bytesQuality = new BytesQuality();
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class CategoricalQualityAnalysisState implements QualityAnalysisState<Cat
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private CategoricalQuality categoricalQuality;
|
private CategoricalQuality categoricalQuality;
|
||||||
private CategoricalQualityAddFunction addFunction;
|
private final CategoricalQualityAddFunction addFunction;
|
||||||
private CategoricalQualityMergeFunction mergeFunction;
|
private final CategoricalQualityMergeFunction mergeFunction;
|
||||||
|
|
||||||
public CategoricalQualityAnalysisState(CategoricalMetaData integerMetaData) {
|
public CategoricalQualityAnalysisState(CategoricalMetaData integerMetaData) {
|
||||||
this.categoricalQuality = new CategoricalQuality();
|
this.categoricalQuality = new CategoricalQuality();
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class IntegerQualityAnalysisState implements QualityAnalysisState<Integer
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private IntegerQuality integerQuality;
|
private IntegerQuality integerQuality;
|
||||||
private IntegerQualityAddFunction addFunction;
|
private final IntegerQualityAddFunction addFunction;
|
||||||
private IntegerQualityMergeFunction mergeFunction;
|
private final IntegerQualityMergeFunction mergeFunction;
|
||||||
|
|
||||||
public IntegerQualityAnalysisState(IntegerMetaData integerMetaData) {
|
public IntegerQualityAnalysisState(IntegerMetaData integerMetaData) {
|
||||||
this.integerQuality = new IntegerQuality(0, 0, 0, 0, 0);
|
this.integerQuality = new IntegerQuality(0, 0, 0, 0, 0);
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class LongQualityAnalysisState implements QualityAnalysisState<LongQualit
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private LongQuality longQuality;
|
private LongQuality longQuality;
|
||||||
private LongQualityAddFunction addFunction;
|
private final LongQualityAddFunction addFunction;
|
||||||
private LongQualityMergeFunction mergeFunction;
|
private final LongQualityMergeFunction mergeFunction;
|
||||||
|
|
||||||
public LongQualityAnalysisState(LongMetaData longMetaData) {
|
public LongQualityAnalysisState(LongMetaData longMetaData) {
|
||||||
this.longQuality = new LongQuality();
|
this.longQuality = new LongQuality();
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class RealQualityAnalysisState implements QualityAnalysisState<RealQualit
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private DoubleQuality realQuality;
|
private DoubleQuality realQuality;
|
||||||
private RealQualityAddFunction addFunction;
|
private final RealQualityAddFunction addFunction;
|
||||||
private RealQualityMergeFunction mergeFunction;
|
private final RealQualityMergeFunction mergeFunction;
|
||||||
|
|
||||||
public RealQualityAnalysisState(DoubleMetaData realMetaData) {
|
public RealQualityAnalysisState(DoubleMetaData realMetaData) {
|
||||||
this.realQuality = new DoubleQuality();
|
this.realQuality = new DoubleQuality();
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class StringQualityAnalysisState implements QualityAnalysisState<StringQu
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private StringQuality stringQuality;
|
private StringQuality stringQuality;
|
||||||
private StringQualityAddFunction addFunction;
|
private final StringQualityAddFunction addFunction;
|
||||||
private StringQualityMergeFunction mergeFunction;
|
private final StringQualityMergeFunction mergeFunction;
|
||||||
|
|
||||||
public StringQualityAnalysisState(StringMetaData stringMetaData) {
|
public StringQualityAnalysisState(StringMetaData stringMetaData) {
|
||||||
this.stringQuality = new StringQuality();
|
this.stringQuality = new StringQuality();
|
||||||
|
|
|
@ -31,8 +31,8 @@ public class TimeQualityAnalysisState implements QualityAnalysisState<TimeQualit
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private TimeQuality timeQuality;
|
private TimeQuality timeQuality;
|
||||||
private TimeQualityAddFunction addFunction;
|
private final TimeQualityAddFunction addFunction;
|
||||||
private TimeQualityMergeFunction mergeFunction;
|
private final TimeQualityMergeFunction mergeFunction;
|
||||||
|
|
||||||
public TimeQualityAnalysisState(TimeMetaData timeMetaData) {
|
public TimeQualityAnalysisState(TimeMetaData timeMetaData) {
|
||||||
this.timeQuality = new TimeQuality();
|
this.timeQuality = new TimeQuality();
|
||||||
|
|
|
@ -46,12 +46,11 @@ public class SequenceLengthAnalysis implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
String sb = "SequenceLengthAnalysis(" + "totalNumSequences=" + totalNumSequences +
|
||||||
sb.append("SequenceLengthAnalysis(").append("totalNumSequences=").append(totalNumSequences)
|
",minSeqLength=" + minSeqLength + ",maxSeqLength=" + maxSeqLength +
|
||||||
.append(",minSeqLength=").append(minSeqLength).append(",maxSeqLength=").append(maxSeqLength)
|
",countZeroLength=" + countZeroLength + ",countOneLength=" +
|
||||||
.append(",countZeroLength=").append(countZeroLength).append(",countOneLength=")
|
countOneLength + ",meanLength=" + meanLength + ")";
|
||||||
.append(countOneLength).append(",meanLength=").append(meanLength).append(")");
|
return sb;
|
||||||
return sb.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,7 +92,7 @@ public abstract class BaseColumnCondition implements ColumnCondition {
|
||||||
return false;
|
return false;
|
||||||
case NoSequenceMode:
|
case NoSequenceMode:
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"Column condition " + toString() + " does not support sequence execution");
|
"Column condition " + this + " does not support sequence execution");
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
|
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
|
||||||
}
|
}
|
||||||
|
@ -116,7 +116,7 @@ public abstract class BaseColumnCondition implements ColumnCondition {
|
||||||
return false;
|
return false;
|
||||||
case NoSequenceMode:
|
case NoSequenceMode:
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"Column condition " + toString() + " does not support sequence execution");
|
"Column condition " + this + " does not support sequence execution");
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
|
throw new RuntimeException("Unknown/not implemented sequence mode: " + sequenceMode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class DoubleColumnCondition extends BaseColumnCondition {
|
||||||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||||
*
|
*
|
||||||
* @param columnName Column to check for the condition
|
* @param columnName Column to check for the condition
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public DoubleColumnCondition(String columnName, ConditionOp op, double value) {
|
public DoubleColumnCondition(String columnName, ConditionOp op, double value) {
|
||||||
|
@ -54,7 +54,7 @@ public class DoubleColumnCondition extends BaseColumnCondition {
|
||||||
*
|
*
|
||||||
* @param column Column to check for the condition
|
* @param column Column to check for the condition
|
||||||
* @param sequenceConditionMode Mode for handling sequence data
|
* @param sequenceConditionMode Mode for handling sequence data
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public DoubleColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
public DoubleColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class FloatColumnCondition extends BaseColumnCondition {
|
||||||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||||
*
|
*
|
||||||
* @param columnName Column to check for the condition
|
* @param columnName Column to check for the condition
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public FloatColumnCondition(String columnName, ConditionOp op, float value) {
|
public FloatColumnCondition(String columnName, ConditionOp op, float value) {
|
||||||
|
@ -54,7 +54,7 @@ public class FloatColumnCondition extends BaseColumnCondition {
|
||||||
*
|
*
|
||||||
* @param column Column to check for the condition
|
* @param column Column to check for the condition
|
||||||
* @param sequenceConditionMode Mode for handling sequence data
|
* @param sequenceConditionMode Mode for handling sequence data
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public FloatColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
public FloatColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class IntegerColumnCondition extends BaseColumnCondition {
|
||||||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||||
*
|
*
|
||||||
* @param columnName Column to check for the condition
|
* @param columnName Column to check for the condition
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public IntegerColumnCondition(String columnName, ConditionOp op, int value) {
|
public IntegerColumnCondition(String columnName, ConditionOp op, int value) {
|
||||||
|
@ -54,7 +54,7 @@ public class IntegerColumnCondition extends BaseColumnCondition {
|
||||||
*
|
*
|
||||||
* @param column Column to check for the condition
|
* @param column Column to check for the condition
|
||||||
* @param sequenceConditionMode Mode for handling sequence data
|
* @param sequenceConditionMode Mode for handling sequence data
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public IntegerColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
public IntegerColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op,
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class LongColumnCondition extends BaseColumnCondition {
|
||||||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||||
*
|
*
|
||||||
* @param columnName Column to check for the condition
|
* @param columnName Column to check for the condition
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public LongColumnCondition(String columnName, ConditionOp op, long value) {
|
public LongColumnCondition(String columnName, ConditionOp op, long value) {
|
||||||
|
@ -54,7 +54,7 @@ public class LongColumnCondition extends BaseColumnCondition {
|
||||||
*
|
*
|
||||||
* @param column Column to check for the condition
|
* @param column Column to check for the condition
|
||||||
* @param sequenceConditionMode Mode for handling sequence data
|
* @param sequenceConditionMode Mode for handling sequence data
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Value to use in the condition
|
* @param value Value to use in the condition
|
||||||
*/
|
*/
|
||||||
public LongColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {
|
public LongColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class TimeColumnCondition extends BaseColumnCondition {
|
||||||
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
* Uses default sequence condition mode, {@link BaseColumnCondition#DEFAULT_SEQUENCE_CONDITION_MODE}
|
||||||
*
|
*
|
||||||
* @param columnName Column to check for the condition
|
* @param columnName Column to check for the condition
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Time value (in epoch millisecond format) to use in the condition
|
* @param value Time value (in epoch millisecond format) to use in the condition
|
||||||
*/
|
*/
|
||||||
public TimeColumnCondition(String columnName, ConditionOp op, long value) {
|
public TimeColumnCondition(String columnName, ConditionOp op, long value) {
|
||||||
|
@ -54,7 +54,7 @@ public class TimeColumnCondition extends BaseColumnCondition {
|
||||||
*
|
*
|
||||||
* @param column Column to check for the condition
|
* @param column Column to check for the condition
|
||||||
* @param sequenceConditionMode Mode for handling sequence data
|
* @param sequenceConditionMode Mode for handling sequence data
|
||||||
* @param op Operation (<, >=, !=, etc)
|
* @param op Operation {@code (<, >=, !=, etc)}
|
||||||
* @param value Time value (in epoch millisecond format) to use in the condition
|
* @param value Time value (in epoch millisecond format) to use in the condition
|
||||||
*/
|
*/
|
||||||
public TimeColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {
|
public TimeColumnCondition(String column, SequenceConditionMode sequenceConditionMode, ConditionOp op, long value) {
|
||||||
|
|
|
@ -111,24 +111,18 @@ public class FilterInvalidValues implements Filter {
|
||||||
private boolean filterColumn(List<?> row, int i) {
|
private boolean filterColumn(List<?> row, int i) {
|
||||||
ColumnMetaData meta = schema.getMetaData(i);
|
ColumnMetaData meta = schema.getMetaData(i);
|
||||||
if (row.get(i) instanceof Float) {
|
if (row.get(i) instanceof Float) {
|
||||||
if (!meta.isValid(new FloatWritable((Float) row.get(i))))
|
return !meta.isValid(new FloatWritable((Float) row.get(i)));
|
||||||
return true;
|
|
||||||
} else if (row.get(i) instanceof Double) {
|
} else if (row.get(i) instanceof Double) {
|
||||||
if (!meta.isValid(new DoubleWritable((Double) row.get(i))))
|
return !meta.isValid(new DoubleWritable((Double) row.get(i)));
|
||||||
return true;
|
|
||||||
} else if (row.get(i) instanceof String) {
|
} else if (row.get(i) instanceof String) {
|
||||||
if (!meta.isValid(new Text(((String) row.get(i)).toString())))
|
return !meta.isValid(new Text(((String) row.get(i))));
|
||||||
return true;
|
|
||||||
} else if (row.get(i) instanceof Integer) {
|
} else if (row.get(i) instanceof Integer) {
|
||||||
if (!meta.isValid(new IntWritable((Integer) row.get(i))))
|
return !meta.isValid(new IntWritable((Integer) row.get(i)));
|
||||||
return true;
|
|
||||||
|
|
||||||
} else if (row.get(i) instanceof Long) {
|
} else if (row.get(i) instanceof Long) {
|
||||||
if (!meta.isValid(new LongWritable((Long) row.get(i))))
|
return !meta.isValid(new LongWritable((Long) row.get(i)));
|
||||||
return true;
|
|
||||||
} else if (row.get(i) instanceof Boolean) {
|
} else if (row.get(i) instanceof Boolean) {
|
||||||
if (!meta.isValid(new BooleanWritable((Boolean) row.get(i))))
|
return !meta.isValid(new BooleanWritable((Boolean) row.get(i)));
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,7 +96,7 @@ public class Join implements Serializable {
|
||||||
|
|
||||||
public static class Builder {
|
public static class Builder {
|
||||||
|
|
||||||
private JoinType joinType;
|
private final JoinType joinType;
|
||||||
private Schema leftSchema;
|
private Schema leftSchema;
|
||||||
private Schema rightSchema;
|
private Schema rightSchema;
|
||||||
private String[] joinColumnsLeft;
|
private String[] joinColumnsLeft;
|
||||||
|
|
|
@ -84,9 +84,8 @@ public class BinaryMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
String sb = "BinaryMetaData(name=\"" + name + "\"," +
|
||||||
sb.append("BinaryMetaData(name=\"").append(name).append("\",");
|
")";
|
||||||
sb.append(")");
|
return sb;
|
||||||
return sb.toString();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,9 +84,8 @@ public class BooleanMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
String sb = "BooleanMetaData(name=\"" + name + "\"," +
|
||||||
sb.append("BooleanMetaData(name=\"").append(name).append("\",");
|
")";
|
||||||
sb.append(")");
|
return sb;
|
||||||
return sb.toString();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,10 +84,7 @@ public class DoubleMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && d < minAllowedValue)
|
if (minAllowedValue != null && d < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
return maxAllowedValue == null || !(d > maxAllowedValue);
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -115,10 +112,7 @@ public class DoubleMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && d < minAllowedValue)
|
if (minAllowedValue != null && d < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
return maxAllowedValue == null || !(d > maxAllowedValue);
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -84,10 +84,7 @@ public class FloatMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && d < minAllowedValue)
|
if (minAllowedValue != null && d < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
return maxAllowedValue == null || d <= maxAllowedValue;
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -115,10 +112,7 @@ public class FloatMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && d < minAllowedValue)
|
if (minAllowedValue != null && d < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && d > maxAllowedValue)
|
return maxAllowedValue == null || d <= maxAllowedValue;
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -65,9 +65,7 @@ public class IntegerMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && value < minAllowedValue)
|
if (minAllowedValue != null && value < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -90,9 +88,7 @@ public class IntegerMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && value < minAllowedValue)
|
if (minAllowedValue != null && value < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -66,10 +66,7 @@ public class LongMetaData extends BaseColumnMetaData {
|
||||||
}
|
}
|
||||||
if (minAllowedValue != null && value < minAllowedValue)
|
if (minAllowedValue != null && value < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -92,10 +89,7 @@ public class LongMetaData extends BaseColumnMetaData {
|
||||||
|
|
||||||
if (minAllowedValue != null && value < minAllowedValue)
|
if (minAllowedValue != null && value < minAllowedValue)
|
||||||
return false;
|
return false;
|
||||||
if (maxAllowedValue != null && value > maxAllowedValue)
|
return maxAllowedValue == null || value <= maxAllowedValue;
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -97,9 +97,9 @@ public class AggregatorImpls {
|
||||||
} else if (a instanceof Float || b instanceof Float) {
|
} else if (a instanceof Float || b instanceof Float) {
|
||||||
return new Float(a.floatValue() + b.floatValue());
|
return new Float(a.floatValue() + b.floatValue());
|
||||||
} else if (a instanceof Long || b instanceof Long) {
|
} else if (a instanceof Long || b instanceof Long) {
|
||||||
return new Long(a.longValue() + b.longValue());
|
return Long.valueOf(a.longValue() + b.longValue());
|
||||||
} else {
|
} else {
|
||||||
return new Integer(a.intValue() + b.intValue());
|
return Integer.valueOf(a.intValue() + b.intValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -146,9 +146,9 @@ public class AggregatorImpls {
|
||||||
} else if (a instanceof Float || b instanceof Float) {
|
} else if (a instanceof Float || b instanceof Float) {
|
||||||
return new Float(a.floatValue() * b.floatValue());
|
return new Float(a.floatValue() * b.floatValue());
|
||||||
} else if (a instanceof Long || b instanceof Long) {
|
} else if (a instanceof Long || b instanceof Long) {
|
||||||
return new Long(a.longValue() * b.longValue());
|
return Long.valueOf(a.longValue() * b.longValue());
|
||||||
} else {
|
} else {
|
||||||
return new Integer(a.intValue() * b.intValue());
|
return Integer.valueOf(a.intValue() * b.intValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -347,7 +347,7 @@ public class AggregatorImpls {
|
||||||
* of the square root of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
|
* of the square root of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
|
||||||
*
|
*
|
||||||
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
||||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||||
*/
|
*/
|
||||||
public static class AggregableStdDev<T extends Number> implements IAggregableReduceOp<T, Writable> {
|
public static class AggregableStdDev<T extends Number> implements IAggregableReduceOp<T, Writable> {
|
||||||
|
|
||||||
|
@ -402,7 +402,7 @@ public class AggregatorImpls {
|
||||||
* of the square root of the arithmetic mean of squared differences to the mean.
|
* of the square root of the arithmetic mean of squared differences to the mean.
|
||||||
*
|
*
|
||||||
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
||||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||||
*/
|
*/
|
||||||
public static class AggregableUncorrectedStdDev<T extends Number> extends AggregableStdDev<T> {
|
public static class AggregableUncorrectedStdDev<T extends Number> extends AggregableStdDev<T> {
|
||||||
|
|
||||||
|
@ -418,7 +418,7 @@ public class AggregatorImpls {
|
||||||
* of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
|
* of the arithmetic mean of squared differences to the mean, corrected with Bessel's correction.
|
||||||
*
|
*
|
||||||
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
* See <a href="https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation">https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation</a>
|
||||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||||
*/
|
*/
|
||||||
public static class AggregableVariance<T extends Number> implements IAggregableReduceOp<T, Writable> {
|
public static class AggregableVariance<T extends Number> implements IAggregableReduceOp<T, Writable> {
|
||||||
|
|
||||||
|
@ -474,7 +474,7 @@ public class AggregatorImpls {
|
||||||
* of the arithmetic mean of squared differences to the mean.
|
* of the arithmetic mean of squared differences to the mean.
|
||||||
*
|
*
|
||||||
* See <a href="https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance">https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance</a>
|
* See <a href="https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance">https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance</a>
|
||||||
* This is computed with Welford's method for increased numerical stability & aggregability.
|
* This is computed with Welford's method for increased numerical stability & aggregability.
|
||||||
*/
|
*/
|
||||||
public static class AggregablePopulationVariance<T extends Number> extends AggregableVariance<T> {
|
public static class AggregablePopulationVariance<T extends Number> extends AggregableVariance<T> {
|
||||||
|
|
||||||
|
@ -491,7 +491,7 @@ public class AggregatorImpls {
|
||||||
* <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
|
* <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
|
||||||
*
|
*
|
||||||
* The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting
|
* The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting
|
||||||
* a nonzero `sp > p` in HyperLogLogPlus(p, sp) would trigger sparse
|
* a nonzero `sp > p` in HyperLogLogPlus(p, sp) would trigger sparse
|
||||||
* representation of registers, which may reduce the memory consumption
|
* representation of registers, which may reduce the memory consumption
|
||||||
* and increase accuracy when the cardinality is small.
|
* and increase accuracy when the cardinality is small.
|
||||||
* @param <T>
|
* @param <T>
|
||||||
|
@ -501,7 +501,7 @@ public class AggregatorImpls {
|
||||||
|
|
||||||
private float p = 0.05f;
|
private float p = 0.05f;
|
||||||
@Getter
|
@Getter
|
||||||
private HyperLogLogPlus hll = new HyperLogLogPlus((int) Math.ceil(2.0 * Math.log(1.054 / p) / Math.log(2)), 0);
|
private final HyperLogLogPlus hll = new HyperLogLogPlus((int) Math.ceil(2.0 * Math.log(1.054 / p) / Math.log(2)), 0);
|
||||||
|
|
||||||
public AggregableCountUnique(float precision) {
|
public AggregableCountUnique(float precision) {
|
||||||
this.p = precision;
|
this.p = precision;
|
||||||
|
|
|
@ -36,7 +36,7 @@ public class DispatchWithConditionOp<U> extends DispatchOp<Writable, U>
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@NonNull
|
@NonNull
|
||||||
private List<Condition> conditions;
|
private final List<Condition> conditions;
|
||||||
|
|
||||||
|
|
||||||
public DispatchWithConditionOp(List<IAggregableReduceOp<Writable, List<U>>> ops, List<Condition> conds) {
|
public DispatchWithConditionOp(List<IAggregableReduceOp<Writable, List<U>>> ops, List<Condition> conds) {
|
||||||
|
|
|
@ -37,14 +37,13 @@ public interface AggregableColumnReduction extends Serializable, ColumnOp {
|
||||||
* and NOT the single row
|
* and NOT the single row
|
||||||
* (as is usually the case for {@code List<Writable>} instances
|
* (as is usually the case for {@code List<Writable>} instances
|
||||||
*
|
*
|
||||||
* @param columnData The Writable objects for a column
|
|
||||||
* @return Writable containing the reduced data
|
* @return Writable containing the reduced data
|
||||||
*/
|
*/
|
||||||
IAggregableReduceOp<Writable, List<Writable>> reduceOp();
|
IAggregableReduceOp<Writable, List<Writable>> reduceOp();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Post-reduce: what is the name of the column?
|
* Post-reduce: what is the name of the column?
|
||||||
* For example, "myColumn" -> "mean(myColumn)"
|
* For example, "myColumn" -> "mean(myColumn)"
|
||||||
*
|
*
|
||||||
* @param columnInputName Name of the column before reduction
|
* @param columnInputName Name of the column before reduction
|
||||||
* @return Name of the column after the reduction
|
* @return Name of the column after the reduction
|
||||||
|
|
|
@ -43,7 +43,7 @@ public interface ColumnReduction extends Serializable, ColumnOp {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Post-reduce: what is the name of the column?
|
* Post-reduce: what is the name of the column?
|
||||||
* For example, "myColumn" -> "mean(myColumn)"
|
* For example, "myColumn" -> "mean(myColumn)"
|
||||||
*
|
*
|
||||||
* @param columnInputName Name of the column before reduction
|
* @param columnInputName Name of the column before reduction
|
||||||
* @return Name of the column after the reduction
|
* @return Name of the column after the reduction
|
||||||
|
|
|
@ -291,11 +291,11 @@ public class Reducer implements IAssociativeReducer {
|
||||||
|
|
||||||
public static class Builder {
|
public static class Builder {
|
||||||
|
|
||||||
private ReduceOp defaultOp;
|
private final ReduceOp defaultOp;
|
||||||
private Map<String, List<ReduceOp>> opMap = new HashMap<>();
|
private final Map<String, List<ReduceOp>> opMap = new HashMap<>();
|
||||||
private Map<String, AggregableColumnReduction> customReductions = new HashMap<>();
|
private final Map<String, AggregableColumnReduction> customReductions = new HashMap<>();
|
||||||
private Map<String, ConditionalReduction> conditionalReductions = new HashMap<>();
|
private final Map<String, ConditionalReduction> conditionalReductions = new HashMap<>();
|
||||||
private Set<String> ignoreInvalidInColumns = new HashSet<>();
|
private final Set<String> ignoreInvalidInColumns = new HashSet<>();
|
||||||
private String[] keyColumns;
|
private String[] keyColumns;
|
||||||
|
|
||||||
|
|
||||||
|
@ -480,7 +480,6 @@ public class Reducer implements IAssociativeReducer {
|
||||||
* ignored/excluded.
|
* ignored/excluded.
|
||||||
*
|
*
|
||||||
* @param column Name of the column to execute the conditional reduction on
|
* @param column Name of the column to execute the conditional reduction on
|
||||||
* @param outputName Name of the column, after the reduction has been executed
|
|
||||||
* @param reductions Reductions to execute
|
* @param reductions Reductions to execute
|
||||||
* @param condition Condition to use in the reductions
|
* @param condition Condition to use in the reductions
|
||||||
*/
|
*/
|
||||||
|
@ -500,7 +499,6 @@ public class Reducer implements IAssociativeReducer {
|
||||||
*
|
*
|
||||||
* @param column Name of the column to execute the conditional reduction on
|
* @param column Name of the column to execute the conditional reduction on
|
||||||
* @param outputName Name of the column, after the reduction has been executed
|
* @param outputName Name of the column, after the reduction has been executed
|
||||||
* @param reductions Reductions to execute
|
|
||||||
* @param condition Condition to use in the reductions
|
* @param condition Condition to use in the reductions
|
||||||
*/
|
*/
|
||||||
public Builder conditionalReduction(String column, String outputName, ReduceOp reduction, Condition condition) {
|
public Builder conditionalReduction(String column, String outputName, ReduceOp reduction, Condition condition) {
|
||||||
|
|
|
@ -69,7 +69,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ColumnMetaData> getColumnOutputMetaData(List<String> newColumnName, ColumnMetaData columnInputMeta) {
|
public List<ColumnMetaData> getColumnOutputMetaData(List<String> newColumnName, ColumnMetaData columnInputMeta) {
|
||||||
return Collections.<ColumnMetaData>singletonList(new StringMetaData(newColumnName.get(0)));
|
return Collections.singletonList(new StringMetaData(newColumnName.get(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -111,7 +111,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
|
||||||
public static class AverageCoordinateReduceOp implements IAggregableReduceOp<Writable, List<Writable>> {
|
public static class AverageCoordinateReduceOp implements IAggregableReduceOp<Writable, List<Writable>> {
|
||||||
private static final double PI_180 = Math.PI / 180.0;
|
private static final double PI_180 = Math.PI / 180.0;
|
||||||
|
|
||||||
private String delim;
|
private final String delim;
|
||||||
|
|
||||||
private double sumx;
|
private double sumx;
|
||||||
private double sumy;
|
private double sumy;
|
||||||
|
@ -186,7 +186,7 @@ public class GeographicMidpointReduction implements AggregableColumnReduction {
|
||||||
Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");
|
Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");
|
||||||
|
|
||||||
String str = latDeg + delim + longDeg;
|
String str = latDeg + delim + longDeg;
|
||||||
return Collections.<Writable>singletonList(new Text(str));
|
return Collections.singletonList(new Text(str));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ import org.datavec.api.writable.Writable;
|
||||||
|
|
||||||
public class TypeConversion {
|
public class TypeConversion {
|
||||||
|
|
||||||
private static TypeConversion SINGLETON = new TypeConversion();
|
private static final TypeConversion SINGLETON = new TypeConversion();
|
||||||
|
|
||||||
private TypeConversion() {}
|
private TypeConversion() {}
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class SplitMaxLengthSequence implements SequenceSplit {
|
||||||
/**
|
/**
|
||||||
* @param maxSequenceLength max length of sequences
|
* @param maxSequenceLength max length of sequences
|
||||||
* @param equalSplits if true: split larger sequences into equal sized subsequences. If false: split into
|
* @param equalSplits if true: split larger sequences into equal sized subsequences. If false: split into
|
||||||
* n maxSequenceLength sequences, and (if necessary) 1 with 1 <= length < maxSequenceLength
|
* n maxSequenceLength sequences, and (if necessary) 1 with 1 <= length < maxSequenceLength
|
||||||
*/
|
*/
|
||||||
public SplitMaxLengthSequence(@JsonProperty("maxSequenceLength") int maxSequenceLength,
|
public SplitMaxLengthSequence(@JsonProperty("maxSequenceLength") int maxSequenceLength,
|
||||||
@JsonProperty("equalSplits") boolean equalSplits) {
|
@JsonProperty("equalSplits") boolean equalSplits) {
|
||||||
|
|
|
@ -295,7 +295,7 @@ public abstract class BaseSerializer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deserialize an IStringReducer List serialized using {@link #serializeReducerList(List)}, or
|
* Deserialize an IStringReducer List serialized using {@link #serializeReducerList(List)}, or
|
||||||
* an array serialized using {@link #serialize(IReducer[])}
|
* an array serialized using {@code #serialize(IReducer[])}
|
||||||
*
|
*
|
||||||
* @param str String representation (YAML/JSON) of the IStringReducer list
|
* @param str String representation (YAML/JSON) of the IStringReducer list
|
||||||
* @return {@code List<IStringReducer>}
|
* @return {@code List<IStringReducer>}
|
||||||
|
|
|
@ -34,8 +34,8 @@ import com.fasterxml.jackson.datatype.joda.JodaModule;
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class JsonMappers {
|
public class JsonMappers {
|
||||||
|
|
||||||
private static ObjectMapper jsonMapper;
|
private static final ObjectMapper jsonMapper;
|
||||||
private static ObjectMapper yamlMapper;
|
private static final ObjectMapper yamlMapper;
|
||||||
private static ObjectMapper legacyMapper; //For 1.0.0-alpha and earlier TransformProcess etc
|
private static ObjectMapper legacyMapper; //For 1.0.0-alpha and earlier TransformProcess etc
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
|
|
@ -24,7 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
public class JsonSerializer extends BaseSerializer {
|
public class JsonSerializer extends BaseSerializer {
|
||||||
|
|
||||||
private ObjectMapper om;
|
private final ObjectMapper om;
|
||||||
|
|
||||||
public JsonSerializer() {
|
public JsonSerializer() {
|
||||||
this.om = JsonMappers.getMapper();
|
this.om = JsonMappers.getMapper();
|
||||||
|
|
|
@ -37,7 +37,7 @@ public class ListWrappers {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public static class TransformList {
|
public static class TransformList {
|
||||||
private List<Transform> list;
|
private final List<Transform> list;
|
||||||
|
|
||||||
public TransformList(@JsonProperty("list") List<Transform> list) {
|
public TransformList(@JsonProperty("list") List<Transform> list) {
|
||||||
this.list = list;
|
this.list = list;
|
||||||
|
@ -46,7 +46,7 @@ public class ListWrappers {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public static class FilterList {
|
public static class FilterList {
|
||||||
private List<Filter> list;
|
private final List<Filter> list;
|
||||||
|
|
||||||
public FilterList(@JsonProperty("list") List<Filter> list) {
|
public FilterList(@JsonProperty("list") List<Filter> list) {
|
||||||
this.list = list;
|
this.list = list;
|
||||||
|
@ -55,7 +55,7 @@ public class ListWrappers {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public static class ConditionList {
|
public static class ConditionList {
|
||||||
private List<Condition> list;
|
private final List<Condition> list;
|
||||||
|
|
||||||
public ConditionList(@JsonProperty("list") List<Condition> list) {
|
public ConditionList(@JsonProperty("list") List<Condition> list) {
|
||||||
this.list = list;
|
this.list = list;
|
||||||
|
@ -64,7 +64,7 @@ public class ListWrappers {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public static class ReducerList {
|
public static class ReducerList {
|
||||||
private List<IAssociativeReducer> list;
|
private final List<IAssociativeReducer> list;
|
||||||
|
|
||||||
public ReducerList(@JsonProperty("list") List<IAssociativeReducer> list) {
|
public ReducerList(@JsonProperty("list") List<IAssociativeReducer> list) {
|
||||||
this.list = list;
|
this.list = list;
|
||||||
|
@ -73,7 +73,7 @@ public class ListWrappers {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public static class SequenceComparatorList {
|
public static class SequenceComparatorList {
|
||||||
private List<SequenceComparator> list;
|
private final List<SequenceComparator> list;
|
||||||
|
|
||||||
public SequenceComparatorList(@JsonProperty("list") List<SequenceComparator> list) {
|
public SequenceComparatorList(@JsonProperty("list") List<SequenceComparator> list) {
|
||||||
this.list = list;
|
this.list = list;
|
||||||
|
@ -82,7 +82,7 @@ public class ListWrappers {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public static class DataActionList {
|
public static class DataActionList {
|
||||||
private List<DataAction> list;
|
private final List<DataAction> list;
|
||||||
|
|
||||||
public DataActionList(@JsonProperty("list") List<DataAction> list) {
|
public DataActionList(@JsonProperty("list") List<DataAction> list) {
|
||||||
this.list = list;
|
this.list = list;
|
||||||
|
|
|
@ -24,7 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
public class YamlSerializer extends BaseSerializer {
|
public class YamlSerializer extends BaseSerializer {
|
||||||
|
|
||||||
private ObjectMapper om;
|
private final ObjectMapper om;
|
||||||
|
|
||||||
public YamlSerializer() {
|
public YamlSerializer() {
|
||||||
this.om = JsonMappers.getMapperYaml();
|
this.om = JsonMappers.getMapperYaml();
|
||||||
|
|
|
@ -177,10 +177,10 @@ public class StringReducer implements IStringReducer {
|
||||||
|
|
||||||
public static class Builder {
|
public static class Builder {
|
||||||
|
|
||||||
private StringReduceOp defaultOp;
|
private final StringReduceOp defaultOp;
|
||||||
private Map<String, StringReduceOp> opMap = new HashMap<>();
|
private final Map<String, StringReduceOp> opMap = new HashMap<>();
|
||||||
private Map<String, ColumnReduction> customReductions = new HashMap<>();
|
private final Map<String, ColumnReduction> customReductions = new HashMap<>();
|
||||||
private Set<String> ignoreInvalidInColumns = new HashSet<>();
|
private final Set<String> ignoreInvalidInColumns = new HashSet<>();
|
||||||
private String outputColumnName;
|
private String outputColumnName;
|
||||||
private List<String> inputColumns;
|
private List<String> inputColumns;
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,7 @@ public abstract class BaseColumnTransform extends BaseTransform implements Colum
|
||||||
if (writables.size() != inputSchema.numColumns()) {
|
if (writables.size() != inputSchema.numColumns()) {
|
||||||
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
||||||
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
||||||
+ "). Transform = " + toString());
|
+ "). Transform = " + this);
|
||||||
}
|
}
|
||||||
int n = writables.size();
|
int n = writables.size();
|
||||||
List<Writable> out = new ArrayList<>(n);
|
List<Writable> out = new ArrayList<>(n);
|
||||||
|
|
|
@ -96,7 +96,7 @@ public class CategoricalToIntegerTransform extends BaseTransform {
|
||||||
if (writables.size() != inputSchema.numColumns()) {
|
if (writables.size() != inputSchema.numColumns()) {
|
||||||
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
||||||
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
||||||
+ "). Transform = " + toString());
|
+ "). Transform = " + this);
|
||||||
}
|
}
|
||||||
int idx = getColumnIdx();
|
int idx = getColumnIdx();
|
||||||
|
|
||||||
|
|
|
@ -123,7 +123,7 @@ public class CategoricalToOneHotTransform extends BaseTransform {
|
||||||
if (writables.size() != inputSchema.numColumns()) {
|
if (writables.size() != inputSchema.numColumns()) {
|
||||||
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
throw new IllegalStateException("Cannot execute transform: input writables list length (" + writables.size()
|
||||||
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
+ ") does not " + "match expected number of elements (schema: " + inputSchema.numColumns()
|
||||||
+ "). Transform = " + toString());
|
+ "). Transform = " + this);
|
||||||
}
|
}
|
||||||
int idx = getColumnIdx();
|
int idx = getColumnIdx();
|
||||||
|
|
||||||
|
|
|
@ -89,7 +89,7 @@ public class IntegerToCategoricalTransform extends BaseColumnTransform {
|
||||||
|
|
||||||
IntegerToCategoricalTransform o2 = (IntegerToCategoricalTransform) o;
|
IntegerToCategoricalTransform o2 = (IntegerToCategoricalTransform) o;
|
||||||
|
|
||||||
return map != null ? map.equals(o2.map) : o2.map == null;
|
return Objects.equals(map, o2.map);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue