Optimize LineRecordReader (#419)
* Move locations initialization for file input splits to initialize method
* Little optimization for schema detection regular expression

Signed-off-by: Paul Dubs <paul.dubs@gmail.com>
master
parent
c9d1454743
commit
b9d5f1645b
|
@ -60,6 +60,13 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
@Override
|
||||
public void initialize(InputSplit split) throws IOException, InterruptedException {
|
||||
super.initialize(split);
|
||||
if(!(inputSplit instanceof StringSplit || inputSplit instanceof InputStreamInputSplit)){
|
||||
final ArrayList<URI> uris = new ArrayList<>();
|
||||
final Iterator<URI> uriIterator = inputSplit.locationsIterator();
|
||||
while(uriIterator.hasNext()) uris.add(uriIterator.next());
|
||||
|
||||
this.locations = uris.toArray(new URI[0]);
|
||||
}
|
||||
this.iter = getIterator(0);
|
||||
this.initialized = true;
|
||||
}
|
||||
|
@ -68,7 +75,6 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
|
||||
this.conf = conf;
|
||||
initialize(split);
|
||||
this.initialized = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -207,11 +213,6 @@ public class LineRecordReader extends BaseRecordReader {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
final ArrayList<URI> uris = new ArrayList<>();
|
||||
final Iterator<URI> uriIterator = inputSplit.locationsIterator();
|
||||
while(uriIterator.hasNext()) uris.add(uriIterator.next());
|
||||
|
||||
this.locations = uris.toArray(new URI[uris.size()]);
|
||||
if (locations.length > 0) {
|
||||
InputStream inputStream = streamCreatorFn.apply(locations[location]);
|
||||
try {
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.net.URI;
|
|||
import java.net.URISyntaxException;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* A simple utility method to convert a {@code Iterator<String>} to an {@code Iterator<URI>}, where each
|
||||
|
@ -32,6 +33,7 @@ import java.util.NoSuchElementException;
|
|||
*/
|
||||
@AllArgsConstructor
|
||||
public class UriFromPathIterator implements Iterator<URI> {
|
||||
final Pattern schemaPattern = Pattern.compile("^.*?:/.*");
|
||||
|
||||
private final Iterator<String> paths;
|
||||
|
||||
|
@ -42,16 +44,17 @@ public class UriFromPathIterator implements Iterator<URI> {
|
|||
|
||||
@Override
|
||||
public URI next() {
|
||||
|
||||
if (!hasNext()) {
|
||||
throw new NoSuchElementException("No next element");
|
||||
}
|
||||
try {
|
||||
String s = paths.next();
|
||||
if(!s.matches(".*:/.*")){
|
||||
if(schemaPattern.matcher(s).matches()){
|
||||
return new URI(s);
|
||||
} else {
|
||||
//No scheme - assume file for backward compatibility
|
||||
return new File(s).toURI();
|
||||
} else {
|
||||
return new URI(s);
|
||||
}
|
||||
|
||||
} catch (URISyntaxException e) {
|
||||
|
|
Loading…
Reference in New Issue