Optimize LineRecordReader (#419)
* Move the locations initialization for file input splits into the initialize method
* Minor optimization of the scheme-detection regular expression

Signed-off-by: Paul Dubs <paul.dubs@gmail.com>
This commit is contained in:
		
							parent
							
								
									c9d1454743
								
							
						
					
					
						commit
						b9d5f1645b
					
				| @ -60,6 +60,13 @@ public class LineRecordReader extends BaseRecordReader { | ||||
|     @Override | ||||
|     public void initialize(InputSplit split) throws IOException, InterruptedException { | ||||
|         super.initialize(split); | ||||
|         if(!(inputSplit instanceof StringSplit || inputSplit instanceof InputStreamInputSplit)){ | ||||
|             final ArrayList<URI> uris = new ArrayList<>(); | ||||
|             final Iterator<URI> uriIterator = inputSplit.locationsIterator(); | ||||
|             while(uriIterator.hasNext()) uris.add(uriIterator.next()); | ||||
| 
 | ||||
|             this.locations = uris.toArray(new URI[0]); | ||||
|         } | ||||
|         this.iter = getIterator(0); | ||||
|         this.initialized = true; | ||||
|     } | ||||
| @ -68,7 +75,6 @@ public class LineRecordReader extends BaseRecordReader { | ||||
|     public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException { | ||||
|         this.conf = conf; | ||||
|         initialize(split); | ||||
|         this.initialized = true; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
| @ -207,11 +213,6 @@ public class LineRecordReader extends BaseRecordReader { | ||||
|                 } | ||||
|             } | ||||
|         } else { | ||||
|             final ArrayList<URI> uris = new ArrayList<>(); | ||||
|             final Iterator<URI> uriIterator = inputSplit.locationsIterator(); | ||||
|             while(uriIterator.hasNext()) uris.add(uriIterator.next()); | ||||
| 
 | ||||
|             this.locations = uris.toArray(new URI[uris.size()]); | ||||
|             if (locations.length > 0) { | ||||
|                 InputStream inputStream = streamCreatorFn.apply(locations[location]); | ||||
|                 try { | ||||
|  | ||||
| @ -23,6 +23,7 @@ import java.net.URI; | ||||
| import java.net.URISyntaxException; | ||||
| import java.util.Iterator; | ||||
| import java.util.NoSuchElementException; | ||||
| import java.util.regex.Pattern; | ||||
| 
 | ||||
| /** | ||||
|  * A simple utility method to convert a {@code Iterator<String>} to an {@code Iterator<URI>}, where each | ||||
| @ -32,6 +33,7 @@ import java.util.NoSuchElementException; | ||||
|  */ | ||||
| @AllArgsConstructor | ||||
| public class UriFromPathIterator implements Iterator<URI> { | ||||
|     final Pattern schemaPattern = Pattern.compile("^.*?:/.*"); | ||||
| 
 | ||||
|     private final Iterator<String> paths; | ||||
| 
 | ||||
| @ -42,16 +44,17 @@ public class UriFromPathIterator implements Iterator<URI> { | ||||
| 
 | ||||
|     @Override | ||||
|     public URI next() { | ||||
| 
 | ||||
|         if (!hasNext()) { | ||||
|             throw new NoSuchElementException("No next element"); | ||||
|         } | ||||
|         try { | ||||
|             String s = paths.next(); | ||||
|             if(!s.matches(".*:/.*")){ | ||||
|             if(schemaPattern.matcher(s).matches()){ | ||||
|                 return new URI(s); | ||||
|             } else { | ||||
|                 //No scheme - assume file for backward compatibility | ||||
|                 return new File(s).toURI(); | ||||
|             } else { | ||||
|                 return new URI(s); | ||||
|             } | ||||
| 
 | ||||
|         } catch (URISyntaxException e) { | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user