cavis/datavec/datavec-spark/src/main/java/org/datavec/spark/functions/LineRecordReaderFunction.java

45 lines
1.6 KiB
Java
Raw Normal View History

2019-06-06 15:21:15 +03:00
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.datavec.spark.functions;
import org.apache.spark.api.java.function.Function;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.split.StringSplit;
import org.datavec.api.writable.Writable;
import java.util.List;
/**
* LineRecordReaderFunction: Used to map a {@code JavaRDD<String>} to a {@code JavaRDD<List<Writable>>}
* Note that this is most useful with LineRecordReader instances (CSVRecordReader, SVMLightRecordReader, etc)
*
* @author Alex Black
*/
public class LineRecordReaderFunction implements Function<String, List<Writable>> {

    /** Reader that is re-initialized on every {@link #call(String)} and asked for one record. */
    private final RecordReader recordReader;

    /**
     * Creates a function that delegates parsing of each input string to the given reader.
     *
     * @param recordReader reader used to turn each input string into a record
     */
    public LineRecordReaderFunction(RecordReader recordReader) {
        this.recordReader = recordReader;
    }

    /**
     * Initializes the wrapped reader on a {@link StringSplit} built from the given string and
     * returns the next record the reader produces.
     *
     * @param s the string to hand to the reader
     * @return the record returned by the reader's {@code next()} call
     * @throws Exception if initializing the reader or reading the record fails
     */
    @Override
    public List<Writable> call(String s) throws Exception {
        // The same reader instance is reused: each call re-initializes it on a fresh split.
        final StringSplit inputSplit = new StringSplit(s);
        recordReader.initialize(inputSplit);

        final List<Writable> record = recordReader.next();
        return record;
    }
}