@Public
@Stable
InputSplit represents the data to be processed by an individual Mapper.
Typically, it presents a byte-oriented view of the input, and it is the responsibility of the job's RecordReader to process this and present a record-oriented view.
---------------------
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class InputSplit {
/**
* Get the size of the split, so that the input splits can be sorted by size.
*
* @return the number of bytes in the split
* @throws IOException may be thrown by implementations while determining the size
* @throws InterruptedException may be thrown by implementations if the calling thread is interrupted
*/
public abstract long getLength() throws IOException, InterruptedException;
/**
* Get the list of nodes by name where the data for the split would be local.
* The locations do not need to be serialized.
*
* @return a new array of the node names.
* @throws IOException may be thrown by implementations while resolving the locations
* @throws InterruptedException may be thrown by implementations if the calling thread is interrupted
*/
public abstract
String[] getLocations() throws IOException, InterruptedException;
/**
* Gets info about which nodes the input split is stored on and how it is
* stored at each location.
*
* <p>This base implementation always returns <code>null</code>; subclasses
* may override it to supply per-location details.
*
* @return list of <code>SplitLocationInfo</code>s describing how the split
* data is stored at each location. A null value indicates that all the
* locations have the data stored on disk.
* @throws IOException declared so that overriding implementations may throw it;
* this base implementation does not
*/
@Evolving
public SplitLocationInfo[] getLocationInfo() throws IOException {
// Default: no per-location detail is provided. Under the contract documented
// above, null means every location holds the data on disk.
return null;
}
---------------------------
/**
* A InputSplit that spans a set of rows
*/
@InterfaceStability.Evolving
public static class DBInputSplit extends InputSplit implements Writable {
private long end = 0;
private long start = 0;
/**
* Default Constructor
*/
public DBInputSplit() {
}
/**
* Convenience Constructor
* @param start the index of the first row to select
* @param end the index of the last row to select
*/
public DBInputSplit(long start, long end) {
this.start = start;
this.end = end;
}
/** {@inheritDoc} */
public String[] getLocations() throws IOException {
// TODO Add a layer to enable SQL "sharding" and support locality
return new String[] {};
}
/**
* @return The index of the first row to select
*/
public long getStart() {
return start;
}
/**
* @return The index of the last row to select
*/
public long getEnd() {
return end;
}
/**
* @return The total row count in this split
*/
public long getLength() throws IOException {
return end - start;
}
/** {@inheritDoc} */
public void readFields(DataInput input) throws IOException {
start = input.readLong();
end = input.readLong();
}
/** {@inheritDoc} */
public void write(DataOutput output) throws IOException {
output.writeLong(start);
output.writeLong(end);
}
}
---------
InputFormat牛逼(2)org.apache.hadoop.mapreduce.InputSplit & DBInputSplit
猜你喜欢
转载自niub.iteye.com/blog/2191204
今日推荐
周排行