|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  org.apache.hadoop.zebra.mapred.BasicTableOutputFormat
@Deprecated public class BasicTableOutputFormat
OutputFormat class for creating a
BasicTable.
Usage Example:
In the main program, add the following code.
jobConf.setOutputFormat(BasicTableOutputFormat.class);
Path outPath = new Path("path/to/the/BasicTable");
BasicTableOutputFormat.setOutputPath(jobConf, outPath);
BasicTableOutputFormat.setSchema(jobConf, "Name, Age, Salary, BonusPct");
The above code does the following things:
String multiLocs = "commaSeparatedPaths"
jobConf.setOutputFormat(BasicTableOutputFormat.class);
BasicTableOutputFormat.setMultipleOutputPaths(jobConf, multiLocs);
jobConf.setOutputFormat(BasicTableOutputFormat.class);
BasicTableOutputFormat.setSchema(jobConf, "Name, Age, Salary, BonusPct");
BasicTableOutputFormat.setZebraOutputPartitionClass(
jobConf, MultipleOutputsTest.OutputPartitionerClass.class);
The user's ZebraOutputPartition class should look like this:
static class OutputPartitionerClass implements ZebraOutputPartition {
@Override
public int getOutputPartition(BytesWritable key, Tuple value) {
return someIndexInOutputPartitionList;
}
The user Reducer code (or similarly Mapper code if it is a Map-only job)
should look like the following:
static class MyReduceClass implements Reducer<K, V, BytesWritable, Tuple> {
// keep the tuple object for reuse.
Tuple outRow;
// indices of various fields in the output Tuple.
int idxName, idxAge, idxSalary, idxBonusPct;
@Override
public void configure(JobConf job) {
Schema outSchema = BasicTableOutputFormat.getSchema(job);
// create a tuple that conforms to the output schema.
outRow = TypesUtils.createTuple(outSchema);
// determine the field indices.
idxName = outSchema.getColumnIndex("Name");
idxAge = outSchema.getColumnIndex("Age");
idxSalary = outSchema.getColumnIndex("Salary");
idxBonusPct = outSchema.getColumnIndex("BonusPct");
}
@Override
public void reduce(K key, Iterator<V> values,
OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
throws IOException {
String name;
int age;
int salary;
double bonusPct;
// ... Determine the value of the individual fields of the row to be inserted.
try {
outRow.set(idxName, name);
outRow.set(idxAge, new Integer(age));
outRow.set(idxSalary, new Integer(salary));
outRow.set(idxBonusPct, new Double(bonusPct));
output.collect(new BytesWritable(name.getBytes()), outRow);
}
catch (ExecException e) {
// should never happen
}
}
@Override
public void close() throws IOException {
// no-op
}
}
| Constructor Summary | |
|---|---|
BasicTableOutputFormat()
Deprecated. |
|
| Method Summary | |
|---|---|
void |
checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf conf)
Deprecated. Note: we perform the Initialization of the table here. |
static void |
close(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Close the output BasicTable, No more rows can be added into the table. |
static org.apache.hadoop.fs.Path |
getOutputPath(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Get the output path of the BasicTable from JobConf |
static org.apache.hadoop.fs.Path[] |
getOutputPaths(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Get the multiple output paths of the BasicTable from JobConf |
org.apache.hadoop.mapred.RecordWriter<org.apache.hadoop.io.BytesWritable,Tuple> |
getRecordWriter(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf conf,
String name,
org.apache.hadoop.util.Progressable progress)
Deprecated. |
static Schema |
getSchema(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Get the table schema in JobConf. |
static SortInfo |
getSortInfo(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Get the SortInfo object |
static org.apache.hadoop.io.BytesWritable |
getSortKey(Object builder,
Tuple t)
Deprecated. Generates a BytesWritable key for the input key using the key generator provided. |
static Object |
getSortKeyGenerator(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Generates a Zebra-specific sort key generator, which is used to generate BytesWritable keys; the table's sort key(s) are used to build this object |
static String |
getStorageHint(org.apache.hadoop.mapred.JobConf conf)
Deprecated. Get the table storage hint in JobConf. |
static Class<? extends ZebraOutputPartition> |
getZebraOutputPartitionClass(org.apache.hadoop.mapred.JobConf conf)
Deprecated. |
static void |
setMultipleOutputs(org.apache.hadoop.mapred.JobConf conf,
Class<? extends ZebraOutputPartition> theClass,
org.apache.hadoop.fs.Path... paths)
Deprecated. Set the multiple output paths of the BasicTable in JobConf |
static void |
setMultipleOutputs(org.apache.hadoop.mapred.JobConf conf,
String commaSeparatedLocations,
Class<? extends ZebraOutputPartition> theClass)
Deprecated. Set the multiple output paths of the BasicTable in JobConf |
static void |
setOutputPath(org.apache.hadoop.mapred.JobConf conf,
org.apache.hadoop.fs.Path path)
Deprecated. Set the output path of the BasicTable in JobConf |
static void |
setSchema(org.apache.hadoop.mapred.JobConf conf,
String schema)
Deprecated. Use setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead. |
static void |
setSortInfo(org.apache.hadoop.mapred.JobConf conf,
String sortColumns)
Deprecated. Use setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead. |
static void |
setSortInfo(org.apache.hadoop.mapred.JobConf conf,
String sortColumns,
Class<? extends org.apache.hadoop.io.RawComparator<Object>> comparatorClass)
Deprecated. Use setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead. |
static void |
setStorageHint(org.apache.hadoop.mapred.JobConf conf,
String storehint)
Deprecated. Use setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead. |
static void |
setStorageInfo(org.apache.hadoop.mapred.JobConf conf,
ZebraSchema zSchema,
ZebraStorageHint zStorageHint,
ZebraSortInfo zSortInfo)
Deprecated. Set the table storage info, including ZebraSchema, ZebraStorageHint, and ZebraSortInfo. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public BasicTableOutputFormat()
| Method Detail |
|---|
public static void setMultipleOutputs(org.apache.hadoop.mapred.JobConf conf,
String commaSeparatedLocations,
Class<? extends ZebraOutputPartition> theClass)
throws IOException
conf - The JobConf object.
commaSeparatedLocations - The comma-separated output paths to the tables.
Each path must either not exist, or must be an empty directory.
theClass - Zebra output partitioner class
IOException
public static void setMultipleOutputs(org.apache.hadoop.mapred.JobConf conf,
Class<? extends ZebraOutputPartition> theClass,
org.apache.hadoop.fs.Path... paths)
throws IOException
conf - The JobConf object.
paths - The list of output paths.
Each path must either not exist, or must be an empty directory.
theClass - Zebra output partitioner class
IOException
public static org.apache.hadoop.fs.Path[] getOutputPaths(org.apache.hadoop.mapred.JobConf conf)
throws IOException
conf - The JobConf object.
IOException
public static Class<? extends ZebraOutputPartition> getZebraOutputPartitionClass(org.apache.hadoop.mapred.JobConf conf)
throws IOException
IOException
public static void setOutputPath(org.apache.hadoop.mapred.JobConf conf,
org.apache.hadoop.fs.Path path)
conf - The JobConf object.
path - The output path to the table. The path must either not exist,
or must be an empty directory.
public static org.apache.hadoop.fs.Path getOutputPath(org.apache.hadoop.mapred.JobConf conf)
conf - job conf
public static void setSchema(org.apache.hadoop.mapred.JobConf conf,
String schema)
setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead.
conf - The JobConf object.schema - The schema of the BasicTable to be created. For the initial
implementation, the schema string is simply a comma separated list
of column names, such as "Col1, Col2, Col3".
public static Schema getSchema(org.apache.hadoop.mapred.JobConf conf)
throws ParseException
conf - The JobConf object.
ParseException
public static Object getSortKeyGenerator(org.apache.hadoop.mapred.JobConf conf)
throws IOException,
ParseException
conf - The JobConf object.
IOException
ParseException
public static org.apache.hadoop.io.BytesWritable getSortKey(Object builder,
Tuple t)
throws Exception
builder - Opaque key generator created by getSortKeyGenerator() methodt - Tuple to create sort key from
Exception
public static void setStorageHint(org.apache.hadoop.mapred.JobConf conf,
String storehint)
throws ParseException,
IOException
setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead.
conf - The JobConf object.storehint - The storage hint of the BasicTable to be created. The format would
be like "[f1, f2.subfld]; [f3, f4]".
ParseException
IOException
public static String getStorageHint(org.apache.hadoop.mapred.JobConf conf)
conf - The JobConf object.
public static void setSortInfo(org.apache.hadoop.mapred.JobConf conf,
String sortColumns,
Class<? extends org.apache.hadoop.io.RawComparator<Object>> comparatorClass)
setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead.
conf - The JobConf object.sortColumns - Comma-separated sort column namescomparatorClass - comparator class name; null for default
public static void setSortInfo(org.apache.hadoop.mapred.JobConf conf,
String sortColumns)
setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) instead.
conf - The JobConf object.sortColumns - Comma-separated sort column names
public static void setStorageInfo(org.apache.hadoop.mapred.JobConf conf,
ZebraSchema zSchema,
ZebraStorageHint zStorageHint,
ZebraSortInfo zSortInfo)
throws ParseException,
IOException
conf - The JobConf object.zSchema - The ZebraSchema object containing schema information.zStorageHint - The ZebraStorageHint object containing storage hint information.zSortInfo - The ZebraSortInfo object containing sorting information.
ParseException
IOException
public static SortInfo getSortInfo(org.apache.hadoop.mapred.JobConf conf)
throws IOException
conf - The JobConf object.
IOException
public void checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf conf)
throws IOException
getRecordWriter(FileSystem, JobConf, String, Progressable)
checkOutputSpecs in interface org.apache.hadoop.mapred.OutputFormat<org.apache.hadoop.io.BytesWritable,Tuple>
IOException
See also: OutputFormat.checkOutputSpecs(FileSystem, JobConf)
public org.apache.hadoop.mapred.RecordWriter<org.apache.hadoop.io.BytesWritable,Tuple> getRecordWriter(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf conf,
String name,
org.apache.hadoop.util.Progressable progress)
throws IOException
getRecordWriter in interface org.apache.hadoop.mapred.OutputFormat<org.apache.hadoop.io.BytesWritable,Tuple>
IOException
See also: OutputFormat.getRecordWriter(FileSystem, JobConf, String, Progressable)
public static void close(org.apache.hadoop.mapred.JobConf conf)
throws IOException
conf - The JobConf object.
IOException
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||