TFIDF code
TF-IDF is computed here with three chained MapReduce jobs.
TF and DF can each be obtained with a simple MapReduce job; posted here is the third MapReduce job, which combines them to produce the final TF-IDF output.
Custom Key
/**
 * Composite MapReduce key made of a term and a record-type tag.
 *
 * <p>Keys order by {@code word} first and by {@code type} second, so for a
 * given word the records with the smaller type value sort first.
 *
 * <p>{@link #hashCode()} is derived from {@code word} only. Hadoop's default
 * hash partitioner chooses the reduce partition from the key's hash code, so
 * every record of a word, whatever its type, must hash identically to reach
 * the same reducer. Without an override, {@code Object.hashCode()} would be
 * used, which is not stable across JVMs.
 */
public class Key implements WritableComparable<Key>{
    Text word;
    IntWritable type;

    /** Creates an empty key; Hadoop needs a no-argument constructor to deserialize keys. */
    public Key() {
        word = new Text();
        type = new IntWritable();
    }

    public Text getWord() {
        return word;
    }

    public void setWord(Text word) {
        this.word = word;
    }

    public IntWritable getType() {
        return type;
    }

    public void setType(IntWritable type) {
        this.type = type;
    }

    /** Reads the fields in the same order {@link #write(DataOutput)} emits them: word, then type. */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        this.word.readFields(arg0);
        this.type.readFields(arg0);
    }

    /** Serializes the word followed by the type. */
    @Override
    public void write(DataOutput arg0) throws IOException {
        this.word.write(arg0);
        this.type.write(arg0);
    }

    /** Orders by word, breaking ties by type. */
    @Override
    public int compareTo(Key o) {
        int cmp = this.word.compareTo(o.getWord());
        if (cmp == 0) {
            cmp = this.type.compareTo(o.getType());
        }
        return cmp;
    }

    /** Two keys are equal when both word and type match (consistent with {@link #compareTo(Key)}). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Key)) {
            return false;
        }
        Key other = (Key) obj;
        return word.equals(other.word) && type.equals(other.type);
    }

    /**
     * Hashes the word only, deliberately ignoring the type, so that all
     * records of one word are routed to the same partition.
     */
    @Override
    public int hashCode() {
        return word.hashCode();
    }
}
TF and DF can each be obtained with a simple MapReduce job; posted here is the third MapReduce job, which combines them to produce the final TF-IDF output.
Custom Key
/**
 * Composite MapReduce key made of a term and a record-type tag.
 *
 * <p>Keys order by {@code word} first and by {@code type} second, so for a
 * given word the records with the smaller type value sort first.
 *
 * <p>{@link #hashCode()} is derived from {@code word} only. Hadoop's default
 * hash partitioner chooses the reduce partition from the key's hash code, so
 * every record of a word, whatever its type, must hash identically to reach
 * the same reducer. Without an override, {@code Object.hashCode()} would be
 * used, which is not stable across JVMs.
 */
public class Key implements WritableComparable<Key>{
    Text word;
    IntWritable type;

    /** Creates an empty key; Hadoop needs a no-argument constructor to deserialize keys. */
    public Key() {
        word = new Text();
        type = new IntWritable();
    }

    public Text getWord() {
        return word;
    }

    public void setWord(Text word) {
        this.word = word;
    }

    public IntWritable getType() {
        return type;
    }

    public void setType(IntWritable type) {
        this.type = type;
    }

    /** Reads the fields in the same order {@link #write(DataOutput)} emits them: word, then type. */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        this.word.readFields(arg0);
        this.type.readFields(arg0);
    }

    /** Serializes the word followed by the type. */
    @Override
    public void write(DataOutput arg0) throws IOException {
        this.word.write(arg0);
        this.type.write(arg0);
    }

    /** Orders by word, breaking ties by type. */
    @Override
    public int compareTo(Key o) {
        int cmp = this.word.compareTo(o.getWord());
        if (cmp == 0) {
            cmp = this.type.compareTo(o.getType());
        }
        return cmp;
    }

    /** Two keys are equal when both word and type match (consistent with {@link #compareTo(Key)}). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Key)) {
            return false;
        }
        Key other = (Key) obj;
        return word.equals(other.word) && type.equals(other.type);
    }

    /**
     * Hashes the word only, deliberately ignoring the type, so that all
     * records of one word are routed to the same partition.
     */
    @Override
    public int hashCode() {
        return word.hashCode();
    }
}
Custom Value
/**
 * MapReduce value carrying a file name, a term-frequency count and a
 * document-frequency count.
 *
 * <p>The wire format is the three fields in the fixed order
 * {@code filename}, {@code tf}, {@code df}; {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must stay in step with each other.
 */
public class Value implements Writable{
    Text filename = new Text();
    IntWritable tf = new IntWritable();
    IntWritable df = new IntWritable();

    /** Creates a value whose three fields are freshly constructed, empty writables. */
    public Value() {
    }

    // ---- accessors -------------------------------------------------------

    public Text getFilename() {
        return this.filename;
    }

    public void setFilename(Text filename) {
        this.filename = filename;
    }

    public IntWritable getTf() {
        return this.tf;
    }

    public void setTf(IntWritable tf) {
        this.tf = tf;
    }

    public IntWritable getDf() {
        return this.df;
    }

    public void setDf(IntWritable df) {
        this.df = df;
    }

    // ---- Writable --------------------------------------------------------

    /** Writes filename, tf and df to {@code out}, in that order. */
    @Override
    public void write(DataOutput out) throws IOException {
        filename.write(out);
        tf.write(out);
        df.write(out);
    }

    /** Restores filename, tf and df from {@code in}, in the order they were written. */
    @Override
    public void readFields(DataInput in) throws IOException {
        filename.readFields(in);
        tf.readFields(in);
        df.readFields(in);
    }
}
Mapper
/**
 * Mapper that tags each input record by the file it came from and emits it
 * as a ({@link Key}, {@link Value}) pair.
 *
 * <p>Records from an input file named {@code TF} (case-insensitive) are read
 * as {@code word, filename, tf} and tagged with type 1. Records from any
 * other file are read as {@code word, df} and tagged with type 0. Lines are
 * tokenized on runs of non-word characters.
 *
 * <p>Lines with too few fields, or with an empty leading token, are skipped
 * and counted under the {@code TFIDF / MALFORMED_RECORDS} counter instead of
 * failing the task with an {@code ArrayIndexOutOfBoundsException}. A
 * non-numeric count still fails with {@code NumberFormatException}.
 */
public class Mappe extends Mapper<LongWritable, Text, Key, Value>{
    int type = 0;
    Key k = new Key();
    Value v = new Value();

    /**
     * Determines the record type for this split from the input file name and
     * stores it in the reusable output key.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Path fileName = ((FileSplit) context.getInputSplit()).getPath();
        String file = fileName.getName();
        if (file.equalsIgnoreCase("TF")) {
            type = 1;
        } else {
            type = 0;
        }
        // The type is fixed for the whole split, so set it once here.
        k.getType().set(type);
    }

    /**
     * Parses one input line and emits it keyed by (word, type).
     *
     * @param key     byte offset of the line (unused)
     * @param value   the raw input line
     * @param context task context used for output and counters
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] splits = value.toString().split("\\W+");

        // TF records need word, filename and count; DF records need word and count.
        int requiredFields = (type == 1) ? 3 : 2;
        if (splits.length < requiredFields || splits[0].isEmpty()) {
            context.getCounter("TFIDF", "MALFORMED_RECORDS").increment(1);
            return;
        }

        // Reuse the writable instances held by k and v rather than allocating
        // new ones per record; context.write serializes them immediately.
        k.getWord().set(splits[0]);
        if (type == 1) {
            v.getFilename().set(splits[1]);
            v.getTf().set(Integer.parseInt(splits[2]));
        } else {
            v.getDf().set(Integer.parseInt(splits[1]));
        }
        context.write(k, v);
    }
}
Grouping Comparator
/**
 * Grouping comparator that treats two {@link Key}s as belonging to the same
 * group whenever their words compare equal; the type component is ignored.
 */
public class GrpCMP extends WritableComparator{

    /** Registers {@link Key} as the compared class and passes {@code true} as the second superclass-constructor argument. */
    protected GrpCMP() {
        super(Key.class, true);
    }

    /**
     * Compares two keys by word only.
     *
     * @param a first key; must be a {@link Key}
     * @param b second key; must be a {@link Key}
     * @return the result of comparing the two words
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Key left = (Key) a;
        Key right = (Key) b;
        return left.getWord().compareTo(right.getWord());
    }
}
Reducer
/**
 * Reducer that combines the document frequency of a word with each of its
 * per-file term frequencies and emits {@code tf * ln(N / df)} per file.
 *
 * <p>The first value of each group is read as the document-frequency record
 * and every following value as a (filename, tf) record. This relies on the
 * job's sort and grouping setup delivering the DF record first.
 *
 * <p>{@code N}, the total number of documents, is read from the job
 * configuration property {@code tfidf.total.docs} and defaults to 5.
 */
public class Reduce extends Reducer<Key, Value, Text, Text> {
    /** Configuration property holding the total number of documents in the corpus. */
    private static final String TOTAL_DOCS_PROPERTY = "tfidf.total.docs";
    /** Corpus size used when the property is not set. */
    private static final int DEFAULT_TOTAL_DOCS = 5;

    /**
     * Emits one TF-IDF line per (word, file) pair in the group.
     *
     * @param k       key of the group; its word labels the output
     * @param v       values of the group, DF record expected first
     * @param context task context used for output, configuration and counters
     */
    @Override
    protected void reduce(Key k, Iterable<Value> v, Context context) throws IOException, InterruptedException {
        Iterator<Value> itr = v.iterator();
        if (!itr.hasNext()) {
            return;
        }

        // Read df right away: copy the primitive out before advancing the iterator.
        int df = itr.next().getDf().get();
        if (df <= 0) {
            // No usable document frequency for this word, so the ratio is
            // undefined; count it and skip the group rather than dividing by zero.
            context.getCounter("TFIDF", "MISSING_DF").increment(1);
            return;
        }

        int totalDocs = context.getConfiguration().getInt(TOTAL_DOCS_PROPERTY, DEFAULT_TOTAL_DOCS);
        // Floating-point division: integer division would truncate the ratio
        // (to 0 whenever df > totalDocs, making the logarithm -Infinity).
        double idf = Math.log((double) totalDocs / df);

        while (itr.hasNext()) {
            Value posting = itr.next();
            String file = posting.getFilename().toString();
            int tf = posting.getTf().get();
            double tfIdf = idf * tf;
            context.write(new Text(k.getWord() + " \t\t:\t " + file), new Text("\t\t" + tfIdf));
        }
    }
}
I feel really happy to have seen your webpage and look forward to so many more entertaining
times reading here. Thanks once more for all the details.
Big Data training in Chennai