TFIDF code

TF-IDF is computed here as a chain of three MapReduce jobs.
TF and DF can each be produced by a simple MapReduce job; posted below is the third job, which joins them to compute the final TF-IDF scores.


Custom Key

/**
 * Composite key for the TF-IDF join job: a word plus a record-type flag
 * (0 = DF record, 1 = TF record). The sort order is word first, then type,
 * so the single DF record for a word always arrives at the reducer before
 * that word's TF records.
 */
public class Key implements WritableComparable<Key>{

Text word;
IntWritable type;

public Key() {
word = new Text();
type = new IntWritable();
}

public Text getWord() {
return word;
}

public void setWord(Text word) {
this.word = word;
}

public IntWritable getType() {
return type;
}

public void setType(IntWritable type) {
this.type = type;
}

@Override
public void readFields(DataInput arg0) throws IOException {
// Must deserialize fields in exactly the order write() serialized them.
this.word.readFields(arg0);
this.type.readFields(arg0);
}

@Override
public void write(DataOutput arg0) throws IOException {
this.word.write(arg0);
this.type.write(arg0);
}

@Override
public int compareTo(Key o) {
// Primary order: word. Secondary order: type, so DF (0) sorts before TF (1).
int cmp = this.word.compareTo(o.getWord());
if(cmp == 0){
cmp = this.type.compareTo(o.getType());
}
return cmp;
}

/**
 * Hash on the word ONLY, deliberately ignoring type: the default
 * HashPartitioner uses key.hashCode(), and all records for a word (its DF
 * record and its TF records) must land on the same reducer for the grouping
 * comparator to join them. The previous identity-based Object.hashCode()
 * scattered equal words across reducers.
 */
@Override
public int hashCode() {
return word.hashCode();
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof Key)) {
return false;
}
Key other = (Key) obj;
// Consistent with compareTo: equal iff both word and type match.
return word.equals(other.word) && type.equals(other.type);
}

}

Custom Value

public class Value implements Writable{
 Text filename;
 IntWritable tf;
 IntWritable df;
 public Value() {
// TODO Auto-generated constructor stub
filename=new Text();
tf=new IntWritable();
df=new IntWritable();
 }
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.filename.readFields(in);
this.tf.readFields(in);
this.df.readFields(in);
}

@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
this.filename.write(out);
this.tf.write(out);
this.df.write(out);
}
public Text getFilename() {
return filename;
}
public void setFilename(Text filename) {
this.filename = filename;
}
public IntWritable getTf() {
return tf;
}
public void setTf(IntWritable tf) {
this.tf = tf;
}
public IntWritable getDf() {
return df;
}
public void setDf(IntWritable df) {
this.df = df;
}

}

Mapper

public class Mappe extends Mapper<LongWritable, Text, Key, Value>{
int type = 0;
Key k = new Key();
Value v = new Value();
@Override
protected void setup(Context context)throws IOException, InterruptedException {
Path fileName = ((FileSplit)context.getInputSplit()).getPath();
String file = fileName.getName();
if(file.equalsIgnoreCase("TF")){
type = 1;
}else{
type = 0;
}
}
@Override
protected void map(LongWritable key, Text value,Context context)throws IOException, InterruptedException {
  String[] splits = value.toString().split("\\W+");
  if(type == 1){
     k.setWord(new Text(splits[0]));
k.setType(new IntWritable(type));
 
     v.setFilename(new Text(splits[1]));
     v.setTf(new IntWritable(Integer.parseInt(splits[2])));
}else{
     k.setWord(new Text(splits[0]));
k.setType(new IntWritable(type));
 
     v.setDf(new IntWritable(Integer.parseInt(splits[1])));
}
context.write(k, v);
}
}

Grouping Comparator 

/**
 * Grouping comparator: groups reducer input by word only, ignoring the type
 * flag, so a word's DF record (type 0) and all of its TF records (type 1)
 * arrive together in a single reduce() call.
 */
public class GrpCMP extends WritableComparator{

protected GrpCMP() {
// true -> allocate Key instances for the framework to deserialize into.
super(Key.class, true);
}

@Override
public int compare(WritableComparable a, WritableComparable b) {
Key left = (Key) a;
Key right = (Key) b;
// Compare on the word alone; the type flag is intentionally ignored here.
return left.getWord().compareTo(right.getWord());
}
}

Reducer

/**
 * Reducer for the TF-IDF join job. Thanks to the key sort order (type 0
 * before type 1) and the grouping comparator (group by word), the first value
 * for each word is its DF record and the rest are TF records. Emits
 * "word : file" -> tf * log(N / df).
 */
public class Reduce extends Reducer<Key, Value, Text, Text> {

// Total number of documents in the corpus (N in the IDF formula).
// Defaults to the original hard-coded 5; override with -D total.documents=N.
private int totalDocuments = 5;

@Override
protected void setup(Context context) throws IOException, InterruptedException {
totalDocuments = context.getConfiguration().getInt("total.documents", 5);
}

@Override
protected void reduce(Key k, Iterable<Value> v,Context context)throws IOException, InterruptedException {
Iterator<Value> itr = v.iterator();
if (!itr.hasNext()) {
return;
}
// First value is the DF record (type 0 sorts ahead of all TF records).
Value val = itr.next();
int df = val.getDf().get();
if (df <= 0) {
// No/invalid document frequency -> IDF undefined; skip this word.
return;
}
// BUG FIX: the original computed (double)(5/Df) — integer division ran
// before the cast, so any df > 5 gave 0 and Math.log(0) = -Infinity.
double idfRatio = (double) totalDocuments / df;
while(itr.hasNext()){
Value val1=itr.next();
String file=val1.getFilename().toString();
int tf=val1.getTf().get();
double tfidf = Math.log(idfRatio) * tf;
context.write(new Text(k.getWord()+" \t\t:\t "+file),new Text("\t\t"+Double.toString(tfidf)));
}
}
}

Comments

  1. I feel really happy to have seen your webpage and look forward to so many more entertaining
    times reading here. Thanks once more for all the details.
    Big Data training in Chennai

    ReplyDelete

Post a Comment

Popular posts from this blog

Hadoop 1 Vs Hadoop 2

Secondary NameNode check-pointing process

Failover and fencing