MapReduce serialization job - sorting

1. Data source: the output of the previous simple mobile-traffic statistics job

 

 

2. Requirements: sort in reverse order according to the total flow value, and then get the output

3. General logic

(1) FlowSort class: for serialization and deserialization, the implementation of sorting logic interface

(2) FlowSortMapper class: encapsulating data

(3) FlowSortReducer class: exchange key and value, and then encapsulate and write them (only key (parameter) can be sorted, not value)

(4) FlowSortDriver class: the driver that configures and runs the job (with specific comments below in the code); when the output types are set, key and value are swapped back

 

4. Code

(1)FlowSort

package flowsort;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowSort implements WritableComparable<FlowSort> {
    //Up flow flow
    private long upFlow;
    //Lower flow flow
    private long downFlow;
    //sum flow
    private long sumFlow;
    //Space parameter structure
    public FlowSort() {
    }

    @Override
    public String toString() {
        return upFlow +"\t"
                 +downFlow +"\t"+
                sumFlow
                ;
    }

    //Serialization method
    public void write(DataOutput out) throws IOException {

       out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }
    //Deserialization method
    public void readFields(DataInput in) throws IOException {
         this.upFlow=in.readLong();
         this.downFlow=in.readLong();
         this.sumFlow=in.readLong();
    }

    //Sorting logic
    public int compareTo(FlowSort o) {
        int result=0;
        if (sumFlow>o.getSumFlow()) {
            result = -1;
        }else if (sumFlow<o.getSumFlow()){
            result=1;

        }else{
            result=0;        }
        return result;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }
}

  

(2)FlowSortMapper

package flowsort;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that reads one tab-separated line (phone, up, down, sum) and
 * emits (FlowSort, phone) so the shuffle sorts by total flow.
 * Output objects are reused across calls, as Hadoop serializes them
 * immediately on context.write.
 */
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowSort, Text> {

    Text k = new Text();
    FlowSort v = new FlowSort();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the input line on tabs:
        // fields[0] = phone number, [1] = up, [2] = down, [3] = sum
        String[] fields = value.toString().split("\t");

        // The phone number becomes the map-output VALUE
        k.set(fields[0]);

        // The flow bean becomes the map-output KEY (so sorting applies to it)
        v.setUpFlow(Long.parseLong(fields[1]));
        v.setDownFlow(Long.parseLong(fields[2]));
        v.setSumFlow(Long.parseLong(fields[3]));

        // Emit with key and value swapped relative to the final output
        context.write(v, k);
    }
}

  

(3)FlowSortReducer

package flowsort;


import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that swaps key and value back: the sorted FlowSort key is
 * written as the output value, each phone number as the output key.
 * Several phone numbers may share the same total flow, hence the loop.
 */
public class FlowSortReducer extends Reducer<FlowSort, Text, Text, FlowSort> {
    @Override
    protected void reduce(FlowSort key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // One output line per phone number grouped under this flow key
        for (Text phone : values) {
            context.write(phone, key);
        }
    }
}

  

(4)FlowSortDrivers

package flowsort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


import java.io.IOException;

/**
 * Driver: configures and submits the flow-sort MapReduce job.
 * Exits with status 0 on success, 1 on failure.
 */
public class FlowSortDrivers {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 0. I/O paths: honor command-line arguments; fall back to local
        //    defaults only when none were supplied (the original code
        //    unconditionally overwrote args, silently ignoring the CLI).
        if (args == null || args.length < 2) {
            args = new String[]{"C:/Users/input", "C:/Users/output"};
        }
        System.setProperty("hadoop.home.dir", "E:/hadoop-2.7.2/");

        // 1. Get the job object
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Set the jar load path
        job.setJarByClass(FlowSortDrivers.class);

        // 3. Associate mapper and reducer
        job.setMapperClass(FlowSortMapper.class);
        job.setReducerClass(FlowSortReducer.class);

        // 4. Map output types: key/value are swapped so sorting applies
        //    to the FlowSort bean
        job.setMapOutputKeyClass(FlowSort.class);
        job.setMapOutputValueClass(Text.class);

        // 5. Final output types: swapped back by the reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowSort.class);

        // 6. Input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}

5. Results

6. My error: there was no problem with the path, but the job object never completed, and the output had no content

(1) Misguided bag, it's really fatal

(2) Wrong function name.....

Oh, I hope I don't make these low-level mistakes in the future

2020-05-26

21:30:13

FlowSort

Tags: Java Hadoop Apache Mobile

Posted on Tue, 26 May 2020 10:10:14 -0400 by TwistedLogix