Copyright notice: This is an original article by the blogger, licensed under CC 4.0 BY-SA. Please include a link to the original source and this notice when reposting.
1. Set the Partitioner class and the number of reducers
job.setPartitionerClass(MyPartitioner.class);
job.setNumReduceTasks(3);
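For context, here is a sketch of where these two calls sit in a full driver. Only the two highlighted lines come from this post; `PartitionDemo`, `MyMapper`, `MyReducer`, and the input/output paths are assumed placeholder names.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PartitionDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "account partition demo");
        job.setJarByClass(PartitionDemo.class);
        job.setMapperClass(MyMapper.class);          // placeholder mapper
        job.setReducerClass(MyReducer.class);        // placeholder reducer
        job.setMapOutputKeyClass(AccounterWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setPartitionerClass(MyPartitioner.class); // custom partitioner
        job.setNumReduceTasks(3);                     // one output file per reducer
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```

With three reduce tasks, the job produces three output files (`part-r-00000` through `part-r-00002`), one per partition.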
2. Write the Partitioner class
/*
 * Partition the mapper output so that several reducers can each
 * process one partition file in parallel.
 */
private static class MyPartitioner extends Partitioner<AccounterWritable, NullWritable> {
    private int index = -1;
    private final Map<String, Integer> map = new HashMap<String, Integer>();

    /*
     * Return value: the partition this record is written to.
     * numPartitions: the number of reduce tasks.
     */
    @Override
    public int getPartition(AccounterWritable key, NullWritable value, int numPartitions) {
        String currentKey = key.getAccountName();
        // Reuse the partition already assigned to this account name
        Integer partition = map.get(currentKey);
        if (partition != null) {
            return partition;
        }
        // Assign the next partition, wrapping with % so the result
        // always stays within [0, numPartitions)
        index = (index + 1) % numPartitions;
        map.put(currentKey, index);
        return index;
    }
}
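One caveat with the HashMap approach above: each map task builds its own Partitioner instance, so with several mappers the arrival order of keys can differ and the same account name may be routed to different reducers. A stateless hash of the key, which is how Hadoop's default `HashPartitioner` works, avoids this. A minimal self-contained sketch (class and method names are illustrative):

```java
import java.util.HashMap;
import java.util.Map;

public class HashPartitionDemo {
    // The rule Hadoop's default HashPartitioner applies:
    // mask off the sign bit, then take the remainder.
    static int partitionFor(String key, int numPartitions) {
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }

    public static void main(String[] args) {
        int numPartitions = 3;
        Map<String, Integer> assigned = new HashMap<>();
        for (String account : new String[] {"alice", "bob", "carol", "alice"}) {
            assigned.put(account, partitionFor(account, numPartitions));
        }
        // The same key always lands in the same partition, regardless
        // of which mapper sees it or in what order keys arrive.
        System.out.println(assigned);
    }
}
```

The trade-off: hashing gives stable, mapper-independent routing but does not guarantee each distinct key gets its own reducer, which the post's sequential-assignment scheme attempts.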
3. The serialization (Writable) class
package com.gxwz.entity;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
* @author com
*
*/
public class AccounterWritable implements WritableComparable<AccounterWritable> {

    private String accountName = "";
    private double orderAmount = 0D;

    public String getAccountName() {
        return accountName;
    }

    public void setAccountName(String accountName) {
        this.accountName = accountName;
    }

    public double getOrderAmount() {
        return orderAmount;
    }

    public void setOrderAmount(double orderAmount) {
        this.orderAmount = orderAmount;
    }

    @Override
    public String toString() {
        return accountName + "\t" + orderAmount;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.accountName);
        out.writeDouble(this.orderAmount);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.accountName = in.readUTF();
        this.orderAmount = in.readDouble();
    }

    @Override
    public int compareTo(AccounterWritable o) {
        return this.getAccountName().compareTo(o.getAccountName());
    }
}
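The key contract in a Writable is that `write()` and `readFields()` must touch the fields in exactly the same order. The round trip can be sketched with plain `java.io`, no Hadoop on the classpath (`WritableRoundTrip` is an illustrative name, not part of the post's code):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class WritableRoundTrip {
    // Mirrors AccounterWritable.write(): UTF string first, then double.
    static byte[] write(String accountName, double orderAmount) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buf);
        out.writeUTF(accountName);
        out.writeDouble(orderAmount);
        return buf.toByteArray();
    }

    // Mirrors AccounterWritable.readFields(): same field order as write().
    static Object[] read(byte[] bytes) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
        String accountName = in.readUTF();
        double orderAmount = in.readDouble();
        return new Object[] {accountName, orderAmount};
    }

    public static void main(String[] args) throws IOException {
        byte[] bytes = write("alice", 199.5);
        Object[] fields = read(bytes);
        System.out.println(fields[0] + "\t" + fields[1]);
    }
}
```

If the read order ever diverges from the write order (say, double first), deserialization silently produces garbage rather than an error, which is why the two methods are best kept side by side.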
4. Screenshot of the results