一、MapReduce中数据类型介绍
1.MR中所有的数据类型都要实现Writable接口,以便于这些类型定义的数据可以被序列化进行网络传输和文件存储
2.MR基本数据类型
BooleanWritable :布尔型 ByteWritable DoubleWritable FloatWritable 以下是常用的数据类型: IntWritable LongWritable Text:使用UTF8格式存储我们的文本 NullWritable:当<key,value>中key或者value为空时使用
3.Writable <key,value> value数据对应的数据类型必须要实现Writable接口
write()是把每个对象序列化到输出流 readFields()是把输入流字节反序列化
4.WritableComparable - key排序,因为shuffle中排序依据是key,若自定义的数据类型作为key,则key对应的数据类型必须同时实现Writable和Comparable接口,即WritableComparable接口。
5.重写toString() 、equals()、hashCode()
二、自定义数据类型
定义私有变量
setter,getter方法
无参有参构造器
set()方法,帮助构造器初始化数据(Hadoop偏爱)
hashCode()方法和equals()方法
toString()方法
implements Writable并实现write()方法和readFields()方法
implement WritableComparable并实现compareTo()方法
(1)自定义UserWritable实现Writable接口
自定义value对应的数据类型:
package com.kfk.hadoop.io;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom MapReduce value type carrying a user id and a user name.
 *
 * <p>Serialized form: a 4-byte int ({@code id}) followed by the name in
 * modified UTF-8 as written by {@link DataOutput#writeUTF(String)}.
 *
 * @author : Cai Zhengjie
 * @email  : caizhengjie888@icloud.com
 * @date   : 2020/10/11
 * @time   : 3:38 PM
 */
public class UserWritable implements Writable {

    private int id;
    private String name;

    /**
     * No-arg constructor. Hadoop creates Writable instances reflectively and
     * then fills them through {@link #readFields(DataInput)}, so a public
     * no-arg constructor must exist.
     */
    public UserWritable() {
    }

    /**
     * Creates a fully initialized instance.
     *
     * @param id   user id
     * @param name user name
     */
    public UserWritable(int id, String name) {
        this.set(id, name);
    }

    /**
     * Sets both fields at once; lets a single instance be reused across records.
     *
     * @param id   user id
     * @param name user name
     */
    public void set(int id, String name) {
        this.name = name;
        this.id = id;
    }

    /**
     * Serializes this object: id first, then name.
     *
     * @param dataOutput stream to write to
     * @throws IOException          if the underlying stream fails
     * @throws NullPointerException if {@code name} is null (writeUTF rejects null)
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(id);
        dataOutput.writeUTF(name);
    }

    /**
     * Deserializes this object; fields are read in exactly the order
     * {@link #write(DataOutput)} emitted them.
     *
     * @param dataInput stream to read from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.id = dataInput.readInt();
        this.name = dataInput.readUTF();
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String toString() {
        return "UserWritable{" +
                "id=" + id +
                ", name='" + name + '\'' +
                '}';
    }

    /** Two instances are equal when both id and name (null-safe) match. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        UserWritable that = (UserWritable) o;
        if (id != that.id) {
            return false;
        }
        return name != null ? name.equals(that.name) : that.name == null;
    }

    /** Hash built from id and name, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int result = id;
        result = 31 * result + (name != null ? name.hashCode() : 0);
        return result;
    }
}
(2)自定义OrderWritable实现WritableComparable接口
自定义key对应的数据类型:
package com.kfk.hadoop.io;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom MapReduce key type made of an order id and a price.
 *
 * <p>Keys order by {@code orderId} first and by {@code price} second.
 * Serialized form: the order id in modified UTF-8 as written by
 * {@link DataOutput#writeUTF(String)}, followed by a 4-byte float price.
 *
 * @author : Cai Zhengjie
 * @email  : caizhengjie888@icloud.com
 * @date   : 2020/10/11
 * @time   : 3:55 PM
 */
public class OrderWritable implements WritableComparable<OrderWritable> {

    private String orderId;
    private float price;

    /**
     * No-arg constructor. Hadoop creates key instances reflectively and then
     * fills them through {@link #readFields(DataInput)}, so a public no-arg
     * constructor must exist.
     */
    public OrderWritable() {
    }

    /**
     * Creates a fully initialized instance.
     *
     * @param orderId order identifier
     * @param price   order price
     */
    public OrderWritable(String orderId, float price) {
        this.set(orderId, price);
    }

    /**
     * Sets both fields at once; lets a single instance be reused across records.
     *
     * @param orderId order identifier
     * @param price   order price
     */
    public void set(String orderId, float price) {
        this.orderId = orderId;
        this.price = price;
    }

    /**
     * Orders by order id, breaking ties by price.
     *
     * @param o the key to compare against
     * @return negative, zero or positive per the {@link Comparable} contract
     * @throws NullPointerException if either order id is null
     */
    @Override
    public int compareTo(OrderWritable o) {
        int compare = this.getOrderId().compareTo(o.getOrderId());
        if (compare == 0) {
            // Float.compare gives the same ordering as boxed Float.compareTo
            // without allocating, and matches what equals() uses.
            compare = Float.compare(price, o.getPrice());
        }
        return compare;
    }

    /**
     * Serializes this object: order id first, then price.
     *
     * @param dataOutput stream to write to
     * @throws IOException          if the underlying stream fails
     * @throws NullPointerException if {@code orderId} is null (writeUTF rejects null)
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(orderId);
        dataOutput.writeFloat(price);
    }

    /**
     * Deserializes this object; fields are read in exactly the order and with
     * exactly the types {@link #write(DataOutput)} emitted them.
     *
     * @param dataInput stream to read from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.orderId = dataInput.readUTF();
        // Must be readFloat(): the price was written with writeFloat(). Reading it
        // as an int would reinterpret the IEEE-754 bits and corrupt the value.
        this.price = dataInput.readFloat();
    }

    /** Two instances are equal when both price and order id (null-safe) match. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        OrderWritable that = (OrderWritable) o;
        if (Float.compare(that.price, price) != 0) {
            return false;
        }
        return orderId != null ? orderId.equals(that.orderId) : that.orderId == null;
    }

    /** Hash built from order id and price, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int result = orderId != null ? orderId.hashCode() : 0;
        result = 31 * result + (price != +0.0f ? Float.floatToIntBits(price) : 0);
        return result;
    }

    @Override
    public String toString() {
        return "OrderWritable{" +
                "orderId='" + orderId + '\'' +
                ", price=" + price +
                '}';
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public float getPrice() {
        return price;
    }

    public void setPrice(float price) {
        this.price = price;
    }
}