protobuf实现HBase数据压缩

发布于:2024-12-06 ⋅ 阅读:(92) ⋅ 点赞:(0)

前置

安装说明
使用说明
HBaseDDL和DML操作

HBase数据压缩

问题
在上文的datain中(见原文),
每次写入一条记录都会写入4个单元格的内容。现在希望对其进行精简:先用protobuf将这4个字段序列化,再合并成1个单元格写入,从而减少存储空间。
在这里插入图片描述
datain2
此处仅修改了插入方法之前的内容,将数据合并,对于插入方法没有做修改,其他内容仿照之前的即可。

package org.wunaiieq;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.wunaiieq.util.MyPhone;

import java.text.SimpleDateFormat;
import java.util.*;

/**
 * Generates random call records and writes them to the HBase table
 * {@code wunaiieq:phone_log}. Each record is serialized with protobuf
 * ({@code MyPhone.Phone}) and stored in a single cell {@code basic:info}.
 */
public class datain2 {
    /** Connection taken from {@code HBaseConnection}; it is not closed by this class. */
    public static Connection connection = HBaseConnection.connection;
    /** Random source for phone numbers, call lengths, call types and dates. */
    public static Random random=new Random();
    /** Format of the stored date string; also used to derive the row-key timestamp. */
    public static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /**
     * Builds a random phone number.
     *
     * @param prefix leading digits of the number (e.g. {@code "158"})
     * @return {@code prefix} followed by a zero-padded 8-digit random suffix
     */
    public static String getPhone(String prefix) {
        // The bound of nextInt is exclusive, so 100000000 covers every suffix 00000000..99999999.
        return prefix + String.format("%08d", random.nextInt(100000000));
    }

    /**
     * Produces a random timestamp string based on January 1st of the given year.
     * The time-of-day fields start from the current clock time because only
     * year/month/day are set explicitly.
     *
     * @param year base year
     * @return the timestamp formatted with {@link #simpleDateFormat}
     */
    private static String getData(int year) {
        Calendar calendar = Calendar.getInstance();
        calendar.set(year, 0, 1);
        calendar.add(Calendar.MONTH, random.nextInt(12));
        calendar.add(Calendar.DAY_OF_MONTH, random.nextInt(31));
        calendar.add(Calendar.HOUR_OF_DAY, random.nextInt(12));
        Date time = calendar.getTime();
        return simpleDateFormat.format(time);
    }

    /**
     * Writes 1000 random call records for each of 10 random users, one batch per user.
     *
     * @param args unused
     * @throws Exception if the table cannot be opened, a date cannot be parsed, or a put fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the Table handle even when a put fails.
        try (Table table = connection.getTable(TableName.valueOf("wunaiieq", "phone_log"))) {
            // 10 users
            for (int i = 0; i < 10; i++) {
                String phonenumber = getPhone("158");
                // One fresh batch per user. The batch must NOT be cleared inside the
                // record loop, otherwise only the last record of each user is written.
                List<Put> putList = new ArrayList<Put>(1000);
                for (int j = 0; j < 1000; j++) {
                    String dnum = getPhone("199");
                    int length = random.nextInt(200) + 1;
                    int type = random.nextInt(2);
                    String date = getData(2050);
                    // Reversed timestamp; i and j are appended as text to keep keys distinct.
                    String rowkey = phonenumber + "_" + (Long.MAX_VALUE - simpleDateFormat.parse(date).getTime()) + i + j;
                    Put put = new Put(Bytes.toBytes(rowkey));
                    // Fill the protobuf builder with the four record fields.
                    MyPhone.Phone.Builder builder = MyPhone.Phone.newBuilder();
                    builder.setDnum(dnum);
                    builder.setLength(length);
                    builder.setDate(date);
                    builder.setType(type);
                    // Build the message and store its serialized bytes in a single cell.
                    MyPhone.Phone phone = builder.build();
                    put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("info"), phone.toByteArray());
                    putList.add(put);
                }
                table.put(putList);
            }
        }
    }

}

其他内容
修改pom.xml——增加对protobuf的依赖
导入MyPhone.java——protobuf生成的java文件
编写HbaseDML.java(参考专栏博客)

效果

原有大小
在这里插入图片描述
更新后
在这里插入图片描述

获取数据(反序列化)

数据存储格式已经修改,因此获取数据的方式需要改变
在这里插入图片描述
Using2.java

package org.wunaiieq;


import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.wunaiieq.util.MyPhone;

import java.io.IOException;
import java.text.SimpleDateFormat;

/**
 * Reads call records back from the HBase table {@code wunaiieq:phone_log},
 * deserializing each stored cell value into a {@code MyPhone.Phone} message.
 */
public class Using2 {

    /** Format used to turn the scan boundary dates into row-key timestamps. */
    public static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    /** Connection taken from {@code HBaseConnection}; it is not closed by this class. */
    public static Connection connection0 = HBaseConnection.connection;
    /** Table handle opened once in the static initializer below. */
    static Table table;

    static {
        try {
            table = connection0.getTable(TableName.valueOf("wunaiieq", "phone_log"));
            System.out.println(table);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Scans the rows of one phone number and prints every record as
     * {@code dnum--type--length--date--}.
     *
     * @param phoneNumber phone number used as the row-key prefix
     * @throws Exception if a boundary date cannot be parsed, the scan fails,
     *                   or a cell value is not a valid {@code MyPhone.Phone} message
     */
    public static void scanProtocBuf(String phoneNumber) throws Exception {
        // Row keys hold (Long.MAX_VALUE - timestamp), so the later date yields the smaller key.
        String startRow = phoneNumber+"_"+(Long.MAX_VALUE-
                simpleDateFormat.parse("2099-04-01 00:00:00").getTime());
        String stopRow = phoneNumber+"_"+(Long.MAX_VALUE-
                simpleDateFormat.parse("2000-03-01 00:00:00").getTime());
        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(stopRow),true);
        // Run the scan; try-with-resources closes the scanner even if parsing fails.
        try (ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                Cell[] cells = result.rawCells();
                // Skip rows without any cell instead of failing on cells[0].
                if (cells == null || cells.length == 0) {
                    continue;
                }
                // Take the value of the first cell of the row.
                byte[] phoneInfoBytes = CellUtil.cloneValue(cells[0]);
                // Deserialize the bytes into a MyPhone.Phone message.
                MyPhone.Phone phone = MyPhone.Phone.parseFrom(phoneInfoBytes);
                System.out.print(phone.getDnum()+"--");
                System.out.print(phone.getType()+"--");
                System.out.print(phone.getLength()+"--");
                System.out.println(phone.getDate()+"--");
            }
        }
    }


    /**
     * Scans and prints the records of three hard-coded phone numbers.
     *
     * @param args unused
     * @throws Exception propagated from {@link #scanProtocBuf(String)}
     */
    public static void main(String[] args) throws Exception {
        String phoneNumber0 = "15894163362";
        scanProtocBuf(phoneNumber0);
        String phoneNumber1 = "15898559729";
        scanProtocBuf(phoneNumber1);
        String phoneNumber2 = "15807236902";
        scanProtocBuf(phoneNumber2);
    }
}


网站公告

今日签到

点亮在社区的每一天
去签到