解析mysql的DDL语句生成高斯内表及表字段主键配置

发布于:2024-04-19 ⋅ 阅读:(32) ⋅ 点赞:(0)

mysql的DDL语句如下:

CREATE TABLE `gg_zr` (
  `id` bigint(20) NOT NULL COMMENT '责任信息表主键id',
  `zrdm` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '责任代码',
  `zrmc` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '责任名称',
  `delflag` char(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '0' COMMENT '删除标志(0.存在;1.删除)',
  `version` int(11) DEFAULT '1' COMMENT '版本号',
  `createby` bigint(20) DEFAULT NULL COMMENT '创建人',
  `createtime` datetime DEFAULT NULL COMMENT '创建时间',
  `updateby` bigint(20) DEFAULT NULL COMMENT '修改人',
  `updatetime` datetime DEFAULT NULL COMMENT '修改时间',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC COMMENT='责任信息表';

解析oracle表生成高斯内表入口:Mysql2GaussCreateMana.scala

package com.tpiods.sqoop.mysql2gauss

import java.io.{File, FileInputStream, PrintWriter}
/**
 * 读取mysql建表语句,生成高斯内表
 * 读取oceanbase建表语句,生成高斯内表
 * 仅93行提取主键的逻辑,mysql与oceanbase有差异
 */
object Mysql2GaussCreateMana {

  def main(args: Array[String]): Unit = {
    // 指定输入sql语句的文件
    val input = "ods_etl/src/main/resources/work0409_test1/test1_tab.sql"
    val lines = scala.io.Source.fromInputStream(new FileInputStream(input)).getLines()

    val str = readSql(lines)
    val result = replaceSql(str)

    // 读上一步输出的文件
    val str2 = addTeacCols3(result)
    // 指定输出sql语句的文件(增加技术字段)
    val output2 = "ods_etl/src/main/resources/work0409_test1/test1_tab_mana.sql"
    val writer2 = new PrintWriter(new File(output2))
    writer2.write(str2)
    writer2.close()
  }

  /**
   *
   * @param lines 从文件读入的源系统建表语句
   * @return 返回转换后的建表语句
   */
  def readSql(lines: Iterator[String]): String = {
    var first = 0
    val sb = new StringBuilder
    var pkName: String = null
    for (line <- lines if line != null) {
        if(line.startsWith("CREATE")) {
          val tabName = line.split("\\s+")(2)
          first += 1
          if (sb.nonEmpty) {
            // 不按主键做hash,则注释掉下面这段代码
            sb.append(
              s"""
                |)
                |with (orientation=column, compression=low)
                |distribute by hash${pkName};""".stripMargin)

            sb.append(
                s"""
                |
                |set search_path = ods;
                |drop table if exists ${tabName};
                |create table if not exists ${tabName} (\t
                |""".stripMargin)
          }
          else {
            sb.append(
              s"""
                |
                |set search_path = tpods;
                |drop table if exists ${tabName};
                |create table if not exists ${tabName} (\t
                |""".stripMargin)
          }
        }
        if (line.startsWith("  `")) {
          val split = line.split("\\s+")
          if (first == 1) {
            val ln = (split(1) + "\t" + split(2)).toLowerCase
            if (ln.indexOf("char(") > 0) {
              val l1 = ln.split("\\(")(0)
              val l2 = ln.split("\\(")(1)
              val l3 = l2.substring(0, l2.length - 1)
              val l4 = Integer.valueOf(l3) * 3
              sb.append(l1).append("(").append(l4).append(")")
            }
            else sb.append(split(1)).append("\t").append(split(2))
          }
          else {
            val ln = (split(1) + "\t" + split(2)).toLowerCase
            if (ln.indexOf("char(") > 0) {
              val l1 = ln.split("\\(")(0)
              val l2 = ln.split("\\(")(1)
              val l3 = l2.substring(0, l2.length - 1)
              val l4 = Integer.valueOf(l3) * 3
              sb.append(",").append(l1).append("(").append(l4).append(")")
            }
            else sb.append(",").append(split(1)).append("\t").append(split(2))
          }
          sb.append("\n")
          first = 0
        }
        if (line.startsWith("  PRIMARY KEY")) {
          //取mysql 主键
          pkName = line.split("\\s+")(3)
        }
    }
    sb +
      s"""
         |)
         |WITH (orientation=column, compression=low)
         |distribute by hash${pkName};
         |""".stripMargin
    // 不按主键做hash,打开下面这段代码
//    sb.toString()
  }

  /**
   *
   * @param sql 读入转换后的sql
   * @return 对字段类型进行替换,返回最终的sql
   */
  def replaceSql(sql: String): String = {
    val res = sql.toLowerCase.replaceAll("`", "")
                  .replaceAll("\\b" + "datetime" + "\\b", "date")
                  .replaceAll("tinyint\\(\\d*\\)", "int")
                  .replaceAll("smallint\\(\\d*\\)", "int")
                  .replaceAll("int\\(\\d*\\)", "int")
                  .replaceAll("bigint\\(\\d*\\)", "bigint")
                  .replaceAll("float\\(\\d*,\\d*\\)", "float")
                  .replaceAll("double\\(\\d*,\\d*\\)", "float")
                  .replaceAll("timestamp\\(\\d*\\)", "timestamp")
                  .replaceAll("date\\(\\d*\\)", "date")
                  .replaceAll("\\b" + "longtext" + "\\b", "text")
                  .replaceAll("\\b" + "longblob" + "\\b", "varchar")
                  .replaceAll(",\n", "\n")
                  .replaceAll("mediumtext", "text")
    res + "\n"
  }

  /**
   * 按主键做hash
   * @param lines2 读入字段类型替换后的sql
   * @return 字段后面增加技术字段
   */
  def addTeacCols(lines2: String): String = {
    lines2.replaceAll("\n\\)\nwith",
      """,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |)
        |with""".stripMargin)
  }

  /**
   * 不按主键做hash,调用这个方法
   * @param lines2 读入字段类型替换后的sql
   * @return 字段后面增加技术字段
   */
  def addTeacCols2(lines2: String): String = {
    lines2.replaceAll("\\);",
      """,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |);
        |""".stripMargin)
  }

  /**
   * 按主键做hash
   * @param lines2 读入字段类型替换后的sql
   * @return 字段前面增加技术字段
   */
  def addTeacCols3(lines2: String): String = {
    lines2.replaceAll("\t\n",
      """
        | tabname varchar
        |,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |,""".stripMargin)
  }

  /**
   * 不按主键做hash,调用这个方法
   * @param lines2 读入字段类型替换后的sql
   * @return 字段前面增加技术字段
   */
  def addTeacCols4(lines2: String): String = {
    lines2.replaceAll("\t\n",
      """
        | tabname varchar
        |,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |,""".stripMargin)
  }

}

运行以上代码,输出如下:

set search_path = ods;
drop table if exists gg_zr;
create table if not exists gg_zr (
tabname varchar
,oper varchar
,mtime numeric(38,5)
,source_sys varchar
,etl_time varchar
,etl_date varchar
,id bigint
,zr_dm varchar(96)
,zr_mc varchar(192)
,del_flag char(3)
,version int
,create_by bigint
,create_time date
,update_by bigint
,update_time date

)
with (orientation=column, compression=low)
distribute by hash(id);

解析oracle表生成表名字段主键配置入口:MysqlGeneTabColsPk.scala

package com.tpiods.sqoop.mysql2gauss

import java.io.{File, FileInputStream, PrintWriter}
/**
 * 输入: 源系统建表语句
 * 输出: 表名|列名...|主键
 * 仅36行提取主键的逻辑,mysql与oceanbase有差异
 */
object MysqlGeneTabColsPk {
  def main(args: Array[String]): Unit = {
    // 指定输入sql语句的文件
    val input = "ods_etl/src/main/resources/work0409_test1/test1_tab.sql"
    val lines = scala.io.Source.fromInputStream(new FileInputStream(input)).getLines()
    val str = readSql(lines)

    // 指定输出sql语句的文件
    val output1 = "ods_etl/src/main/resources/work0409_test1/test1_tab_tabcolspk.txt"
    val writer = new PrintWriter(new File(output1))
    writer.write(str)
    writer.close()
  }

  def readSql(lines: Iterator[String]): String = {
    val sb = new StringBuilder
    for (tempString <- lines if tempString != null) {
      if (tempString.startsWith("CREATE")) {
        val tabName = tempString.split("TABLE `")(1)
        sb.append("\n").append(tabName).append("|")
      }
      if (tempString.startsWith("  `")) {
        val colName = tempString.split("\\s+")(1)
        sb.append(colName).append(",")
      }
      if (tempString.startsWith("  PRIMARY KEY")) {
//         var pkName = tempString.split("\\s+")(3)
        //取oceanbase 主键
        var pkName = tempString.replaceAll("  PRIMARY KEY ", "")
        if (pkName.endsWith(",")) {
          pkName = pkName.substring(0, pkName.length - 1).replaceAll(" ", "")
        }
        sb.append("|").append(pkName)
      }
    }
    sb.toString().toLowerCase()
      .replaceAll("` ", "")
      .replaceAll("`", "")
      .replaceAll("\\(", "")
      .replaceAll("\\)", "")
      .replaceAll(",\\|", "\\|")
      .replaceAll(",\n", "\n")
  }

}

运行以上代码,输出如下:

gg_zr|id,zr_dm,zr_mc,del_flag,version,create_by,create_time,update_by,update_time|id

网站公告

今日签到

点亮在社区的每一天
去签到