9. SeaTunnel (incubating) 2.1.2 Installation and Deployment

Published: 2022-07-28

Environment:

Hostname: cmcc01 (used as the example throughout)

Operating system: CentOS 7

Software          Version                        Deployment mode
centos            7
zookeeper         zookeeper-3.4.10               pseudo-distributed
hadoop            hadoop-3.1.3                   pseudo-distributed
hive              hive-3.1.3-bin                 pseudo-distributed
clickhouse        21.11.10.1-2                   single node, multiple instances
dolphinscheduler  3.0.0                          single node
kettle            pdi-ce-9.3.0.0                 single node
sqoop             sqoop-1.4.7                    single node
seatunnel         seatunnel-incubating-2.1.2     single node
spark             spark-2.4.8-bin-hadoop2.7.tgz  single node

1. Download SeaTunnel and Spark

seatunnel:https://seatunnel.incubator.apache.org/download

spark:https://archive.apache.org/dist/spark/
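
If you prefer to pull the archives from the command line, here is a minimal wget sketch; the exact archive URLs are assumptions based on the Apache archive layout, so adjust them to whatever the download pages above actually point at.

# Spark 2.4.8 built for Hadoop 2.7, from the Apache archive
wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz

# SeaTunnel (incubating) 2.1.2 binary release (path assumed from the incubator archive layout)
wget https://archive.apache.org/dist/incubator/seatunnel/2.1.2/apache-seatunnel-incubating-2.1.2-bin.tar.gz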

Extract:

# Extract the archives into /opt/software
tar zxf apache-seatunnel-incubating-2.1.2-bin.tar.gz -C /opt/software/
tar zxf spark-2.4.8-bin-hadoop2.7.tgz -C /opt/software/
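
A quick check that the extracted directories match the paths used by the environment variables in the next step:

ls -d /opt/software/apache-seatunnel-incubating-2.1.2 /opt/software/spark-2.4.8-bin-hadoop2.7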

2. Configure environment variables

vim ~/.bash_profile
 
# spark
export SPARK_HOME=/opt/software/spark-2.4.8-bin-hadoop2.7
export PATH=$PATH:${SPARK_HOME}/bin

# seatunnel
export SEATUNNEL_HOME=/opt/software/apache-seatunnel-incubating-2.1.2
export PATH=$PATH:${SEATUNNEL_HOME}/bin
# Reload the profile so the variables take effect
source ~/.bash_profile
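
A minimal sanity check that both tools are now reachable from the shell (output will vary with your environment):

# Should print the Spark 2.4.8 version banner
spark-submit --version

# Should resolve to the SeaTunnel install directory and list its launcher scripts
echo ${SEATUNNEL_HOME}
ls ${SEATUNNEL_HOME}/bin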

3. Modify the Spark configuration

cd /opt/software/spark-2.4.8-bin-hadoop2.7/conf

# Copy the template configuration file
cp spark-env.sh.template spark-env.sh

vim spark-env.sh

# Add the following lines
export HADOOP_CONF_DIR=/opt/software/hadoop-3.1.3/etc/hadoop
export YARN_CONF_DIR=/opt/software/hadoop-3.1.3/etc/hadoop
export HADOOP_OPTS="-Djava.library.path=/opt/software/hadoop-3.1.3/lib/native"
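
Before wiring SeaTunnel in, it helps to confirm that Spark alone can submit to YARN. A minimal smoke test using the SparkPi example bundled with the distribution (assumes the pseudo-distributed HDFS and YARN above are already running; the examples jar is matched with a glob in case the exact file name differs):

# Submit the bundled SparkPi example to YARN in client mode
spark-submit \
  --master yarn \
  --deploy-mode client \
  --class org.apache.spark.examples.SparkPi \
  ${SPARK_HOME}/examples/jars/spark-examples_*.jar 10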

# Modify the Hive configuration file
vim /opt/software/hive-3.1.3-bin/conf/hive-site.xml

Add the metastore configuration:
  <property>
      <name>hive.metastore.uris</name>
      <!-- Replace with your own hostname -->
      <value>thrift://cmcc01:9083</value>
  </property>
# Create a symlink to hive-site.xml in Spark's conf directory
ln -s /opt/software/hive-3.1.3-bin/conf/hive-site.xml /opt/software/spark-2.4.8-bin-hadoop2.7/conf

# Copy the MySQL driver jar into Spark's jars directory
cp /opt/package/mysql-connector-java-8.0.20.jar /opt/software/spark-2.4.8-bin-hadoop2.7/jars
# Start the Hive metastore
nohup hive --service metastore > ${HIVE_HOME}/logs/metastore.log 2>&1 &
# Add the startup commands to a startup script
vim /opt/software/start_hiveserver2.sh
# Add the following content

#!/bin/bash

# Start the metastore first, since HiveServer2 depends on it
nohup hive --service metastore > ${HIVE_HOME}/logs/metastore.log 2>&1 &

# Start HiveServer2
nohup ${HIVE_HOME}/bin/hiveserver2 > ${HIVE_HOME}/logs/hiveserver2.log 2>&1 &

# beeline -u jdbc:hive2://cmcc01:10000/default -n root
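
A sketch of how the script might be used and checked; the RunJar process name and the beeline URL assume the defaults configured above, and HIVE_HOME is assumed to be exported:

# Make sure the log directory exists, then run the script
mkdir -p ${HIVE_HOME}/logs
chmod +x /opt/software/start_hiveserver2.sh
sh /opt/software/start_hiveserver2.sh

# Both services show up as RunJar processes; give HiveServer2 a moment to start
jps | grep RunJar

# Confirm HiveServer2 answers over JDBC
beeline -u jdbc:hive2://cmcc01:10000/default -n root -e "show databases;"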

4. Test SeaTunnel

  (1) Create a test config file

vim /opt/software/apache-seatunnel-incubating-2.1.2/config/hive-console.conf
# Add the following content
env {
  spark.app.name = "SeaTunnel"
  spark.executor.instances = 1
  spark.executor.cores = 1
  spark.executor.memory = "1g"
  execution.parallelism = 1
}

source {
  hive {
    pre_sql = "select id, name,age from stg.student01"
    result_table_name = "student01_log"
  }

}

transform {

}

sink {
  Console{}
}
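
The empty transform block is where intermediate processing would go. As an illustration only (assuming the sql transform plugin shipped with the 2.1.2 Spark engine), a variant that filters the registered temporary table before it reaches the console sink could look like this:

transform {
  sql {
    sql = "select id, name, age from student01_log where age > 20"
  }
}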

  (2) Run the test

Prepare the Hive dataset:

CREATE TABLE `stg.student01`
(
    `id`   int,
    `name` string,
    `age`  int
)
    row format delimited fields terminated by ","
    STORED AS textfile;

INSERT INTO `stg`.`student01`  VALUES (1, '张三', 20),(2, '李四', 21),(3, '王五', 22);
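
Before submitting the SeaTunnel job, a quick check that the rows actually landed in Hive:

# Query the table directly through the Hive CLI
hive -e "select id, name, age from stg.student01;"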
Run the SeaTunnel job on YARN:

start-seatunnel-spark.sh --master yarn --deploy-mode client --config /opt/software/apache-seatunnel-incubating-2.1.2/config/hive-console.conf
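
If YARN is not available, the same config can also be exercised in Spark local mode for a quick smoke test (local[2] simply uses two local cores; the Hive source still needs the metastore to be reachable):

start-seatunnel-spark.sh --master local[2] --deploy-mode client --config /opt/software/apache-seatunnel-incubating-2.1.2/config/hive-console.conf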

Screenshot of the successful test run (omitted here): the Console sink prints the rows of stg.student01 in the Spark driver output.
