JDK
Hadoop 3.x requires JDK 8 or later.
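A quick way to check whether a suitable JDK is already present (the openjdk-8-jdk package name below is the Ubuntu/Debian one; adjust for your distribution):
java -version # should report version 1.8.0 or later
# if no JDK is installed, e.g. on Ubuntu:
apt install openjdk-8-jdk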
Hadoop 3.x
Download the corresponding tar.gz from the official website.
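For example, version 3.3.6 (the version assumed throughout this guide) can be fetched from the Apache archive and unpacked to /opt:
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
tar -xzf hadoop-3.3.6.tar.gz -C /opt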
Configure environment variables
vim /etc/profile
# Replace these with your own installation paths!!!
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/opt/hadoop-3.3.6
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
source /etc/profile
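A quick sanity check that the variables took effect:
echo $HADOOP_HOME # should print /opt/hadoop-3.3.6
java -version # should print the JDK version
hadoop version # should print Hadoop 3.3.6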
Configure hadoop-env.sh
vim /opt/hadoop-3.3.6/etc/hadoop/hadoop-env.sh
# Change to your own installation path
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
export HADOOP_HOME=/opt/hadoop-3.3.6
# Adjust these users to match your setup; Hadoop 3.x refuses to start daemons as root unless they are set
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
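An optional check that the edits landed in hadoop-env.sh:
grep -E '^export (JAVA_HOME|HADOOP_HOME|HDFS_|YARN_)' /opt/hadoop-3.3.6/etc/hadoop/hadoop-env.sh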
Set up passwordless SSH login on the server
hostname # should print hcss-ecs-a5de (substitute your actual hostname)
sudo vim /etc/hosts
127.0.0.1 localhost hcss-ecs-a5de
your_server_ip hcss-ecs-a5de
ping hcss-ecs-a5de # the hostname should resolve
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
chmod 700 ~/.ssh
ssh localhost # should log in directly without a password
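Since start-all.sh also connects via the machine's hostname (e.g. when starting the secondary namenode), it is worth verifying that path too:
ssh hcss-ecs-a5de # should likewise log in without a password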
If passwordless SSH is not configured, startup will eventually fail with errors like:
root@hcss-ecs-a5de:/opt/hadoop-3.3.6/sbin# ./start-all.sh
Starting namenodes on [bigdataflowing]
bigdataflowing: ssh: Could not resolve hostname bigdataflowing: Temporary failure in name resolution
Starting datanodes
localhost: root@localhost: Permission denied (publickey,password).
Starting secondary namenodes [hcss-ecs-a5de]
hcss-ecs-a5de: root@hcss-ecs-a5de: Permission denied (publickey,password).
Starting resourcemanager
Starting nodemanagers
localhost: root@localhost: Permission denied (publickey,password).
Edit the core configuration files
Note: hcss-ecs-a5de must be replaced with your own server's hostname!!!
vim /opt/hadoop-3.3.6/etc/hadoop/core-site.xml
# Add the following inside the configuration tag:
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://hcss-ecs-a5de:9090</value>
</property>
<!-- Directory where Hadoop stores its data -->
<property>
  <name>hadoop.tmp.dir</name>
  <value>/opt/hadoop-3.3.6/hdfs/tmp</value>
</property>
<property>
  <name>hadoop.proxyuser.root.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.root.groups</name>
  <value>*</value>
</property>
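Hadoop normally creates the hadoop.tmp.dir directory itself, but creating it up front (path matching the value above) avoids permission surprises:
mkdir -p /opt/hadoop-3.3.6/hdfs/tmp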
# Edit hdfs-site.xml and add the following inside the configuration tag:
vim /opt/hadoop-3.3.6/etc/hadoop/hdfs-site.xml
<property>
  <name>dfs.replication</name>
  <value>1</value>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/opt/hadoop-3.3.6/hdfs/name</value>
  <final>true</final>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/opt/hadoop-3.3.6/hdfs/data</value>
  <final>true</final>
</property>
<property>
  <name>dfs.http.address</name>
  <value>0.0.0.0:50070</value>
</property>
<property>
  <name>dfs.permissions</name>
  <value>false</value>
</property>
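Likewise, the NameNode and DataNode directories referenced above can be created ahead of time:
mkdir -p /opt/hadoop-3.3.6/hdfs/name /opt/hadoop-3.3.6/hdfs/data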
# Edit mapred-site.xml and add the following inside the configuration tag:
vim /opt/hadoop-3.3.6/etc/hadoop/mapred-site.xml
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
# Edit yarn-site.xml and add the following inside the configuration tag:
vim /opt/hadoop-3.3.6/etc/hadoop/yarn-site.xml
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
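Malformed XML in any of the *-site.xml files is a common cause of startup failures; if xmllint is installed, a quick well-formedness check:
xmllint --noout /opt/hadoop-3.3.6/etc/hadoop/*-site.xml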
Start Hadoop
hdfs namenode -format
cd /opt/hadoop-3.3.6/sbin/
./start-all.sh
Normally there will be no errors, and jps should show five processes: NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager.
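To verify the cluster end to end, try a small HDFS operation and open the web UIs (the NameNode UI port follows the dfs.http.address value set above; 8088 is YARN's default ResourceManager web port):
hdfs dfs -mkdir -p /test
hdfs dfs -ls / # should list /test
# NameNode web UI: http://your_server_ip:50070
# ResourceManager web UI: http://your_server_ip:8088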