hadoop_spark_hbase_hive

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/namenode</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/datanode</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
</configuration>

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
<description>Abase for other temporary directories.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>

<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
</configuration>

hadoop-env.sh

export JAVA_HOME=/usr/local/jdk1.8.0_291
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>

yarn-site.xml

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->

<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>

workers

master
slave1
slave2

Create the datanode, namenode, and tmp directories under $HADOOP_HOME.

HBASE

hbase-env.sh

export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"

# Override text processing tools for use by these launch scripts.
# export GREP="${GREP-grep}"
# export SED="${SED-sed}"
export JAVA_HOME=/usr/local/jdk1.8.0_291
export HBASE_MANAGES_ZK=false

hbase-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!--
The following properties are set for running HBase as a single process on a
developer workstation. With this configuration, HBase is running in
"stand-alone" mode and without a distributed file system. In this mode, and
without further configuration, HBase and ZooKeeper data are stored on the
local filesystem, in a path under the value configured for `hbase.tmp.dir`.
This value is overridden from its default value of `/tmp` because many
systems clean `/tmp` on a regular basis. Instead, it points to a path within
this HBase installation directory.

Running against the `LocalFileSystem`, as opposed to a distributed
filesystem, runs the risk of data integrity issues and data loss. Normally
HBase will refuse to run in such an environment. Setting
`hbase.unsafe.stream.capability.enforce` to `false` overrides this behavior,
permitting operation. This configuration is for the developer workstation
only and __should not be used in production!__

See also https://hbase.apache.org/book.html#standalone_dist
-->
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
<description>nothing</description> </property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
<description>nothing</description>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>./tmp</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>

regionservers

master
slave1
slave2

HIVE

hive-site.xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>asdfqwer</value>
</property>
<property>
<name>datanucleus.readOnlyDatastore</name>
<value>false</value>
</property> <property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateColumns</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.local</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property> </configuration>

Create the warehouse directory, strip the .template suffix from the configuration file names, and place the following jars in Hive's lib directory:

mysql-connector-java-5.1.49-bin.jar  ## from the downloaded MySQL connector package
guava-27.0-jre.jar  ## from hadoop (replaces Hive's older bundled guava)
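A minimal sketch of placing these jars, assuming Hive lives in /usr/local/hive and Hadoop in /usr/local/hadoop as elsewhere in this post (the guava-19.0.jar filename is an assumption about Hive 3.1.2's bundled guava):

cp mysql-connector-java-5.1.49-bin.jar /usr/local/hive/lib/
rm /usr/local/hive/lib/guava-19.0.jar    # assumed bundled version; remove it to avoid the guava conflict
cp /usr/local/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /usr/local/hive/lib/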

SPARK

spark-env.sh

export JAVA_HOME=/usr/local/jdk1.8.0_291
export SCALA_HOME=/usr/share/scala
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=master
export SPARK_LOCAL_DIRS=/usr/local/spark

workers

master
slave1
slave2
start-spark-all.sh
stop-spark-all.sh
### renamed to avoid conflicting with Hadoop's start-all.sh

ZOOKEEPER

zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/zookeeper/data
# the port at which the clients will connect
clientPort=2181
server.1=172.17.0.2:2888:3888
server.2=172.17.0.3:2888:3888
server.3=172.17.0.4:2888:3888

~/.bashrc

export JAVA_HOME=/usr/local/jdk1.8.0_291
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONFIG_HOME=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export ZOOKEEPER_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$SCALA_HOME/bin
export SPARK_HOME=/usr/local/spark
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
export HBASE_HOME=/usr/local/hbase
export PATH=$PATH:$HBASE_HOME/bin
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HIVE_HOME/bin

Hadoop#

step 1 Pull the Ubuntu image#

docker pull ubuntu:latest

step 2 Build an Ubuntu image containing the JDK with a Dockerfile#

Download the JDK from the official site; here JDK 1.8 (jdk-8u291-linux-x64.tar.gz) is used. Move the downloaded archive into the WSL2 environment, create a Dockerfile in the same directory, and open it for editing:
vim Dockerfile

Enter the following content in the Dockerfile:

FROM ubuntu:latest
MAINTAINER duanmu
ADD jdk-8u291-linux-x64.tar.gz /usr/local/
ENV JAVA_HOME /usr/local/jdk1.8.0_291
ENV CLASSPATH $JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
ENV PATH $PATH:$JAVA_HOME/bin

Save and exit, then build the image:

docker build -t jdk-20210127 .

step 3 Start a container from the image#

Create a container named ubuntu_hadoop from the jdk-20210127 base image, set its hostname to charlie, and enter the container.

docker run -it --name=ubuntu_hadoop -h charlie jdk-20210127

step 4 Update apt-get#

apt-get update

step 5 Install vim#

apt-get install vim

step 6 Update the apt-get mirror source#

vim /etc/apt/sources.list

Replace its entire contents with:

deb-src http://archive.ubuntu.com/ubuntu focal main restricted #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ focal main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ focal main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ focal universe
deb http://mirrors.aliyun.com/ubuntu/ focal-updates universe
deb http://mirrors.aliyun.com/ubuntu/ focal multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-updates multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse #Added by software-properties
deb http://archive.canonical.com/ubuntu focal partner
deb-src http://archive.canonical.com/ubuntu focal partner
deb http://mirrors.aliyun.com/ubuntu/ focal-security main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ focal-security main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ focal-security universe
deb http://mirrors.aliyun.com/ubuntu/ focal-security multiverse

step 7 Update apt-get again#

apt-get update

step 8 Install wget#

apt-get install wget

step 9 Download the Hadoop package with wget#

wget https://mirrors.cnnic.cn/apache/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz

#wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz

#wget https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.6.3/apache-zookeeper-3.6.3-bin.tar.gz

#wget https://mirrors.tuna.tsinghua.edu.cn/apache/hbase/stable/hbase-2.3.5-bin.tar.gz

#wget https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz

#wget https://downloads.lightbend.com/scala/2.13.6/scala-2.13.6.tgz

#wget http://mirrors.ustc.edu.cn/mysql-ftp/Downloads/MySQL-5.7/mysql-server_5.7.31-1ubuntu18.04_amd64.deb-bundle.tar

#wget https://cdn.mysql.com/archives/mysql-connector-java-5.1/mysql-connector-java-5.1.49.tar.gz

step 10 Extract Hadoop#

tar -xvzf hadoop-3.2.2.tar.gz -C /usr/local
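The archive unpacks to /usr/local/hadoop-3.2.2, while the rest of this post assumes HADOOP_HOME=/usr/local/hadoop, so a rename is implied (a small sketch):

cd /usr/local
mv hadoop-3.2.2 hadoop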

step 11 Configure environment variables and reload the profile#

vim ~/.bashrc

Add the following environment variables:

export JAVA_HOME=/usr/local/jdk1.8.0_291
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONFIG_HOME=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin

Then reload the profile:

source ~/.bashrc

step 12 Create directories and edit the configuration files#

cd $HADOOP_HOME
mkdir tmp
mkdir namenode
mkdir datanode

Edit the configuration files:

cd $HADOOP_CONFIG_HOME
vim core-site.xml

Replace its contents with the following:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
<description>Abase for other temporary directories.</description> </property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hdcluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
</configuration>

Edit hdfs-site.xml:

vim hdfs-site.xml

Replace it with the following configuration:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/namenode</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/datanode</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
</configuration>

Next:

vim mapred-site.xml

Replace it with the following:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>

Then yarn-site.xml:

vim yarn-site.xml

Replace it with the following:

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>

Edit the Hadoop environment variables: find hadoop-env.sh under the Hadoop installation (in etc/hadoop):

vim hadoop-env.sh

Append at the end:

export JAVA_HOME=/usr/local/jdk1.8.0_291
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

Edit the workers file in the same configuration directory:

vim workers

Change its contents to:

master
slave1
slave2

Refresh and initialize HDFS#


chown -R root:root /usr/local/hadoop/

Install SSH#

Hadoop requires passwordless SSH login, so install SSH first:

apt-get install net-tools
apt-get install ssh

Create the sshd directory:

mkdir -p /var/run/sshd

Generate the access key:

cd ~/
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cd .ssh
cat id_rsa.pub >> authorized_keys

At the prompts for the key location and passphrase, just press Enter each time so that login is passwordless.

Modify the SSH configuration#

vim /etc/ssh/ssh_config

Add the line below directly, or find the commented-out version in the file and edit it.

StrictHostKeyChecking no # change ask to no
vim /etc/ssh/sshd_config

Append at the end:

# disable password authentication
PasswordAuthentication no
# enable key authentication
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys

Finally, test passwordless login with:

ssh localhost

If this reports an error, try:

/etc/init.d/ssh restart

For ownership problems: chown -R root:root .ssh

For permission problems:

chmod g-w /home/your_user # or chmod 0755 /home/your_user

chmod 700 /home/your_user/.ssh

chmod 600 /home/your_user/.ssh/authorized_keys
chmod 600 .ssh/id_*

Copy the hadoop directory to the other nodes with scp:

scp -r /usr/local/hadoop/ slave1:/usr/local/
scp -r /usr/local/hadoop/ slave2:/usr/local/
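The scp above assumes containers named slave1 and slave2 already exist and can be reached by hostname from master. One hedged way to get there is to commit the configured container to an image and start the other nodes from it; the image name hadoop-base and the hosts entries below are assumptions, not from the original post (the IPs follow the default Docker bridge, matching zoo.cfg):

docker commit ubuntu_hadoop hadoop-base    # hypothetical image name
docker run -itd --name master -h master hadoop-base
docker run -itd --name slave1 -h slave1 hadoop-base
docker run -itd --name slave2 -h slave2 hadoop-base
# in each container, map the hostnames to the actual container IPs, e.g.:
# echo "172.17.0.2 master" >> /etc/hosts
# echo "172.17.0.3 slave1" >> /etc/hosts
# echo "172.17.0.4 slave2" >> /etc/hosts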

Initialize on the master machine:

hdfs namenode -format # otherwise nothing shows up in the web UI
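After formatting, a minimal sketch of bringing the cluster up and checking it (the sbin scripts are already on PATH via ~/.bashrc):

start-dfs.sh
start-yarn.sh
jps    # expect NameNode/ResourceManager on master and DataNode/NodeManager on the worker nodes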

Zookeeper installation and configuration#

wget https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.6.3/apache-zookeeper-3.6.3-bin.tar.gz
# after downloading, extract to /usr/local
tar -zxvf apache-zookeeper-3.6.3-bin.tar.gz -C /usr/local/
cd /usr/local
# rename to zookeeper
mv apache-zookeeper-3.6.3-bin zookeeper

Set the environment variables:

vim ~/.bashrc

Add:

export ZOOKEEPER_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin

Distribute to the other machines and run:

source ~/.bashrc

Configure Zookeeper

Enter the conf directory:

cd /usr/local/zookeeper/conf

Copy zoo_sample.cfg to zoo.cfg:

cp zoo_sample.cfg zoo.cfg

Make the following changes to zoo.cfg:

dataDir=/usr/local/zookeeper/data
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888

Distribute it to the other machines.

Create the data directory and a myid file inside it; on each machine the file content must be the number after server. in zoo.cfg, so on master:

vim /usr/local/zookeeper/data/myid

Insert 1.
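An equivalent sketch without vim, plus starting and checking the ensemble on every node (zkServer.sh is in the Zookeeper bin directory already on PATH):

mkdir -p /usr/local/zookeeper/data
echo 1 > /usr/local/zookeeper/data/myid    # use 2 on slave1 and 3 on slave2
zkServer.sh start
zkServer.sh status    # one node should report leader, the others follower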

At this point this is only a basic cluster, not yet highly available!

Spark installation and configuration#

spark#

Extract the Spark archive:

tar -xvf spark-3.1.2-bin-hadoop3.2.tgz -C /usr/local
# then rename
cd /usr/local
mv spark-3.1.2-bin-hadoop3.2 spark

Environment configuration:

vim ~/.bashrc

Edit the configuration file:

cd /usr/local/spark/conf
vi spark-env.sh

Write the following:

export JAVA_HOME=/usr/local/jdk1.8.0_291
export SCALA_HOME=/usr/share/scala
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=master
export SPARK_LOCAL_DIRS=/usr/local/spark

In the same directory:

vim workers

Write the following:

master
slave1
slave2

In /usr/local/spark/sbin, rename start-all.sh to start-spark-all.sh and stop-all.sh to stop-spark-all.sh, as sketched below.
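A small sketch of the rename, followed by starting the standalone cluster (port 8080 is Spark's default master web UI):

cd /usr/local/spark/sbin
mv start-all.sh start-spark-all.sh
mv stop-all.sh stop-spark-all.sh
./start-spark-all.sh    # master web UI at http://master:8080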

Scala installation and configuration#

Just add the following to ~/.bashrc:

export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$SCALA_HOME/bin

HBase installation and configuration#

Extract the downloaded archive:

tar -xvf hbase-2.3.5-bin.tar.gz -C /usr/local/
# rename
cd /usr/local
mv hbase-2.3.5/ hbase

Edit ~/.bashrc:

export HBASE_HOME=/usr/local/hbase
export PATH=$PATH:$HBASE_HOME/bin

Edit the configuration files:

cd /usr/local/hbase/conf
vi hbase-env.sh

Add the following:

export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
export JAVA_HOME=/usr/local/jdk1.8.0_291
export HBASE_MANAGES_ZK=false

Edit hbase-site.xml:

<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
<description>nothing</description> </property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
<description>nothing</description>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>./tmp</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>

Edit regionservers:

master
slave1
slave2
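With HDFS and Zookeeper already running, a quick sketch of starting HBase and checking it (start-hbase.sh and the shell come from $HBASE_HOME/bin):

start-hbase.sh
hbase shell
# inside the shell, for example:
#   status
#   create 't1', 'cf'
#   list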

Hive installation and configuration#

MySQL installation#

tar -xvf mysql-server_5.7.31-1ubuntu18.04_amd64.deb-bundle.tar
apt-get install ./libmysql*
apt-get install libtinfo5
apt-get install ./mysql-community-client_5.7.31-1ubuntu18.04_amd64.deb
apt-get install ./mysql-client_5.7.31-1ubuntu18.04_amd64.deb
apt-get install ./mysql-community-server_5.7.31-1ubuntu18.04_amd64.deb
### the mysql-community-server install (line 6) prompts twice for a password
apt-get install ./mysql-server_5.7.31-1ubuntu18.04_amd64.deb
### after installation, adjust permissions
cd /var/run
chmod -R 777 mysqld
cd /var/lib
chmod -R 777 mysql
service mysql start
mysql -uroot -p # enter the password


use mysql;
grant all privileges on *.* to 'hive'@'%' identified BY 'yourpassword' with grant option;
flush privileges;
exit;


service mysql restart


Hive installation#

Extract:

tar -xzvf apache-hive-3.1.2-bin.tar.gz -C /usr/local/
# rename
cd /usr/local
mv apache-hive-3.1.2-bin hive

Edit the environment variables:

vim ~/.bashrc
# add the following
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HIVE_HOME/bin

Create the warehouse directory:

cd /usr/local/hive
mkdir warehouse

Edit the configuration files

hive-env.sh

HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
export HIVE_AUX_JARS_PATH=/usr/local/hive/lib

hive-site.xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>asdfqwer</value>
</property>
<property>
<name>datanucleus.readOnlyDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateColumns</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.local</name>
<value>true</value>
</property>
<!-- print column headers in the CLI -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- show the current database name -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
</configuration>


Client-side hive-site.xml

<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/usr/local/hive/warehouse</value>
</property>
<property>
<name>hive.metastore.local</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://master:9083</value>
</property>
</configuration>

# server side
schematool -dbType mysql -initSchema
hive --service metastore
# client side
hive
show databases;

The first run reported an error:

[error screenshot]
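Once the metastore starts cleanly, a quick smoke test from the client (a minimal sketch; hive -e runs a statement non-interactively):

hive -e "show databases;"
hive -e "create database if not exists test_db;"
hive -e "use test_db; create table t1 (id int, name string); show tables;"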

High availability#

core-site.xml

<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
<description>Abase for other temporary directories.</description> </property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hdcluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
</configuration>

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.nameservices</name>
<value>hdcluster</value>
</property>
<!-- the hdcluster nameservice has two NameNodes: nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.hdcluster</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.hdcluster.nn1</name>
<value>master:9000</value>
</property>
<!-- HTTP address of nn1 -->
<property>
<name>dfs.namenode.http-address.hdcluster.nn1</name>
<value>master:50070</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.hdcluster.nn2</name>
<value>slave1:9000</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.hdcluster.nn2</name>
<value>slave1:50070</value>
</property>
<!-- where the NameNode metadata (edits) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master:8485;slave1:8485;slave2:8485/hdcluster</value>
</property>
<!-- local disk path where the JournalNode stores its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/journalData</value>
</property>
<!-- enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- failover proxy provider implementation -->
<property>
<name>dfs.client.failover.proxy.provider.hdcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- fencing methods: after a failover, prevent the failed NameNode from coming back and creating two active services; multiple methods are separated by newlines, one per line -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(/bin/true)
</value>
</property>
<!-- sshfence requires passwordless SSH; replace the path with your own user's key -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/namenode</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/datanode</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
</configuration>

hadoop-env.sh

export JAVA_HOME=/usr/local/jdk1.8.0_291
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root

mapred-site.xml

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>

yarn-site.xml

<configuration>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>slave1</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>slave1:8088</value>
</property> <property>
<name>yarn.resourcemanager.zk-address</name>
<value>master:2181,slave1:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop:/usr/local/hadoop/share/hadoop/common/lib/*:/usr/local/hadoop/share/hadoop/common/*:/usr/local/hadoop/share/hadoop/hdfs:/usr/local/hadoop/share/hadoop/hdfs/lib/*:/usr/local/hadoop/share/hadoop/hdfs/*:/usr/local/hadoop/share/hadoop/mapreduce/lib/*:/usr/local/hadoop/share/hadoop/mapreduce/*:/usr/local/hadoop/share/hadoop/yarn:/usr/local/hadoop/share/hadoop/yarn/lib/*:/usr/local/hadoop/share/hadoop/yarn/*</value>
</property>
</configuration>

Runtime error

ERROR: Cannot set priority of datanode process

Solution

chown -R root:root    ## permission/ownership problem

It can also be caused by inconsistent configuration files across the nodes.

If the cluster has been formatted before (so journalData may already be initialized) and you format the namenode again, you need to start the journalnodes first.

Error message:
Unable to check if JNs are ready for formatting.
hdfs --daemon start journalnode    # replaces the deprecated hadoop-daemon.sh start journalnode
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>hdcluster</value>
</property>
<property>
<name>dfs.ha.namenodes.hdcluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.hdcluster.nn1</name>
<value>master:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.hdcluster.nn2</name>
<value>slave1:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.hdcluster.nn1</name>
<value>master:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.hdcluster.nn2</name>
<value>slave1:9870</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master:8485;slave1:8485;slave2:8485/hdcluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.hdcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/journalData</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled.hdcluster</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/namenode</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/datanode</value>
<final>true</final>
</property>
</configuration>
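With the HA configuration above in place, a hedged sketch of a typical first-start order (standard Hadoop 3 commands; adjust hostnames to your layout):

# on master, slave1 and slave2
hdfs --daemon start journalnode
# on master
hdfs namenode -format
hdfs zkfc -formatZK
hdfs --daemon start namenode
# on slave1 (the standby NameNode)
hdfs namenode -bootstrapStandby
# back on master
start-dfs.sh
start-yarn.sh
hdfs haadmin -getServiceState nn1    # expect one of nn1/nn2 active, the other standby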
