
Hive Install

1. Install Hadoop

See hadoop-install.
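
Before moving on, it is worth confirming that Hadoop is actually up. A quick sanity check (the daemon names assume a basic single-node setup):

jps | grep -E 'NameNode|DataNode|ResourceManager|NodeManager'
hadoop version
hdfs dfsadmin -report | head -n 20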

2. Install MySQL

Run the docker-compose-mysql.yml file below to start the MySQL service.

services:
  mysql:
    image: mysql:8.0
    container_name: mysql
    ports:
      - "3306:3306"
    # restart: always
    # cap the container's log file size
    logging:
      driver: 'json-file'
      options:
        max-size: '10m'
        max-file: '3'
    # default-authentication-plugin is a server option rather than an
    # environment variable the mysql image understands, so pass it as a flag
    command: --default-authentication-plugin=mysql_native_password
    environment:
      - TZ=Asia/Shanghai
      - LANG=en_US.UTF-8
      - MYSQL_ROOT_PASSWORD=root
      # - MYSQL_USER=root
      - MYSQL_ROOT_HOST=%
    volumes:
      - mysql-data:/var/lib/mysql
      - ./mysql/config/my.cnf:/etc/mysql/my.cnf
    networks:
      - mysql-network
volumes:
  mysql-data:
networks:
  mysql-network:
    driver: bridge
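
With the compose file in place, start the container and verify that MySQL is reachable (assuming Docker Compose v2; with v1, use docker-compose instead):

docker compose -f docker-compose-mysql.yml up -d
docker compose -f docker-compose-mysql.yml ps
docker exec -it mysql mysql -uroot -proot -e 'select version();'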

The my.cnf file is configured as follows:

# server-side settings
[mysqld]
user=mysql                     # system user the MySQL server runs as
default-storage-engine=INNODB  # default storage engine for newly created tables
character-set-server=utf8mb4   # server-side default character set
collation-server = utf8mb4_general_ci # collation rules; must correspond to character-set-server

pid-file        = /var/lib/mysql/mysqld.pid  # location of the pid file
#socket          = /var/lib/mysql/mysqld.sock # socket used for local connections
socket = /var/lib/mysql/mysql.sock
datadir         = /var/lib/mysql             # data directory
bind-address    = 0.0.0.0                   # IP address MySQL binds to
mysqlx_bind_address=0.0.0.0

expire_logs_days=7                            # purge binary logs older than this many days

# character set for client connections, prevents garbled text
init_connect='SET NAMES utf8mb4'

# table-name case sensitivity; 1 means case-insensitive
lower_case_table_names = 1

# SQL mode (how strictly SQL is checked); left commented out because
# NO_AUTO_CREATE_USER was removed in MySQL 8, so this line breaks startup
#sql_mode = STRICT_TRANS_TABLES,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION

# transaction isolation level; MySQL defaults to REPEATABLE-READ,
# which can produce many gap locks and hurt performance
transaction_isolation = READ-COMMITTED

# allow NULL in TIMESTAMP columns not explicitly declared NOT NULL
explicit_defaults_for_timestamp = true

# maximum number of file descriptors (FDs) the mysqld process may use.
# Note the effective value is not necessarily what you set here:
# mysqld will grab as many FDs as the system allows
open_files_limit    = 65535

# maximum number of concurrent connections
max_connections=200

# maximum connection errors before a host is blocked
max_connect_errors = 600

[client]
default-character-set=utf8mb4  # client-side default character set

Run this script to create the hive user and grant it privileges (the hive database itself is created automatically, since the JDBC URL configured below sets createDatabaseIfNotExist=true):

mysql -uroot -proot -e "
create user 'hive'@'%' identified WITH mysql_native_password by '123456'; 
grant all privileges on *.* to 'hive'@'%'; 
flush privileges;"
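
A quick check that the new account can log in:

mysql -uhive -p123456 -e 'select current_user();'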

3. Install Hive

Download the Hive 3.1.2 release and extract it into the install directory:

cd /opt/soft/bigdata/
wget https://archive.apache.org/dist/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /opt/soft/bigdata/
rm -rf apache-hive-3.1.2-bin.tar.gz
mv /opt/soft/bigdata/apache-hive-3.1.2-bin /opt/soft/bigdata/hive
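
The commands that follow rely on $HIVE_HOME (and $HADOOP_HOME) being set. A minimal sketch, assuming ~/.bashrc is your shell profile:

cat >> ~/.bashrc <<"EOF"
export HIVE_HOME=/opt/soft/bigdata/hive
export PATH=$PATH:$HIVE_HOME/bin
EOF
source ~/.bashrc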

4. Configure hive-site.xml

The hive-site.xml file is configured as follows:

<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost:3306/hive?useSSL=false&amp;createDatabaseIfNotExist=true&amp;allowPublicKeyRetrieval=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <!-- with MySQL Connector/J 8.x the preferred class is com.mysql.cj.jdbc.Driver;
             the legacy name below still works but logs a deprecation warning -->
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    <property>
        <name>hive.metastore.db.type</name>
        <value>mysql</value>
    </property>
    <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.tez.exec.inplace.progress</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.exec.scratchdir</name>
        <value>/opt/hive/scratch_dir</value>
    </property>
    <property>
        <name>hive.user.install.directory</name>
        <value>/opt/hive/install_dir</value>
    </property>
    <property>
        <name>tez.runtime.optimize.local.fetch</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.exec.submit.local.task.via.child</name>
        <value>false</value>
    </property>
    <property>
        <name>mapreduce.framework.name</name>
        <value>local</value>
    </property>
    <property>
        <name>tez.local.mode</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.execution.engine</name>
        <value>tez</value>
    </property>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/opt/hive/data/warehouse</value>
    </property>
    <property>
        <name>metastore.metastore.event.db.notification.api.auth</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.server2.active.passive.ha.enable</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>0.0.0.0</value>
    </property>
    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>
</configuration>

Swap out the conflicting guava jar so Hive uses the same version as Hadoop:

rm -rf /opt/soft/bigdata/hive/lib/guava-11.0.2.jar
cp /opt/soft/bigdata/hadoop-3.3.0/share/hadoop/common/lib/guava-27.0-jre.jar /opt/soft/bigdata/hive/lib/
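
Two steps the walkthrough otherwise glosses over: the MySQL JDBC driver must be on Hive's classpath, and the metastore schema has to be initialized before first use. A minimal sketch; the connector path and version are assumptions:

cp /path/to/mysql-connector-java-8.0.28.jar $HIVE_HOME/lib/   # driver jar from Maven Central; version is an assumption
$HIVE_HOME/bin/schematool -dbType mysql -initSchema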

5. Add the Tez engine

cd /opt/soft/bigdata/
curl -O https://dlcdn.apache.org/tez/0.10.1/apache-tez-0.10.1-bin.tar.gz
tar -zxvf apache-tez-0.10.1-bin.tar.gz 
mv apache-tez-0.10.1-bin  tez-0.10.1   # the -bin tarball extracts to apache-tez-0.10.1-bin

hadoop fs -mkdir -p /tez
hadoop fs -put /opt/soft/bigdata/apache-tez-0.10.1-bin.tar.gz /tez/
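
# sanity check (optional): confirm the tarball is on HDFS; tez.lib.uris below points at it
hadoop fs -ls /tez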

cd /opt/soft/bigdata/hadoop-3.3.0/etc/hadoop/

touch tez-site.xml
cat <<"EOF" | tee -a ./tez-site.xml 
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<!--location on HDFS where the tez tarball was uploaded-->
	<property>
		<name>tez.lib.uris</name>
		<value>${fs.defaultFS}/tez/apache-tez-0.10.1-bin.tar.gz</value>
	</property>
	<property>
		<name>tez.use.cluster.hadoop-libs</name>
		<value>true</value>
	</property>
	<!--AM container size (MB); should be >= yarn.scheduler.minimum-allocation-mb-->
	<property>
		<name>tez.am.resource.memory.mb</name>
		<value>1024</value>
	</property>
	<!--vcores per AM container-->
	<property>
		<name>tez.am.resource.cpu.vcores</name>
		<value>1</value>
	</property>
	<!--maximum fraction of the Tez container given over to the Java heap-->
	<property>
		<name>tez.container.max.java.heap.fraction</name>
		<value>0.8</value>
	</property>
	<!-- memory (MB) allocated to each Tez task container; too small and Tez tasks may fail to start-->
	<property>
		<name>tez.task.resource.memory.mb</name>
		<value>1024</value>
	</property>
	<!-- vcores allocated to each Tez task container-->
	<property>
		<name>tez.task.resource.cpu.vcores</name>
		<value>1</value>
	</property>
</configuration>

EOF


cat >> $HADOOP_HOME/etc/hadoop/shellprofile.d/tez.sh <<"EOF"

hadoop_add_profile tez
function _tez_hadoop_classpath
{
 hadoop_add_classpath "$HADOOP_HOME/etc/hadoop" after
 hadoop_add_classpath "/opt/soft/bigdata/tez-0.10.1/*" after
 hadoop_add_classpath "/opt/soft/bigdata/tez-0.10.1/lib/*" after
}

EOF
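
# verify the profile took effect: tez jars should now appear on the hadoop classpath
hadoop classpath | tr ':' '\n' | grep -i tez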

# configure hive
touch $HIVE_HOME/conf/hive-env.sh
cat >> $HIVE_HOME/conf/hive-env.sh <<"EOF"
export TEZ_HOME=/opt/soft/bigdata/tez-0.10.1  
export TEZ_JARS=$TEZ_HOME/*:$TEZ_HOME/lib/*
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${TEZ_JARS}

EOF

source $HIVE_HOME/conf/hive-env.sh


# remove the slf4j binding jar to resolve the logging jar conflict

rm -rf /opt/soft/bigdata/tez-0.10.1/lib/slf4j-log4j12-1.7.10.jar

# make sure the tez lib directory has hadoop-shaded-protobuf_3_7-1.0.0.jar

cp /opt/soft/bigdata/hadoop-3.3.0/share/hadoop/common/lib/hadoop-shaded-protobuf_3_7-1.0.0.jar /opt/soft/bigdata/tez-0.10.1/lib/

# move Hive's conflicting protobuf jar out of the way
mv /opt/soft/bigdata/hive/lib/protobuf-java-2.5.0.jar /opt/soft/bigdata/hive/lib/protobuf-java-2.5.0.jar.bak

Update hive-site.xml

This version adds the Tez engine settings:

<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost:3306/hive?useSSL=false&amp;createDatabaseIfNotExist=true&amp;allowPublicKeyRetrieval=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    <property>
        <name>hive.metastore.db.type</name>
        <value>mysql</value>
    </property>
    <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.tez.exec.inplace.progress</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.exec.scratchdir</name>
        <value>/opt/hive/scratch_dir</value>
    </property>
    <property>
        <name>hive.user.install.directory</name>
        <value>/opt/hive/install_dir</value>
    </property>
    <property>
        <name>hive.exec.submit.local.task.via.child</name>
        <value>false</value>
    </property>
    <property>
        <name>mapreduce.framework.name</name>
        <value>local</value>
    </property>
    
    <property>
        <name>tez.runtime.optimize.local.fetch</name>
        <value>true</value>
    </property>
    <property>
        <name>tez.local.mode</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.execution.engine</name>
        <value>tez</value>
    </property>
    <property>
        <name>hive.tez.container.size</name>
        <value>1024</value>
    </property>
    <property>
        <name>hive.tez.am.task.max.failed.attempts</name>
        <value>10</value>
    </property>
    <property>
        <name>hive.tez.am.max.app.attempts</name>
        <value>5</value>
    </property>

    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/opt/hive/data/warehouse</value>
    </property>
    <property>
        <name>metastore.metastore.event.db.notification.api.auth</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.server2.active.passive.ha.enable</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>0.0.0.0</value>
    </property>
    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>
</configuration>

Configure Hadoop's core-site.xml file (these proxy-user entries let HiveServer2 impersonate other users):

<property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>*</value>
</property>

<property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>*</value>
</property>
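
Proxy-user changes normally require restarting HDFS and YARN; alternatively, they can be refreshed in place:

hdfs dfsadmin -refreshSuperUserGroupsConfiguration
yarn rmadmin -refreshSuperUserGroupsConfiguration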

6. Start the Hive services

# Hive's own CLI client (optional)
# $HIVE_HOME/bin/hive

# start the metastore and hiveserver2 (hiveserver2 provides JDBC access)
mkdir -p $HIVE_HOME/logs
nohup $HIVE_HOME/bin/hive --service metastore > $HIVE_HOME/logs/hive-metastore.log 2>&1 &
nohup $HIVE_HOME/bin/hiveserver2 > $HIVE_HOME/logs/hiveserver2.log 2>&1 &

# after startup, check the logs:
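tail -n 50 $HIVE_HOME/logs/hiveserver2.log
# hiveserver2 can take a while to open its port; 9083 = metastore, 10000 = hiveserver2
# (ss is from iproute2; use netstat -lnt if it is unavailable)
ss -lnt | grep -E '9083|10000'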

7. Connect to hiveserver2 with beeline

# connect to Hive
$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000

# test
create database test;
use test;
create table test(a string);
insert into test values("tom");
select * from test group by a;


$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -e 'use test;insert into test values("tom");'
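
To confirm that queries actually run on Tez, check the engine setting over JDBC:

$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -e 'set hive.execution.engine;'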
References:

  • https://kontext.tech/article/448/install-hadoop-330-on-linux
  • https://kontext.tech/article/561/apache-hive-312-installation-on-linux-guide