将 ES 中的数据导入 HIVE 操作记录


1. 环境设置

1
2
3
4
5
6
7
8
-- Allow fully dynamic partition inserts (no static partition column required).
-- NOTE: '#' is not a valid HiveQL comment prefix; '--' is required for the
-- script to run via `hive -f` / beeline.
set hive.exec.dynamic.partition.mode=nonstrict;

-- Switch to the staging database.
use stage;

-- Register the ES-Hadoop connector jar required by EsStorageHandler below.
add jar hdfs://node1:8020/lib/hive/elasticsearch-hadoop-6.1.1.jar;

2. 建 ES 源数据表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
-- 1. ES source table: external table mapped onto an Elasticsearch index via
--    the ES-Hadoop storage handler (no data is stored in HDFS; reads go to ES).
DROP TABLE IF EXISTS stage.es_action_activitypar;

CREATE EXTERNAL TABLE stage.es_action_activitypar(
    log_ip        string,
    current_time  string,
    version       string,
    device_id     string,
    subscriber_id string,
    user_id       string    -- fixed: a trailing comma after the last column is a Hive DDL syntax error
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
    'es.resource'          = '$es_resource',  -- placeholder; presumably substituted before execution — confirm (e.g. ${hivevar:es_resource} when run with --hivevar)
    'es.nodes'             = 'esn1,esn2,esn3',
    'es.port'              = '9200',
    'es.mapping.date.rich' = 'false'          -- keep ES date fields as plain strings instead of rich date objects
);

3. 创建Hive 数据表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- 2. HDFS staging table: external table over the raw '|'-delimited text logs.
--    Target of the ES -> Hive copy performed by the INSERT OVERWRITE below.
DROP TABLE IF EXISTS stage.stage_action_activitypar;

CREATE EXTERNAL TABLE stage.stage_action_activitypar(
    log_ip        string,
    current_time  string,
    version       string,
    device_id     string,
    subscriber_id string,
    user_id       string    -- fixed: a trailing comma after the last column is a Hive DDL syntax error
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS textfile
-- $day_dir: daily directory placeholder — presumably substituted before execution; confirm
LOCATION '/raw/rawdata/backendlog/market/activityPartakeLog/$day_dir';

4. 导入数据

1
2
3
4
5
6
7
8
9
-- Copy every row from the ES-backed source table into the HDFS-backed staging
-- table, replacing any existing contents of the target.
INSERT OVERWRITE TABLE stage.stage_action_activitypar
SELECT
    src.log_ip,
    src.current_time,
    src.version,
    src.device_id,
    src.subscriber_id,
    src.user_id
FROM stage.es_action_activitypar src;

文章作者: hnbian
版权声明: 本博客所有文章除特别声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 hnbian !
评论
  目录