1. 环境设置
set hive.exec.dynamic.partition.mode=nonstrict;
use stage;
add jar hdfs://bdm:8020/lib/hive/elasticsearch-hadoop-6.1.1.jar;
2. 建ES 源数据表
drop TABLE if exists stage.es_action_activitypar;
CREATE EXTERNAL TABLE stage.es_action_activitypar(
log_ip string,
current_time string,
version string,
device_id string,
subscriber_id string,
user_id string,
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = '$es_resource','es.nodes'='esn1,esn2,esn3','es.port'='9200','es.mapping.date.rich' = 'false');
3. 创建Hive 数据表
drop table if exists stage.stage_action_activitypar;
CREATE EXTERNAL TABLE stage.stage_action_activitypar(
log_ip string,
current_time string,
version string,
device_id string,
subscriber_id string,
user_id string,
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS textfile
LOCATION '/raw/rawdata/backendlog/market/activityPartakeLog/$day_dir';
4. 导入数据
insert overwrite table stage.stage_action_activitypar
select
log_ip,
current_time,
version,
device_id,
subscriber_id,
user_id
from stage.es_action_activitypar;