curl 获取json数据导入hive外部双分区表解决科学计数法和特殊字符问题 ——首发

145 阅读 0 评论 96 点赞

我是靠谱客的博主会撒娇紫菜，这篇文章主要介绍curl 获取json数据导入hive外部双分区表解决科学计数法和特殊字符问题 ——首发，现在分享给大家，希望可以做个参考。

shell 脚本

# Create the HDFS directory that backs the external Hive table.
# -p makes the step idempotent: without it the command fails on every re-run
# once the directory exists. `hadoop fs` replaces the deprecated `hadoop dfs`.
hadoop fs -mkdir -p /jzytest/hivePublicOpinionList/

# Create the external Hive table (only if it does not exist yet), partitioned
# by topic_id and dt. Rows are parsed with the HCatalog JSON SerDe and the
# table data lives under hdfs:///jzytest/hivePublicOpinionList/.
create_table_hql='create external table if not exists jzyPublicOpinionListjson
(
channel string,
publishTime string,
source string,
title string,
url string
)
partitioned by (topic_id string,dt string)
row format serde "org.apache.hive.hcatalog.data.JsonSerDe"
LOCATION "hdfs:///jzytest/hivePublicOpinionList/";'
hive -e "${create_table_hql}"
# Fetch public-opinion records for every topic/channel pair and load them into
# the (topic_id, dt) partitions of the Hive table jzyPublicOpinionListjson.
#
# Inputs:  jzyhive.out - whitespace-separated topic ids, read from the
#          current working directory.
# Effects: for each topic id, creates the HDFS partition directory, uploads one
#          file per channel holding one JSON object per line, then registers
#          the partition with Hive. Diagnostics go to stderr.
channel_array=("21" "104" "303")
dt=$(date "+%Y-%m-%d")

if [[ ! -r jzyhive.out ]]; then
  printf 'error: cannot read topic id list jzyhive.out\n' >&2
  exit 1
fi

# Split the id list on any whitespace, as `$(cat jzyhive.out)` would, but
# without glob expansion. `read -d ''` returns non-zero at end of file, which
# is expected here, hence `|| true`.
topic_ids=()
read -r -d '' -a topic_ids < jzyhive.out || true

for topic_id in "${topic_ids[@]}"; do
  partition_dir="/jzytest/hivePublicOpinionList/topic_id=${topic_id}/dt=${dt}"
  hadoop fs -mkdir -p "${partition_dir}"

  for channel in "${channel_array[@]}"; do
    out_file="jzytopic_id${topic_id}channel${channel}.out"

    # Build the request body; the ids are passed as printf arguments rather
    # than spliced in through unquoted expansions.
    printf -v request_body '{
       "version":"1.0",
       "caller":"pxxx",
       "componentName":"pxxx",
       "password":"pxxx",
       "callee":"tencent_poa",
       "eventId":293194931,
       "seqId":"1481860426.8432320581878515392",
       "spanId":"pxxx",
       "timestamp":1481860426,
       "interface":{
           "interfaceName":"qcloud.supervision.getPublicOpinionList",
           "para":{
               "topic_id": "%s",
               "channel": "%s",
               "page": 1,
               "page_size": 30
               }
           }
       }' "${topic_id}" "${channel}"

    # Capture the response first so that a failed request is detected instead
    # of being hidden behind the exit status of the rest of a pipeline.
    if ! response=$(curl -sS -X POST -d "${request_body}" \
      http://2xx.1xx.xx2.xx4:9090/interface); then
      printf 'warning: request failed for topic_id=%s channel=%s; skipped\n' \
        "${topic_id}" "${channel}" >&2
      continue
    fi

    # 1. awk keeps only the text between the first ":[" and the first "]}",
    #    i.e. the comma-separated list of record objects; lines without both
    #    markers (or with an empty list) produce no output.
    # 2. sed puts each record on its own line (the JSON SerDe expects one
    #    object per line) and replaces the full-width brackets 《 》 with
    #    ASCII << >>. "\n" in the replacement requires GNU sed.
    awk -v head=':[' -v tail=']}' '{
      start = index($0, head)
      len = index($0, tail) - start - length(head)
      if (start > 0 && len > 0) {
        print substr($0, start + length(head), len)
      }
    }' <<<"${response}" \
      | sed \
        -e 's/},[[:space:]]*{/}\n{/g' \
        -e 's/《/<</g' \
        -e 's/》/>>/g' \
        > "${out_file}"

    if [[ -s "${out_file}" ]]; then
      # -f overwrites a file left by an earlier run on the same day, so the
      # script can be re-run without "File exists" failures.
      hadoop fs -put -f "${out_file}" "${partition_dir}/"
    else
      printf 'warning: no records for topic_id=%s channel=%s; nothing uploaded\n' \
        "${topic_id}" "${channel}" >&2
    fi
    rm -f -- "${out_file}"
  done

  # "if not exists" keeps re-runs from failing on an already registered
  # partition.
  hive -e "alter table jzyPublicOpinionListjson add if not exists partition (topic_id='${topic_id}',dt='${dt}');"
done
说明：脚本中的 awk 命令（原文第一处标红）用于处理形如 {…"data":[{…},{…}]} 的返回数据，截取 ":[" 与 "]}" 之间的内容，即 {…},{…} 这一串记录。