概述
1. 安装sdk
yum -y install unzip
yum -y install zip
curl -s "https://get.sdkman.io" | bash
新终端下执行:source "$HOME/.sdkman/bin/sdkman-init.sh"
检查是否安装成功:
(1) sdk version
(2) sdk help
补充:卸载sdkman(先备份,再删除)
tar zcvf ~/sdkman-backup_$(date +%F-%kh%M).tar.gz -C ~/ .sdkman
rm -rf ~/.sdkman
2. 安装gradle
sdk install gradle
3. 下载es-hadoop
cd /data/tools
git clone https://github.com/elastic/elasticsearch-hadoop.git
4.编译es-hadoop
cd /data/tools/elasticsearch-hadoop
vi gradle.properties
+hadoopversion 2.6.0
+hiveversion 1.1.0
+sparkversion 2.1.0
./gradlew distZip
5. 将编译好的jar包拷贝到各节点Hive的lib目录
cp elasticsearch-hadoop-7.0.0-alpha1-SNAPSHOT.jar /opt/cloudera/parcels/CDH/lib/hive/lib
scp elasticsearch-hadoop-7.0.0-alpha1-SNAPSHOT.jar root@ctdn-1:/opt/cloudera/parcels/CDH/lib/hive/lib
6. 在Hive中创建ES外部表并写入数据
参考:
https://github.com/elastic/elasticsearch-hadoop
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/hive.html#hive
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html
hive> add jar /opt/cloudera/parcels/CDH/lib/hive/lib/elasticsearch-hadoop-7.0.0-alpha1-SNAPSHOT.jar;
CREATE EXTERNAL TABLE ext_es_org_info (
`orgid` string,
`investorg` string,
`orgname` string,
`logo` string,
`weburl` string,
`orgdesc` string,
`founddate` string,
`district` string,
`investtotal` int,
`investstage` string,
`prov` string,
`city` string,
`focusdomain` string,
`investproj` string,
`investamount` string)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
'es.nodes' = '10.11.8.32:9200',
'es.index.auto.create' = 'true',
'es.resource' = 'org/org_info',
'es.mapping.id' = 'orgid',
'es.mapping.names' = 'investorg:investorg,
orgname:orgname,
logo:logo,
weburl:weburl,
orgdesc:orgdesc,
founddate:founddate,
district:district,
investtotal:investtotal,
investstage:investstage,
prov:prov,
city:city,
focusdomain:focusdomain,
investproj:investproj,
investamount:investamount');
SET hive.mapred.reduce.tasks.speculative.execution = false;
SET mapreduce.map.speculative = false;
SET mapreduce.reduce.speculative = false;
INSERT overwrite TABLE ext_es_org_info
SELECT orgid
,investorg
,orgname
,logo
,weburl
,orgdesc
,founddate
,district
,investtotal
,investstage
,prov
,city
,focusdomain
,investproj
,investamount
FROM es_org_info;
curl -XGET http://10.11.8.32:9200/yelpindex/1
7. 配置hive-site.xml,使Hive自动加载es-hadoop的jar包
cd /opt/cloudera/parcels/CDH/lib/hive/conf
vi hive-site.xml
+
<property>
<name>hive.aux.jars.path</name>
<value>/opt/cloudera/parcels/CDH/lib/hive/lib/elasticsearch-hadoop-7.0.0-alpha1-SNAPSHOT.jar</value>
<description>A comma separated list (with no spaces) of the jar files</description>
</property>
scp hive-site.xml root@ctdn-6:/opt/cloudera/parcels/CDH/lib/hive/conf
hive-site.xml
curl -XGET 'http://10.11.8.32:9200/yelpindex/yelp/_search?q=id:1'
参照:
https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/elastic/elasticsearch-py
http://qbox.io/blog/elasticsearch-in-apache-spark-python
https://www.yelp.com/dataset
http://blog.csdn.net/xmo_jiao/article/details/73251937
https://www.elastic.co/guide/en/elasticsearch/reference/6.1/query-dsl-mlt-query.html
最后
以上就是称心裙子为你收集整理的elasticsearch与Hadoop的全部内容,希望文章能够帮你解决elasticsearch与Hadoop所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复