概述
使用flume采集web服务器日志,架构见图
每台webserver 的agent的配置:
-
# Components of agent 'flume74Agent': one source, two sinks, one channel.
-
flume74Agent.sources=source74
-
flume74Agent.sinks=sink74-1 sink74-2
-
flume74Agent.channels=channel74
-
# Declare the sink group. The key must be spelled 'sinkgroups' (it was
# misspelled 'sinksgroups'); otherwise group74 is never registered and the
# flume74Agent.sinkgroups.group74.* settings are not applied.
-
flume74Agent.sinkgroups=group74
-
# Source: a syslogtcp source listening on 10.21.3.74:514, feeding channel74.
-
flume74Agent.sources.source74.type=syslogtcp
-
flume74Agent.sources.source74.port=514
-
flume74Agent.sources.source74.host=10.21.3.74
-
flume74Agent.sources.source74.channels=channel74
-
# Memory channel. capacity must be at least transactionCapacity; the smaller
# the capacity, the fewer events are lost if the agent dies. keep-alive is the
# timeout, in seconds, for adding or removing an event.
-
flume74Agent.channels.channel74.type=memory
-
flume74Agent.channels.channel74.capacity=2000
-
flume74Agent.channels.channel74.transactionCapacity=1000
-
flume74Agent.channels.channel74.keep-alive=30
-
# File channel alternative (disabled). For better throughput, keep
# checkpointDir and dataDirs in separate directories.
-
#flume74Agent.channels.channel74.type=file
-
#flume74Agent.channels.channel74.checkpointDir=/usr/local/new-cluster/apache-flume-1.6.0-bin/checkpoint
-
#flume74Agent.channels.channel74.dataDirs=/usr/local/new-cluster/apache-flume-1.6.0-bin/data
-
#flume74Agent.channels.channel74.transactionCapacity=10000
-
#flume74Agent.channels.channel74.checkpointInterval=60000
-
#flume74Agent.channels.channel74.capacity=20000
-
#flume74Agent.channels.channel74.keep-alive=30
-
# First sink, sink74-1: Avro sink forwarding events from channel74 to 10.21.3.73:4141.
-
flume74Agent.sinks.sink74-1.type=avro
-
flume74Agent.sinks.sink74-1.port=4141
-
flume74Agent.sinks.sink74-1.hostname=10.21.3.73
-
flume74Agent.sinks.sink74-1.channel=channel74
-
# Second sink, sink74-2: Avro sink forwarding events from channel74 to 10.21.3.75:4141.
-
flume74Agent.sinks.sink74-2.type=avro
-
flume74Agent.sinks.sink74-2.port=4141
-
flume74Agent.sinks.sink74-2.hostname=10.21.3.75
-
flume74Agent.sinks.sink74-2.channel=channel74
-
# Sink group group74: the two Avro sinks that share channel74.
-
flume74Agent.sinkgroups.group74.sinks=sink74-1 sink74-2
-
# Load-balance across the group's sinks: this spreads the load and avoids
# losing data when one of the collectors goes down. backoff = true makes a
# failed sink back off before it is retried; selector = random picks the sink
# at random.
-
flume74Agent.sinkgroups.group74.processor.type = load_balance
-
flume74Agent.sinkgroups.group74.processor.backoff = true
-
flume74Agent.sinkgroups.group74.processor.selector = random
flume collector(汇聚节点)的agent配置:
-
# Components of agent 'collection75Agent': one source, one sink, one channel.
collection75Agent.sources=source75
-
collection75Agent.sinks=sink75-1
-
collection75Agent.channels=channel75
-
# Source: Avro source bound to 10.21.3.75:4141, feeding channel75.
-
collection75Agent.sources.source75.type=avro
-
collection75Agent.sources.source75.channels=channel75
-
collection75Agent.sources.source75.bind=10.21.3.75
-
collection75Agent.sources.source75.port=4141
-
# Interceptor chain, applied in order: i1 (host), then i2 (timestamp).
collection75Agent.sources.source75.interceptors = i1 i2
-
# i1: host interceptor; writes the host of the machine running this agent into
# the 'hostname' header. preserveExisting = false overwrites an existing header.
# NOTE(review): this records the collector's host, not the originating web
# server's — confirm that is intended.
collection75Agent.sources.source75.interceptors.i1.type = org.apache.flume.interceptor.HostInterceptor$Builder
-
collection75Agent.sources.source75.interceptors.i1.preserveExisting = false
-
collection75Agent.sources.source75.interceptors.i1.hostHeader = hostname
-
# i2: timestamp interceptor; adds the 'timestamp' header that the HDFS sink
# needs to expand %Y-%m-%d escapes in hdfs.path / hdfs.filePrefix.
collection75Agent.sources.source75.interceptors.i2.type = org.apache.flume.interceptor.TimestampInterceptor$Builder
-
# Memory channel: holds up to 2000 events, at most 1000 per transaction;
# keep-alive is the timeout, in seconds, for adding or removing an event.
-
collection75Agent.channels.channel75.type=memory
-
collection75Agent.channels.channel75.capacity=2000
-
collection75Agent.channels.channel75.transactionCapacity=1000
-
collection75Agent.channels.channel75.keep-alive=30
-
# File channel alternative (disabled); uncomment these lines and comment out
# the memory channel settings to use it.
-
#collection75Agent.channels.channel75.type=file
-
#collection75Agent.channels.channel75.checkpointDir=/usr/local/new-cluster/apache-flume-1.6.0-bin/checkpoint
-
#collection75Agent.channels.channel75.dataDirs=/usr/local/new-cluster/apache-flume-1.6.0-bin/data
-
#collection75Agent.channels.channel75.transactionCapacity=10000
-
#collection75Agent.channels.channel75.checkpointInterval=60000
-
#collection75Agent.channels.channel75.capacity=20000
-
#collection75Agent.channels.channel75.keep-alive=30
-
# Configuration of the HDFS sink sink75-1.
# NOTE: a properties file has no inline comments — anything after the value,
# including "#...", becomes part of the value and breaks numeric parsing. Every
# comment below is therefore on its own line.
-
collection75Agent.sinks.sink75-1.type=hdfs
-
collection75Agent.sinks.sink75-1.channel=channel75
-
# Target directory and file name; the %Y-%m-%d escapes are expanded from each
# event's timestamp header.
collection75Agent.sinks.sink75-1.hdfs.path=hdfs://mycluster1/flume/%Y-%m
-
collection75Agent.sinks.sink75-1.hdfs.filePrefix=syslog75.%Y-%m-%d
-
collection75Agent.sinks.sink75-1.hdfs.fileSuffix=.log
-
# Round the event timestamp down to a multiple of 10 minutes before expanding
# the escapes.
collection75Agent.sinks.sink75-1.hdfs.round=true
-
collection75Agent.sinks.sink75-1.hdfs.roundValue=10
-
collection75Agent.sinks.sink75-1.hdfs.roundUnit=minute
-
# Seconds before rolling to a new file; 0 = never roll based on time.
collection75Agent.sinks.sink75-1.hdfs.rollInterval=0
-
# File size in bytes that triggers a roll; 0 = never roll based on size.
collection75Agent.sinks.sink75-1.hdfs.rollSize=0
-
# Number of events written before the file is flushed to HDFS.
collection75Agent.sinks.sink75-1.hdfs.batchSize=1000
-
# Number of events that triggers a roll; 0 = never roll based on event count.
collection75Agent.sinks.sink75-1.hdfs.rollCount=0
-
collection75Agent.sinks.sink75-1.hdfs.fileType = DataStream
-
collection75Agent.sinks.sink75-1.hdfs.writeFormat=Text
-
# Timeout, in milliseconds, for HDFS operations.
collection75Agent.sinks.sink75-1.hdfs.callTimeout=600000
-
collection75Agent.sinks.sink75-1.hdfs.threadsPoolSize=20
-
collection75Agent.sinks.sink75-1.hdfs.rollTimerPoolSize=5
-
# Seconds without writes after which the file is closed and renamed (the .tmp
# suffix is dropped). With all roll triggers set to 0, this is what finalizes files.
collection75Agent.sinks.sink75-1.hdfs.idleTimeout=600
-
# Configuration of an optional second HDFS sink, sink75-2 (disabled). To enable
# it, uncomment these lines and add sink75-2 to collection75Agent.sinks.
-
#collection75Agent.sinks.sink75-2.type=hdfs
-
#collection75Agent.sinks.sink75-2.channel=channel75
-
#collection75Agent.sinks.sink75-2.hdfs.path=hdfs://mycluster1/flume/%Y-%m
-
#collection75Agent.sinks.sink75-2.hdfs.filePrefix=syslog2.%Y-%m-%d
-
#collection75Agent.sinks.sink75-2.hdfs.fileSuffix=.log
-
#collection75Agent.sinks.sink75-2.hdfs.round=true
-
#collection75Agent.sinks.sink75-2.hdfs.roundValue=10
-
#collection75Agent.sinks.sink75-2.hdfs.roundUnit=minute
-
#collection75Agent.sinks.sink75-2.hdfs.rollInterval=0
-
#collection75Agent.sinks.sink75-2.hdfs.rollSize=0
-
#collection75Agent.sinks.sink75-2.hdfs.batchSize=1000
-
#collection75Agent.sinks.sink75-2.hdfs.rollCount=0
-
#collection75Agent.sinks.sink75-2.hdfs.fileType = DataStream
-
#collection75Agent.sinks.sink75-2.hdfs.writeFormat=Text
-
#collection75Agent.sinks.sink75-2.hdfs.callTimeout=600000
-
#collection75Agent.sinks.sink75-2.hdfs.threadsPoolSize=20
-
#collection75Agent.sinks.sink75-2.hdfs.rollTimerPoolSize=5
-
#collection75Agent.sinks.sink75-2.channel=channel75
后台启动flume Agent:
# Start the Flume agent in the background; stdout and stderr go to start.log.
# The -n value must match the agent-name prefix used in the file passed with -f.
# NOTE(review): this starts collection73Agent, while the collector config shown
# in this article is collection75Agent — presumably this is the command for the
# collector on 10.21.3.73; confirm and adjust the names for the 75 host.
nohup flume-ng agent -c conf/ -f conf/collection73Agent.conf -n collection73Agent > start.log 2>&1 &
rsyslog.conf配置图:
补充:flume-env.sh配置
# JVM options for the Flume agent: fixed 2048 MB heap (-Xms/-Xmx), 256 KB
# thread stacks (-Xss), 512 MB young generation (-Xmn), ParNew + CMS garbage
# collectors, and the GC overhead limit check disabled (-XX:-UseGCOverheadLimit).
JAVA_OPTS="-Xms2048m -Xmx2048m -Xss256k -Xmn512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-UseGCOverheadLimit"
最后
以上就是碧蓝爆米花为你收集整理的Flume 采集rsyslog整个配置和流程的全部内容,希望文章能够帮你解决Flume 采集rsyslog整个配置和流程所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复