Flume, 大数据

flume-08 案例

kafka source file channel hdfs sink

# Agent a1: Kafka source r1 -> file channel c1 -> HDFS sink k1.
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# --- Source r1: consumes topic "logs" as consumer group "new.logs" ---
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.channels = c1
a1.sources.r1.kafka.bootstrap.servers = 192.168.1.81:9092
a1.sources.r1.kafka.consumer.group.id = new.logs
a1.sources.r1.kafka.topics = logs

# Project-specific interceptor applied to every event from r1.
# "param" is handed to the interceptor's Builder; its meaning is defined by that class.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.chajisong.interceptors.Kafka2HDFSInterceptor$Builder
a1.sources.r1.interceptors.i1.param = parameter

# --- Channel c1: durable file channel ---
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /home/flume/checkpoint-kafka
a1.channels.c1.dataDirs = /home/flume/data-kafka

# --- Sink k1: plain-text output on HDFS, one directory per year-month (%Y-%m) ---
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://192.168.1.83:8020/flume/webdata/logs/%Y-%m
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.minBlockReplicas = 1
a1.sinks.k1.hdfs.rollSize = 130048
# Explicit roll interval intentionally left disabled, so the sink's default applies.
#a1.sinks.k1.hdfs.rollInterval = 14400
# Mark in-progress files with a leading "." and a ".temp" suffix so they are
# treated as temporary files and ignored until they are closed.
a1.sinks.k1.hdfs.inUsePrefix = .
a1.sinks.k1.hdfs.inUseSuffix = .temp

kafka source file channel kafka sink

# Agent a1: Kafka source r1 -> file channel c1 -> Kafka sink k1.
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# --- Source r1: consumes topic "test" as consumer group "new.cjs" ---
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.channels = c1
a1.sources.r1.kafka.bootstrap.servers = 192.168.1.78:9092
a1.sources.r1.kafka.consumer.group.id = new.cjs
a1.sources.r1.kafka.topics = test
# Per the Flume user guide, topicHeader is the NAME of the event header that
# receives the source topic (default name: "topic"). The Kafka sink uses a
# header called "topic" to override its destination, so a non-default name here
# keeps events from being written back to the source topic in a loop.
a1.sources.r1.topicHeader = testSink

# Project-specific interceptor applied to every event from r1.
# "param" is handed to the interceptor's Builder; its meaning is defined by that class.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.chajisong.interceptors.Kafka2KafkaInterceptor$Builder
a1.sources.r1.interceptors.i1.param = parameter

# --- Channel c1: durable file channel ---
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /home/flume/checkpoint-kafka
a1.channels.c1.dataDirs = /home/flume/data-kafka

# --- Sink k1: publishes to topic "testSink" on the same broker ---
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.channel = c1
a1.sinks.k1.kafka.bootstrap.servers = 192.168.1.78:9092
a1.sinks.k1.kafka.topic = testSink

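这里需要注意:在 source 的 kafka 配置中需要添加 topicHeader(本例中取值为目标 kafka 的 topic 名 testSink),否则会一直循环而不存入目标 sink。原因是:Kafka source 默认会把来源 topic 写入名为 topic 的 header,而 Kafka sink 会优先使用名为 topic 的 header 作为写入的 topic,于是数据又被写回源 topic;topicHeader 实际上指定的是这个 header 的名字,只要改成非默认值(不叫 topic)即可避免循环。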

kafka source file channel kafka、hdfs sink

# Agent a1: Kafka source r1 fans out to two sinks, each fed by its own channel:
#   r1 -> c1 -> k1 (HDFS)
#   r1 -> c2 -> k2 (Kafka)
# Every sink and channel declared here must also be configured below;
# the original declared k2 and c2 but never defined them.
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2

# --- Source r1: consumes topic "test" as consumer group "new.cjs" ---
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.kafka.bootstrap.servers = 192.168.1.78:9092
a1.sources.r1.kafka.topics = test
a1.sources.r1.kafka.consumer.group.id = new.cjs
# topicHeader is the NAME of the event header that receives the source topic
# (default name: "topic"). A non-default name keeps the Kafka sink k2 from
# treating it as a topic override and writing events back to "test".
a1.sources.r1.topicHeader = testSink

# Project-specific interceptor; it runs on the source, so both sinks see its output.
# "param" is handed to the interceptor's Builder; its meaning is defined by that class.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.chajisong.interceptors.Kafka2HDFSInterceptor$Builder
a1.sources.r1.interceptors.i1.param = parameter

# --- Sink k1: HDFS ---
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://192.168.1.78:8020/flume/webdata/
a1.sinks.k1.hdfs.rollInterval = 14400
a1.sinks.k1.hdfs.rollSize = 130048
# NOTE(review): hdfs.rollCount is not set, so the sink's default event-count
# roll still applies alongside the two settings above - confirm this is intended.

# --- Sink k2: Kafka, publishes to topic "testSink" ---
a1.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k2.kafka.topic = testSink
a1.sinks.k2.kafka.bootstrap.servers = 192.168.1.78:9092

# --- Channel c1: file channel feeding the HDFS sink ---
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /home/flume/checkpoint-kafka
a1.channels.c1.dataDirs = /home/flume/data-kafka

# --- Channel c2: file channel feeding the Kafka sink ---
# Each file channel needs its own checkpoint and data directories.
# These paths are placeholders - adjust to the local layout.
a1.channels.c2.type = file
a1.channels.c2.checkpointDir = /home/flume/checkpoint-kafka-c2
a1.channels.c2.dataDirs = /home/flume/data-kafka-c2

# Wiring: the source writes every event to both channels (default replicating
# selector); each sink drains exactly one channel.
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2

tcp source file channel kafka sink

# Agent a1: netcat (TCP) source r1 -> file channel c1 -> Kafka sink k1.
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# --- Source r1: listens for line-oriented text on 192.168.1.78:44444 ---
a1.sources.r1.type = netcat
a1.sources.r1.bind = 192.168.1.78
a1.sources.r1.port = 44444

# Project-specific interceptor applied to every event from r1.
# "param" is handed to the interceptor's Builder; its meaning is defined by that class.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.chajisong.interceptors.CustomInterceptor$Builder
a1.sources.r1.interceptors.i1.param = parameter

# --- Sink k1: publishes to topic "testSink" ---
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = testSink
a1.sinks.k1.kafka.bootstrap.servers = 192.168.1.78:9092
# flumeBatchSize is a property of the sink itself, not a "kafka."-prefixed one.
# Written as kafka.flumeBatchSize it is silently ignored and the default batch
# size is used instead.
a1.sinks.k1.flumeBatchSize = 5

# --- Channel c1: durable file channel ---
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /home/flume/checkpoint
a1.channels.c1.dataDirs = /home/flume/data

# Wiring
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1