4 common use cases of the Kafka APIs
Kafka Connect - High Level
Kafka Connect Concepts
Getting Confluent
Setup for Kafka Connect
./zookeeper-server-start ../etc/kafka/zookeeper.properties
./kafka-server-start ../etc/kafka/server.properties
./schema-registry-start ../etc/schema-registry/schema-registry.properties
Reading file data with Connect
./connect-standalone ../etc/schema-registry/connect-avro-standalone.properties \
../etc/kafka/connect-file-source.properties
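For reference, the stock connect-file-source.properties that ships with the distribution looks roughly like this (file and topic names may vary by version):
name=local-file-source
connector.class=FileStreamSource
tasks.max=1
file=test.txt
topic=connect-test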
./kafka-avro-console-consumer --bootstrap-server localhost:9092 \
--topic connect-test --from-beginning
Writing file data with Connect
./connect-standalone ../etc/schema-registry/connect-avro-standalone.properties \
../etc/kafka/connect-file-source.properties ../etc/kafka/connect-file-sink.properties
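Likewise, the stock connect-file-sink.properties is roughly the following; it reads back from connect-test and appends to a sink file:
name=local-file-sink
connector.class=FileStreamSink
tasks.max=1
file=test.sink.txt
topics=connect-test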
Kafka Connect MySQL source connector
./confluent-hub install confluentinc/kafka-connect-jdbc:latest
name=test-source-mysql-jdbc-autoincrement
connector.class=io.confluent.connect.jdbc.JdbcSourceConnector
tasks.max=1
connection.url=jdbc:mysql://127.0.0.1:3306/mydb?user=root&password=
mode=incrementing
incrementing.column.name=rollno
topic.prefix=test-mysql-jdbc-
table.whitelist=user4
create table user4(
fname varchar(30),
rollno int(6) primary key);
insert into user4 values ("sunny",1);
insert into user4 values ("ginny",2);
./connect-standalone ../etc/schema-registry/connect-avro-standalone.properties \
../etc/kafka-connect-jdbc/source-quickstart-mysql.properties
./kafka-topics --bootstrap-server localhost:9092 --list
./kafka-avro-console-consumer --bootstrap-server localhost:9092 \
--topic test-mysql-jdbc-user4 --from-beginning
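You should see the two inserted rows, JSON-encoded from Avro (the nullable fname column is an Avro union, so the exact shape may vary by converter version):
{"fname":{"string":"sunny"},"rollno":1}
{"fname":{"string":"ginny"},"rollno":2}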
JDBC Sink Connector
name=test-sink
connector.class=io.confluent.connect.jdbc.JdbcSinkConnector
tasks.max=1
topics=test-mysql-jdbc-user4
connection.url=jdbc:mysql://127.0.0.1:3306/mydb?user=root&password=
auto.create=true
./connect-standalone ../etc/schema-registry/connect-avro-standalone.properties \
../etc/kafka-connect-jdbc/sink-quickstart-mysql.properties
Running 2 standalone workers for the JDBC source and sink connectors
./connect-standalone ../etc/schema-registry/connect-avro-standalone-1.properties \
../etc/kafka-connect-jdbc/source-quickstart-mysql.properties
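Two standalone workers cannot share a REST port or offset file, so connect-avro-standalone-1.properties is assumed here to be a copy of connect-avro-standalone.properties with at least these overrides (values illustrative):
rest.port=8084
offset.storage.file.filename=/tmp/connect-1.offsets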
Install and run MongoDB 3.6 using Docker
docker image pull mongo:3.6
docker container run --name mongodb3.6 -d -p 27018:27017 mongo:3.6 mongod --replSet my-mongo-set
docker exec -it mongodb3.6 /bin/bash
mongo --host 127.0.0.1:27017
rs.initiate()
use kafka-topics
MongoDB sink connector
./confluent-hub install mongodb/kafka-connect-mongodb:1.4.0
tasks.max=1
connection.uri=mongodb://localhost:27018
database=kafka-topics
connector.class=com.mongodb.kafka.connect.MongoSinkConnector
topics=test-mysql-jdbc-user4
name=mongo-sink-worker
collection=user
./connect-standalone ../etc/schema-registry/connect-avro-standalone.properties \
../etc/kafka-connect-mongo/sink-quickstart-mongodb.properties
db.user.find()
MySQL source to MongoDB sink
./connect-standalone ../etc/schema-registry/connect-avro-standalone-1.properties \
../etc/kafka-connect-jdbc/source-quickstart-mysql.properties
Exercise 1 (Do any two)
Kafka Streams
Stream Processors
Streams
Stream Processor
Source Processor
Sink Processor
KStreams
KTable
Log Compaction
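As a quick illustration of the KStream/KTable distinction (standard semantics, not specific to the code below): a KStream treats every record as an independent event, while a KTable treats records with the same key as upserts, keeping only the latest value per key, which is exactly what a log-compacted topic retains.
// Input records on a topic: ("alice", 1), ("alice", 3)
// KStream view: two independent events -> ("alice", 1), ("alice", 3)
// KTable view : latest value per key   -> ("alice", 3)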
Create 2 topics
./kafka-topics --create --topic promotion-code-upper --zookeeper localhost:2181 \
--replication-factor 1 --partitions 3
./kafka-topics --create --topic promotion-code --zookeeper localhost:2181 \
--replication-factor 1 --partitions 3
./kafka-console-producer --topic promotion-code --broker-list localhost:9092 \
--property parse.key=true --property key.separator="-"
./kafka-console-consumer --topic promotion-code-upper --bootstrap-server localhost:9092 \
--from-beginning \
--property print.key=true \
--property key.separator="-"
Note: command to delete a topic
./kafka-topics --zookeeper localhost:2181 --delete --topic <topic-name>
Set Up
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.annotation.EnableKafkaStreams;
@SpringBootApplication
@EnableKafkaStreams
@EnableKafka
public class KafkaStreamApplication {
    public static void main(String[] args) {
        SpringApplication.run(KafkaStreamApplication.class, args);
    }
}
Enable Kafka and Kafka Streams for the Spring Boot application
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsConfig;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.KafkaStreamsDefaultConfiguration;
import org.springframework.kafka.config.KafkaStreamsConfiguration;
import java.util.HashMap;
import java.util.Map;
@Configuration
public class KafkaStreamPropertyConfiguration {
    @Bean(name = KafkaStreamsDefaultConfiguration.DEFAULT_STREAMS_CONFIG_BEAN_NAME)
    public KafkaStreamsConfiguration kafkaStreamsConfiguration() {
        Map<String, Object> props = new HashMap<>();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-stream");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
        return new KafkaStreamsConfiguration(props);
    }
}
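The same settings can instead be supplied via Spring Boot configuration rather than a bean. A sketch, assuming the standard spring.kafka.streams.* property keys map onto the configuration above:
spring.kafka.streams.application-id=kafka-stream
spring.kafka.streams.bootstrap-servers=localhost:9092
spring.kafka.streams.properties.default.key.serde=org.apache.kafka.common.serialization.Serdes$StringSerde
spring.kafka.streams.properties.default.value.serde=org.apache.kafka.common.serialization.Serdes$StringSerde
spring.kafka.streams.properties.cache.max.bytes.buffering=0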
Set up the KafkaStreamsConfiguration
Transform promotion codes from the promotion-code topic to upper case and write them to the promotion-code-upper topic
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Printed;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class KafkaStreamPromotionCode {
    @Bean
    public KStream<String, String> kStreamPromotionUppercase(StreamsBuilder streamsBuilder) {
        KStream<String, String> sourceStream = streamsBuilder
                .stream("promotion-code", Consumed.with(Serdes.String(), Serdes.String()));
        KStream<String, String> uppercaseStream = sourceStream.mapValues(e -> e.toUpperCase());
        uppercaseStream.to("promotion-code-upper");
        sourceStream.print(Printed.<String, String>toSysOut().withLabel("code"));
        uppercaseStream.print(Printed.<String, String>toSysOut().withLabel("Upper-Case-Code"));
        return sourceStream;
    }
}
On publishing data to the promotion-code topic, you will see that the value has been converted to upper case and is read by the consumer of the promotion-code-upper topic:
{code:"asdnas"} => {CODE:"ASDNAS"}
Our stream has also upper-cased the code attribute name, but we only want to upper-case its value.
Let's see how we do it.
Create the PromotionCode class
public class PromotionCode {
    private String code;

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    @Override
    public String toString() {
        return "PromotionCode{" +
                "code='" + code + '\'' +
                '}';
    }
}
Transformation using Spring JSON Serde
import com.course.kafka.kafkastream.entity.PromotionCode;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Printed;
import org.apache.kafka.streams.kstream.Produced;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.support.serializer.JsonSerde;
@Configuration
public class KafkaStreamJSONPromotionCode {
    @Bean
    public KStream<String, PromotionCode> kStreamPromotionUppercase(StreamsBuilder streamsBuilder) {
        KStream<String, PromotionCode> sourceStream = streamsBuilder
                .stream("promotion-code", Consumed.with(Serdes.String(), new JsonSerde<>(PromotionCode.class)));
        KStream<String, PromotionCode> uppercaseStream = sourceStream.mapValues(this::uppercasePromotionCode);
        uppercaseStream.to("promotion-code-upper", Produced.with(Serdes.String(), new JsonSerde<>(PromotionCode.class)));
        sourceStream.print(Printed.<String, PromotionCode>toSysOut().withLabel("code"));
        uppercaseStream.print(Printed.<String, PromotionCode>toSysOut().withLabel("Upper-Case-Code"));
        return sourceStream;
    }

    private PromotionCode uppercasePromotionCode(PromotionCode promotionCode) {
        promotionCode.setCode(promotionCode.getCode().toUpperCase());
        return promotionCode;
    }
}
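With the console producer started with parse.key and the "-" separator as before, a session might look like this (the key 1 and JSON value are illustrative):
>1-{"code":"summer10"}
The consumer on promotion-code-upper should then show {"code":"SUMMER10"}: only the value is upper-cased, the attribute name is untouched.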
Keys and Partitions
Kafka Streams Operations
stream.mapValues(v->v+10);
stream.map((k,v)->KeyValue.pair("X"+k,v*5));
stream.filter((k,v)->v%2==0)
stream.filterNot((k,v)->v%2==0)
KStream<String,Integer>[] branches = stream.branch(
(k,v)->v>100,
(k,v)->v>20,
(k,v)->v>10
)
stream.selectKey((k,v)->"A"+k)
//split a sentence into words
sentencesStream.flatMapValues(value->Arrays.asList(value.split("\\s+")));
// (alice, alice is nice) transforms to (alice,alice), (alice,is), (alice,nice)
KStream<Long, String> stream = ...;
KStream<String, Integer> transformed = stream.flatMap(
    // Here, we generate two output records for each input record.
    // We also change the key and value types.
    // Example: (345L, "Hello") -> ("HELLO", 1000), ("hello", 9000)
    (key, value) -> {
        List<KeyValue<String, Integer>> result = new LinkedList<>();
        result.add(KeyValue.pair(value.toUpperCase(), 1000));
        result.add(KeyValue.pair(value.toLowerCase(), 9000));
        return result;
    }
);
stream.groupByKey()
stream.groupBy((k,v)->v%2)
stream.foreach((k,v)->insertIntoDatabase(v))
stream.peek((k,v)->insertIntoDatabase(v)).[nextProcess]
stream.print(Printed.toSysOut())
stream.to("output-topic")
stream.through("output-topic").[nextProcess]
Problem statement
Find the word count from a stream of sentences
./kafka-topics --create --topic sentence --zookeeper localhost:2181 \
--replication-factor 1 --partitions 3
./kafka-topics --create --topic word-count --zookeeper localhost:2181 \
--replication-factor 1 --partitions 3
./kafka-console-producer --topic sentence --broker-list localhost:9092
./kafka-console-consumer --topic word-count --bootstrap-server localhost:9092 \
--property print.key=true \
--property key.separator="-"
High-level DSL to find the word count from sentences
Spring Kafka sentence to word-count stream transformation
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.Arrays;
@Configuration
public class KafkaSentenceToWordCountStreamConfig {
    @Bean
    KStream<String, String> sentenceToWordsStreamProcessor(StreamsBuilder streamsBuilder) {
        KStream<String, String> kStream = streamsBuilder.stream("sentence");
        kStream
                .mapValues(s -> s.toLowerCase())
                .flatMapValues(s -> Arrays.asList(s.split("\\s+")))
                .selectKey((k, v) -> v)
                .groupByKey()
                .count()
                .toStream()
                .mapValues(e -> e.toString())
                .peek((key, value) -> System.out.println(String.format("Key :: %s, Value :: %s", key, value)))
                .to("word-count");
        return kStream;
    }
}
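For example, producing the sentence below should yield the following on word-count (illustrative: since caching is disabled, every intermediate count is emitted, and interleaving across keys may vary):
>kafka streams makes kafka easy

kafka-1
streams-1
makes-1
kafka-2
easy-1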
Most Favourite Colour with KTable
./kafka-topics --create --topic colour --zookeeper localhost:2181 \
--replication-factor 1 --partitions 3
./kafka-console-producer --topic colour --broker-list localhost:9092 \
--property parse.key=true --property key.separator="-"
Reading from topic as KTable
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class KafkaColourCountStream {
    @Bean
    KStream<String, String> colourCount(StreamsBuilder streamsBuilder) {
        return streamsBuilder.table("colour")
                .groupBy((key, value) -> KeyValue.pair(value, value))
                .count()
                .toStream()
                .map((k, v) -> KeyValue.pair(k.toString(), v.toString()))
                .peek((key, value) -> System.out.println(String.format("Key :: %s, Value :: %s", key, value)));
    }
}
Start producing messages with producer
>a-yellow
>b-yellow
>a-green
>b-green
>a-blue
>b-blue
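Because the topic is read as a KTable, each record is an upsert: when user a switches from yellow to green, the count for yellow is decremented and the count for green incremented. After the six records above, the expected final counts printed by peek are (intermediate updates omitted):
Key :: yellow, Value :: 0
Key :: green, Value :: 0
Key :: blue, Value :: 2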
Problem Statement
On the basis of bank transactions, calculate the bank balance
./kafka-topics --create --topic bank-transaction --zookeeper localhost:2181 \
--replication-factor 1 --partitions 3
./kafka-console-producer --topic bank-transaction --broker-list localhost:9092
Create BankTransaction Entity
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;
@NoArgsConstructor
@AllArgsConstructor
@Data
@ToString
public class BankTransaction {
    private String name;
    private Long amount;
}
Create Stream to calculate total balance
import com.course.kafka.kafkastream.entity.BankTransaction;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Printed;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.support.serializer.JsonSerde;

@Configuration
public class KafkaBankTransactionStream {
    @Bean
    public KStream<String, BankTransaction> bankTransactionKStream(StreamsBuilder streamsBuilder) {
        KStream<String, BankTransaction> sourceBankTransactionKStream = streamsBuilder
                .stream("bank-transaction", Consumed.with(Serdes.String(), new JsonSerde<>(BankTransaction.class)));
        sourceBankTransactionKStream
                // re-key by customer name; the repartition topic needs explicit serdes here,
                // since the default value serde (String) cannot serialize BankTransaction
                .groupBy((k, v) -> v.getName(), Grouped.with(Serdes.String(), new JsonSerde<>(BankTransaction.class)))
                .aggregate(
                        () -> 0L,
                        (k, v, balance) -> balance + v.getAmount(),
                        Materialized.with(Serdes.String(), Serdes.Long()))
                .toStream()
                .print(Printed.toSysOut());
        return sourceBankTransactionKStream;
    }
}
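Sample input for the bank-transaction console producer (JSON matching the BankTransaction fields; names and amounts are illustrative):
>{"name":"sunny","amount":100}
>{"name":"sunny","amount":250}
>{"name":"ginny","amount":50}
The printed aggregate should show sunny reaching 350 and ginny 50.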
Exercise 2
{"id":"1","gender":"Male","age":"32","name","Sunny","salary":"500000"}
Exactly-once semantics
[Diagram: a Kafka Streams application (or a plain producer/consumer) sits between a source Kafka topic and a destination Kafka topic. The cycle is: 1. receive message, 2. send output, 3. receive ack, 4. commit offset.]
If the application crashes between sending the output (step 2) and committing the offset (step 4), the message is reprocessed on restart and the output is duplicated. How does Kafka solve this problem?
KafkaStreamsConfiguration
props.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG,StreamsConfig.EXACTLY_ONCE);
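Note: StreamsConfig.EXACTLY_ONCE is deprecated on newer clients; from Kafka 3.0 onwards the equivalent setting is:
props.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, StreamsConfig.EXACTLY_ONCE_V2);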