要在没有模式注册表(Schema Registry)的情况下反序列化AVRO消息,可以在代码中手动提供AVRO模式并自行完成解码。下面是一个示例代码,演示了如何消费Kafka中的AVRO消息、在不依赖模式注册表的情况下进行反序列化,并将消息写入S3(与AWS MSK S3 Sink Connector的写入效果类似):
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectMetadata;
import io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.commons.io.IOUtils;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;
public class AvroDeserializerWithoutRegistry {
public static void main(String[] args) {
String topic = "your-topic";
String bootstrapServers = "your-bootstrap-servers";
String s3Bucket = "your-s3-bucket";
String s3KeyPrefix = "your-s3-key-prefix";
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
props.put(ConsumerConfig.GROUP_ID_CONFIG, "avro-consumer");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
KafkaConsumer consumer = new KafkaConsumer<>(props);
consumer.subscribe(Collections.singletonList(topic));
AmazonS3 s3Client = AmazonS3ClientBuilder.defaultClient();
while (true) {
ConsumerRecords records = consumer.poll(100);
for (ConsumerRecord record : records) {
String key = record.key();
String avroPayload = record.value();
// Deserialize AVRO payload
byte[] avroBytes = avroPayload.getBytes(StandardCharsets.ISO_8859_1);
GenericRecord genericRecord = deserializeAvro(avroBytes);
// Upload AVRO payload to S3
String s3Key = s3KeyPrefix + key + ".avro";
ObjectMetadata metadata = new ObjectMetadata();
metadata.setContentLength(avroBytes.length);
InputStream inputStream = new ByteArrayInputStream(avroBytes);
s3Client.putObject(s3Bucket, s3Key, inputStream, metadata);
IOUtils.closeQuietly(inputStream);
}
consumer.commitSync();
}
}
private static GenericRecord deserializeAvro(byte[] avroBytes) {
// Replace "your-schema" with your actual AVRO schema
String schema = "your-schema";
AbstractKafkaAvroDeserializer deserializer = new AbstractKafkaAvroDeserializer() {
@Override
protected Object deserialize(String topic, Boolean isKey, byte[] payload) {
return null;
}
};
deserializer.configure(Collections.singletonMap("schema.registry.url", ""), false);
return (GenericRecord) deserializer.deserialize("", avroBytes, schema);
}
}
请确保替换代码中的以下值:
your-topic:要消费的Kafka主题;your-bootstrap-servers:Kafka集群的引导服务器地址;your-s3-bucket:要将AVRO消息写入的S3存储桶;your-s3-key-prefix:AVRO消息在S3中的键前缀;your-schema:AVRO消息实际使用的模式。在上述代码中,我们手动提供AVRO模式来反序列化消息,替代了通常依赖的模式注册表;然后使用AWS SDK将AVRO消息上传到S3存储桶中。
请注意,由于没有使用模式注册表,必须手动提供AVRO模式:将代码中的"your-schema"替换为您的AVRO模式的JSON文本。