From dc2596f2fc7b644a411a6ffc8f04724bf79cf86b Mon Sep 17 00:00:00 2001 From: Jennifer Huang <47805623+Jennifer88huang@users.noreply.github.com> Date: Wed, 2 Dec 2020 11:08:54 +0800 Subject: [PATCH 1/4] [website] Update the format issue (#8773) ### Motivation The whole Pulsar website could not be built correctly with some syntax errors. Found the error here https://github.com/apache/pulsar/runs/1483341061?check_suite_focus=true ### Modifications Fix the syntax error. --- site2/website/data/connectors.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site2/website/data/connectors.js b/site2/website/data/connectors.js index 9933d53a08ddef..4182f2c98648c5 100644 --- a/site2/website/data/connectors.js +++ b/site2/website/data/connectors.js @@ -136,7 +136,7 @@ module.exports = [ longName: 'NSQ source', type: 'Source', link: 'https://nsq.io/', - } + }, { name: 'rabbitmq', longName: 'RabbitMQ source and sink', From 456f265a4c291b36c02a831e8d939cf5e58be880 Mon Sep 17 00:00:00 2001 From: Jennifer Huang <47805623+Jennifer88huang@users.noreply.github.com> Date: Wed, 2 Dec 2020 13:51:14 +0800 Subject: [PATCH 2/4] [docs] Update Websocket content (#8762) * update * Update site2/docs/client-libraries-websocket.md Co-authored-by: HuanliMeng <48120384+Huanli-Meng@users.noreply.github.com> Co-authored-by: HuanliMeng <48120384+Huanli-Meng@users.noreply.github.com> --- site2/docs/client-libraries-websocket.md | 6 +++--- site2/docs/getting-started-clients.md | 2 +- site2/website/sidebars.json | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/site2/docs/client-libraries-websocket.md b/site2/docs/client-libraries-websocket.md index 7ada71b4be6025..fd3d9c8a28d22f 100644 --- a/site2/docs/client-libraries-websocket.md +++ b/site2/docs/client-libraries-websocket.md @@ -1,13 +1,13 @@ --- id: client-libraries-websocket -title: Pulsar's WebSocket API +title: Pulsar WebSocket API sidebar_label: WebSocket --- -Pulsar's [WebSocket](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) API is meant to provide a simple way to interact with Pulsar using languages that do not have an official [client library](getting-started-clients.md). Through WebSockets you can publish and consume messages and use all the features available in the [Java](client-libraries-java.md), [Go](client-libraries-go.md), [Python](client-libraries-python.md) and [C++](client-libraries-cpp.md) client libraries. +Pulsar [WebSocket](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) API provides a simple way to interact with Pulsar using languages that do not have an official [client library](getting-started-clients.md). Through WebSocket, you can publish and consume messages and use features available on the [Client Features Matrix](https://github.com/apache/pulsar/wiki/Client-Features-Matrix) page. -> You can use Pulsar's WebSocket API with any WebSocket client library. See examples for Python and Node.js [below](#client-examples). +> You can use Pulsar WebSocket API with any WebSocket client library. See examples for Python and Node.js [below](#client-examples). 
## Running the WebSocket service diff --git a/site2/docs/getting-started-clients.md b/site2/docs/getting-started-clients.md index d1c218f28df2fb..add817b890452c 100644 --- a/site2/docs/getting-started-clients.md +++ b/site2/docs/getting-started-clients.md @@ -1,7 +1,7 @@ --- id: client-libraries title: Pulsar client libraries -sidebar_label: Use Pulsar with client libraries +sidebar_label: Overview --- Pulsar supports the following client libraries: diff --git a/site2/website/sidebars.json b/site2/website/sidebars.json index d0968018dde8a1..31bf9f26487c0d 100644 --- a/site2/website/sidebars.json +++ b/site2/website/sidebars.json @@ -104,6 +104,7 @@ "performance-pulsar-perf" ], "Client libraries": [ + "client-libraries", "client-libraries-java", "client-libraries-go", "client-libraries-python", From 543bf920155bfc4e414e8c874d32f5616580db17 Mon Sep 17 00:00:00 2001 From: WangJialing <65590138+wangjialing218@users.noreply.github.com> Date: Wed, 2 Dec 2020 16:12:51 +0800 Subject: [PATCH 3/4] exclude ahc.properties for license check (#8745) ### Motivation license-maven-plugin check fail for ahc.properties, maybe related with license-maven-plugin update in #8706 ![licenseCheck](https://user-images.githubusercontent.com/65590138/100595150-81dd9d80-3335-11eb-8539-3dd07f982442.PNG) ### Modifications Add ahc.properties to exclude list of license-maven-plugin --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index c64fb717037aa6..e39a82719666fd 100644 --- a/pom.xml +++ b/pom.xml @@ -1170,6 +1170,7 @@ flexible messaging model and an intuitive client API. **/src/main/java/org/apache/bookkeeper/mledger/util/AbstractCASReferenceCounted.java **/ByteBufCodedInputStream.java **/ByteBufCodedOutputStream.java + **/ahc.properties bin/proto/* conf/schema_example.conf data/** From ee4cddf35970f6230b8c0a3f3c247e10e85b1973 Mon Sep 17 00:00:00 2001 From: lipenghui Date: Wed, 2 Dec 2020 16:21:19 +0800 Subject: [PATCH 4/4] Update site for 2.7.0 (#8768) --- site2/website/releases.json | 1 + .../version-2.7.0/admin-api-brokers.md | 158 + .../version-2.7.0/admin-api-clusters.md | 222 ++ .../version-2.7.0/admin-api-functions.md | 579 ++++ .../version-2.7.0/admin-api-namespaces.md | 886 ++++++ .../admin-api-non-partitioned-topics.md | 8 + .../admin-api-non-persistent-topics.md | 8 + .../version-2.7.0/admin-api-overview.md | 91 + .../admin-api-partitioned-topics.md | 8 + .../version-2.7.0/admin-api-permissions.md | 121 + .../admin-api-persistent-topics.md | 8 + .../version-2.7.0/admin-api-tenants.md | 157 + .../version-2.7.0/admin-api-topics.md | 1206 ++++++++ .../version-2.7.0/administration-proxy.md | 76 + .../administration-pulsar-manager.md | 183 ++ .../version-2.7.0/administration-zk-bk.md | 348 +++ .../version-2.7.0/client-libraries-java.md | 882 ++++++ .../version-2.7.0/client-libraries-node.md | 431 +++ .../version-2.7.0/client-libraries-python.md | 291 ++ .../client-libraries-websocket.md | 448 +++ .../concepts-architecture-overview.md | 156 + .../version-2.7.0/concepts-authentication.md | 9 + .../version-2.7.0/concepts-messaging.md | 518 ++++ .../version-2.7.0/concepts-multi-tenancy.md | 55 + .../version-2.7.0/concepts-transactions.md | 30 + .../version-2.7.0/cookbooks-compaction.md | 127 + .../version-2.7.0/cookbooks-deduplication.md | 124 + .../version-2.7.0/cookbooks-non-persistent.md | 59 + .../version-2.7.0/cookbooks-partitioned.md | 7 + .../cookbooks-retention-expiry.md | 318 ++ .../version-2.7.0/cookbooks-tiered-storage.md | 301 ++ .../version-2.7.0/deploy-aws.md | 227 ++ 
.../version-2.7.0/deploy-bare-metal.md | 461 +++ .../version-2.7.0/deploy-docker.md | 52 + .../version-2.7.0/deploy-monitoring.md | 95 + .../developing-binary-protocol.md | 556 ++++ .../version-2.7.0/functions-develop.md | 1084 +++++++ .../version-2.7.0/functions-package.md | 431 +++ .../version-2.7.0/functions-runtime.md | 313 ++ .../version-2.7.0/functions-worker.md | 286 ++ .../version-2.7.0/getting-started-clients.md | 35 + .../version-2.7.0/getting-started-helm.md | 358 +++ .../version-2.7.0/helm-deploy.md | 375 +++ .../version-2.7.0/helm-overview.md | 100 + .../version-2.7.0/helm-upgrade.md | 34 + .../versioned_docs/version-2.7.0/io-cli.md | 606 ++++ .../version-2.7.0/io-connectors.md | 232 ++ .../version-2.7.0/io-hdfs2-sink.md | 59 + .../version-2.7.0/io-nsq-source.md | 21 + .../version-2.7.0/io-quickstart.md | 816 ++++++ .../version-2.7.0/io-rabbitmq-source.md | 81 + .../versioned_docs/version-2.7.0/io-use.md | 1505 ++++++++++ .../version-2.7.0/reference-cli-tools.md | 745 +++++ .../version-2.7.0/reference-configuration.md | 781 +++++ .../version-2.7.0/reference-metrics.md | 404 +++ .../version-2.7.0/reference-pulsar-admin.md | 2567 +++++++++++++++++ .../version-2.7.0/reference-terminology.md | 167 ++ .../version-2.7.0/schema-get-started.md | 95 + .../version-2.7.0/security-authorization.md | 101 + .../version-2.7.0/security-bouncy-castle.md | 139 + .../version-2.7.0/security-extending.md | 196 ++ .../version-2.7.0/security-oauth2.md | 207 ++ .../sql-deployment-configurations.md | 159 + .../version-2.7.0/tiered-storage-aws.md | 282 ++ .../version-2.7.0/tiered-storage-azure.md | 225 ++ .../tiered-storage-filesystem.md | 269 ++ .../version-2.7.0/tiered-storage-gcs.md | 274 ++ .../version-2.7.0/tiered-storage-overview.md | 50 + .../version-2.7.0/transaction-api.md | 150 + .../version-2.7.0/transaction-guarantee.md | 17 + .../version-2.7.0-sidebars.json | 160 + site2/website/versions.json | 1 + 72 files changed, 22532 insertions(+) create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-brokers.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-clusters.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-functions.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-namespaces.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-non-partitioned-topics.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-non-persistent-topics.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-overview.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-partitioned-topics.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-permissions.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-persistent-topics.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-tenants.md create mode 100644 site2/website/versioned_docs/version-2.7.0/admin-api-topics.md create mode 100644 site2/website/versioned_docs/version-2.7.0/administration-proxy.md create mode 100644 site2/website/versioned_docs/version-2.7.0/administration-pulsar-manager.md create mode 100644 site2/website/versioned_docs/version-2.7.0/administration-zk-bk.md create mode 100644 site2/website/versioned_docs/version-2.7.0/client-libraries-java.md create mode 100644 site2/website/versioned_docs/version-2.7.0/client-libraries-node.md create mode 100644 
site2/website/versioned_docs/version-2.7.0/client-libraries-python.md create mode 100644 site2/website/versioned_docs/version-2.7.0/client-libraries-websocket.md create mode 100644 site2/website/versioned_docs/version-2.7.0/concepts-architecture-overview.md create mode 100644 site2/website/versioned_docs/version-2.7.0/concepts-authentication.md create mode 100644 site2/website/versioned_docs/version-2.7.0/concepts-messaging.md create mode 100644 site2/website/versioned_docs/version-2.7.0/concepts-multi-tenancy.md create mode 100644 site2/website/versioned_docs/version-2.7.0/concepts-transactions.md create mode 100644 site2/website/versioned_docs/version-2.7.0/cookbooks-compaction.md create mode 100644 site2/website/versioned_docs/version-2.7.0/cookbooks-deduplication.md create mode 100644 site2/website/versioned_docs/version-2.7.0/cookbooks-non-persistent.md create mode 100644 site2/website/versioned_docs/version-2.7.0/cookbooks-partitioned.md create mode 100644 site2/website/versioned_docs/version-2.7.0/cookbooks-retention-expiry.md create mode 100644 site2/website/versioned_docs/version-2.7.0/cookbooks-tiered-storage.md create mode 100644 site2/website/versioned_docs/version-2.7.0/deploy-aws.md create mode 100644 site2/website/versioned_docs/version-2.7.0/deploy-bare-metal.md create mode 100644 site2/website/versioned_docs/version-2.7.0/deploy-docker.md create mode 100644 site2/website/versioned_docs/version-2.7.0/deploy-monitoring.md create mode 100644 site2/website/versioned_docs/version-2.7.0/developing-binary-protocol.md create mode 100644 site2/website/versioned_docs/version-2.7.0/functions-develop.md create mode 100644 site2/website/versioned_docs/version-2.7.0/functions-package.md create mode 100644 site2/website/versioned_docs/version-2.7.0/functions-runtime.md create mode 100644 site2/website/versioned_docs/version-2.7.0/functions-worker.md create mode 100644 site2/website/versioned_docs/version-2.7.0/getting-started-clients.md create mode 100644 site2/website/versioned_docs/version-2.7.0/getting-started-helm.md create mode 100644 site2/website/versioned_docs/version-2.7.0/helm-deploy.md create mode 100644 site2/website/versioned_docs/version-2.7.0/helm-overview.md create mode 100644 site2/website/versioned_docs/version-2.7.0/helm-upgrade.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-cli.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-connectors.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-hdfs2-sink.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-nsq-source.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-quickstart.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-rabbitmq-source.md create mode 100644 site2/website/versioned_docs/version-2.7.0/io-use.md create mode 100644 site2/website/versioned_docs/version-2.7.0/reference-cli-tools.md create mode 100644 site2/website/versioned_docs/version-2.7.0/reference-configuration.md create mode 100644 site2/website/versioned_docs/version-2.7.0/reference-metrics.md create mode 100644 site2/website/versioned_docs/version-2.7.0/reference-pulsar-admin.md create mode 100644 site2/website/versioned_docs/version-2.7.0/reference-terminology.md create mode 100644 site2/website/versioned_docs/version-2.7.0/schema-get-started.md create mode 100644 site2/website/versioned_docs/version-2.7.0/security-authorization.md create mode 100644 site2/website/versioned_docs/version-2.7.0/security-bouncy-castle.md create mode 100644 
site2/website/versioned_docs/version-2.7.0/security-extending.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/security-oauth2.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/sql-deployment-configurations.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/tiered-storage-aws.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/tiered-storage-azure.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/tiered-storage-filesystem.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/tiered-storage-gcs.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/tiered-storage-overview.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/transaction-api.md
 create mode 100644 site2/website/versioned_docs/version-2.7.0/transaction-guarantee.md
 create mode 100644 site2/website/versioned_sidebars/version-2.7.0-sidebars.json

diff --git a/site2/website/releases.json b/site2/website/releases.json
index 0f1d737ab34f25..b5c1523403cf7c 100644
--- a/site2/website/releases.json
+++ b/site2/website/releases.json
@@ -1,5 +1,6 @@
 [
   "2.6.2",
+  "2.7.0",
   "2.6.1",
   "2.6.0",
   "2.5.2",
diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-brokers.md b/site2/website/versioned_docs/version-2.7.0/admin-api-brokers.md
new file mode 100644
index 00000000000000..2d19417bebf013
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/admin-api-brokers.md
@@ -0,0 +1,158 @@
+---
+id: version-2.7.0-admin-api-brokers
+title: Managing Brokers
+sidebar_label: Brokers
+original_id: admin-api-brokers
+---
+
+Pulsar brokers consist of two components:
+
+1. An HTTP server exposing a {@inject: rest:REST:/} interface for administration and [topic](reference-terminology.md#topic) lookup.
+2. A dispatcher that handles all Pulsar [message](reference-terminology.md#message) transfers.
+
+[Brokers](reference-terminology.md#broker) can be managed via:
+
+* The [`brokers`](reference-pulsar-admin.md#brokers) command of the [`pulsar-admin`](reference-pulsar-admin.md) tool
+* The `/admin/v2/brokers` endpoint of the admin {@inject: rest:REST:/} API
+* The `brokers` method of the {@inject: javadoc:PulsarAdmin:/admin/org/apache/pulsar/client/admin/PulsarAdmin.html} object in the [Java API](client-libraries-java.md)
+
+In addition to being configurable when you start them up, brokers can also be [dynamically configured](#dynamic-broker-configuration) at runtime.
+
+> See the [Configuration](reference-configuration.md#broker) page for a full listing of broker-specific configuration parameters.
+
+## Brokers resources
+
+### List active brokers
+
+Fetch all available active brokers that are serving traffic.
+
+
+
+
+```shell
+$ pulsar-admin brokers list use
+```
+
+```
+broker1.use.org.com:8080
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/brokers/:cluster|operation/getActiveBrokers}
+
+
+
+```java
+admin.brokers().getActiveBrokers(clusterName)
+```
+
+
+
+#### List namespaces owned by a given broker
+
+This command finds all namespaces that are owned and served by a given broker.
+
+
+
+```shell
+$ pulsar-admin brokers namespaces use \
+  --url broker1.use.org.com:8080
+```
+
+```json
+{
+  "my-property/use/my-ns/0x00000000_0xffffffff": {
+    "broker_assignment": "shared",
+    "is_controlled": false,
+    "is_active": true
+  }
+}
+```
+
+
+{@inject: endpoint|GET|/admin/v2/brokers/:cluster/:broker/ownedNamespaces|operation/getOwnedNamespaes}
+
+
+
+```java
+admin.brokers().getOwnedNamespaces(cluster, brokerUrl);
+```
+
+
+### Dynamic broker configuration
+
+One way to configure a Pulsar [broker](reference-terminology.md#broker) is to supply a [configuration](reference-configuration.md#broker) when the broker is [started up](reference-cli-tools.md#pulsar-broker).
+
+Since all broker configuration in Pulsar is stored in ZooKeeper, however, configuration values can also be dynamically updated *while the broker is running*. When you update broker configuration dynamically, ZooKeeper notifies the broker of the change and the broker then overrides any existing configuration values.
+
+* The [`brokers`](reference-pulsar-admin.md#brokers) command for the [`pulsar-admin`](reference-pulsar-admin.md) tool has a variety of subcommands that enable you to manipulate a broker's configuration dynamically, for example to [update config values](#update-dynamic-configuration).
+* In the Pulsar admin {@inject: rest:REST:/} API, dynamic configuration is managed through the `/admin/v2/brokers/configuration` endpoint.
+
+### Update dynamic configuration
+
+
+
+
+The [`update-dynamic-config`](reference-pulsar-admin.md#brokers-update-dynamic-config) subcommand updates an existing configuration value. It takes two arguments: the name of the parameter and its new value, passed with the `--config` and `--value` flags respectively. Here's an example for the [`brokerShutdownTimeoutMs`](reference-configuration.md#broker-brokerShutdownTimeoutMs) parameter:
+
+```shell
+$ pulsar-admin brokers update-dynamic-config --config brokerShutdownTimeoutMs --value 100
+```
+
+
+
+{@inject: endpoint|POST|/admin/v2/brokers/configuration/:configName/:configValue|operation/updateDynamicConfiguration}
+
+
+
+```java
+admin.brokers().updateDynamicConfiguration(configName, configValue);
+```
+
+
+### List updated values
+
+Fetch a list of all potentially updatable configuration parameters.
+
+
+
+```shell
+$ pulsar-admin brokers list-dynamic-config
+brokerShutdownTimeoutMs
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/brokers/configuration|operation/getDynamicConfigurationName}
+
+
+
+```java
+admin.brokers().getDynamicConfigurationNames();
+```
+
+
+### List all
+
+Fetch a list of all parameters that have been dynamically updated.
+
+
+
+```shell
+$ pulsar-admin brokers get-all-dynamic-config
+brokerShutdownTimeoutMs:100
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/brokers/configuration/values|operation/getAllDynamicConfigurations}
+
+
+
+```java
+admin.brokers().getAllDynamicConfigurations();
+```
+
diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-clusters.md b/site2/website/versioned_docs/version-2.7.0/admin-api-clusters.md
new file mode 100644
index 00000000000000..faa5cef5723506
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/admin-api-clusters.md
@@ -0,0 +1,222 @@
+---
+id: version-2.7.0-admin-api-clusters
+title: Managing Clusters
+sidebar_label: Clusters
+original_id: admin-api-clusters
+---
+
+Pulsar clusters consist of one or more Pulsar [brokers](reference-terminology.md#broker), one or more [BookKeeper](reference-terminology.md#bookkeeper)
+servers (aka [bookies](reference-terminology.md#bookie)), and a [ZooKeeper](https://zookeeper.apache.org) cluster that provides configuration and coordination management.
+
+Clusters can be managed via:
+
+* The [`clusters`](reference-pulsar-admin.md#clusters) command of the [`pulsar-admin`](reference-pulsar-admin.md) tool
+* The `/admin/v2/clusters` endpoint of the admin {@inject: rest:REST:/} API
+* The `clusters` method of the {@inject: javadoc:PulsarAdmin:/admin/org/apache/pulsar/client/admin/PulsarAdmin} object in the [Java API](client-libraries-java.md)
+
+## Clusters resources
+
+### Provision
+
+New clusters can be provisioned using the admin interface.
+
+> Please note that this operation requires superuser privileges.
+
+
+
+
+You can provision a new cluster using the [`create`](reference-pulsar-admin.md#clusters-create) subcommand. Here's an example:
+
+```shell
+$ pulsar-admin clusters create cluster-1 \
+  --url http://my-cluster.org.com:8080 \
+  --broker-url pulsar://my-cluster.org.com:6650
+```
+
+
+
+{@inject: endpoint|PUT|/admin/v2/clusters/:cluster|operation/createCluster}
+
+
+
+```java
+ClusterData clusterData = new ClusterData(
+    serviceUrl,
+    serviceUrlTls,
+    brokerServiceUrl,
+    brokerServiceUrlTls
+);
+admin.clusters().createCluster(clusterName, clusterData);
+```
+
+
+### Initialize cluster metadata
+
+When you provision a new cluster, you need to initialize that cluster's [metadata](concepts-architecture-overview.md#metadata-store). When initializing cluster metadata, you need to specify all of the following:
+
+* The name of the cluster
+* The local ZooKeeper connection string for the cluster
+* The configuration store connection string for the entire instance
+* The web service URL for the cluster
+* A broker service URL enabling interaction with the [brokers](reference-terminology.md#broker) in the cluster
+
+You must initialize cluster metadata *before* starting up any [brokers](admin-api-brokers.md) that will belong to the cluster.
+
+> **No cluster metadata initialization through the REST API or the Java admin API**
+>
+> Unlike most other admin functions in Pulsar, cluster metadata initialization cannot be performed via the admin REST API
+> or the admin Java client, as metadata initialization involves communicating with ZooKeeper directly.
+> Instead, you can use the [`pulsar`](reference-cli-tools.md#pulsar) CLI tool, in particular
+> the [`initialize-cluster-metadata`](reference-cli-tools.md#pulsar-initialize-cluster-metadata) command.
+
+Here's an example cluster metadata initialization command:
+
+```shell
+bin/pulsar initialize-cluster-metadata \
+  --cluster us-west \
+  --zookeeper zk1.us-west.example.com:2181 \
+  --configuration-store zk1.us-west.example.com:2184 \
+  --web-service-url http://pulsar.us-west.example.com:8080/ \
+  --web-service-url-tls https://pulsar.us-west.example.com:8443/ \
+  --broker-service-url pulsar://pulsar.us-west.example.com:6650/ \
+  --broker-service-url-tls pulsar+ssl://pulsar.us-west.example.com:6651/
+```
+
+You need to use the `--*-tls` flags only if you're using [TLS authentication](security-tls-authentication.md) in your instance.
+
+### Get configuration
+
+You can fetch the [configuration](reference-configuration.md) for an existing cluster at any time.
+
+
+
+
+Use the [`get`](reference-pulsar-admin.md#clusters-get) subcommand and specify the name of the cluster. Here's an example:
+
+```shell
+$ pulsar-admin clusters get cluster-1
+{
+  "serviceUrl": "http://my-cluster.org.com:8080/",
+  "serviceUrlTls": null,
+  "brokerServiceUrl": "pulsar://my-cluster.org.com:6650/",
+  "brokerServiceUrlTls": null,
+  "peerClusterNames": null
+}
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/clusters/:cluster|operation/getCluster}
+
+
+
+```java
+admin.clusters().getCluster(clusterName);
+```
+
+
+### Update
+
+You can update the configuration for an existing cluster at any time.
+
+
+
+
+Use the [`update`](reference-pulsar-admin.md#clusters-update) subcommand and specify new configuration values using flags.
+
+```shell
+$ pulsar-admin clusters update cluster-1 \
+  --url http://my-cluster.org.com:4081 \
+  --broker-url pulsar://my-cluster.org.com:3350
+```
+
+
+
+{@inject: endpoint|POST|/admin/v2/clusters/:cluster|operation/updateCluster}
+
+
+
+```java
+ClusterData clusterData = new ClusterData(
+    serviceUrl,
+    serviceUrlTls,
+    brokerServiceUrl,
+    brokerServiceUrlTls
+);
+admin.clusters().updateCluster(clusterName, clusterData);
+```
+
+
+### Delete
+
+Clusters can be deleted from a Pulsar [instance](reference-terminology.md#instance).
+
+
+
+
+Use the [`delete`](reference-pulsar-admin.md#clusters-delete) subcommand and specify the name of the cluster.
+
+```
+$ pulsar-admin clusters delete cluster-1
+```
+
+
+
+{@inject: endpoint|DELETE|/admin/v2/clusters/:cluster|operation/deleteCluster}
+
+
+
+```java
+admin.clusters().deleteCluster(clusterName);
+```
+
+
+### List
+
+You can fetch a list of all clusters in a Pulsar [instance](reference-terminology.md#instance).
+
+
+
+
+Use the [`list`](reference-pulsar-admin.md#clusters-list) subcommand.
+
+```shell
+$ pulsar-admin clusters list
+cluster-1
+cluster-2
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/clusters|operation/getClusters}
+
+
+
+```java
+admin.clusters().getClusters();
+```
+
+
+### Update peer-cluster data
+
+Peer clusters can be configured for a given cluster in a Pulsar [instance](reference-terminology.md#instance).
+
+
+
+
+Use the [`update-peer-clusters`](reference-pulsar-admin.md#clusters-update-peer-clusters) subcommand and specify the list of peer-cluster names.
+
+```
+$ pulsar-admin clusters update-peer-clusters cluster-1 --peer-clusters cluster-2
+```
+
+
+
+{@inject: endpoint|POST|/admin/v2/clusters/:cluster/peers|operation/setPeerClusterNames}
+
+
+
+```java
+admin.clusters().updatePeerClusterNames(clusterName, peerClusterList);
+```
+
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-functions.md b/site2/website/versioned_docs/version-2.7.0/admin-api-functions.md
new file mode 100644
index 00000000000000..fb919251b58325
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/admin-api-functions.md
@@ -0,0 +1,579 @@
+---
+id: version-2.7.0-admin-api-functions
+title: Manage Functions
+sidebar_label: Functions
+original_id: admin-api-functions
+---
+
+**Pulsar Functions** are lightweight compute processes that
+
+* consume messages from one or more Pulsar topics
+* apply user-supplied processing logic to each message
+* publish the results of the computation to another topic
+
+Functions can be managed via the following methods.
+
+Method | Description
+---|---
+**Admin CLI** | The [`functions`](reference-pulsar-admin.md#functions) command of the [`pulsar-admin`](reference-pulsar-admin.md) tool.
+**REST API** | The `/admin/v3/functions` endpoint of the admin {@inject: rest:REST:/} API.
+**Java Admin API** | The `functions` method of the {@inject: javadoc:PulsarAdmin:/admin/org/apache/pulsar/client/admin/PulsarAdmin} object in the [Java API](client-libraries-java.md).
+
+## Function resources
+
+You can perform the following operations on functions.
+
+### Create a function
+
+You can create a Pulsar function in cluster mode (that is, deploy it on a Pulsar cluster) using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`create`](reference-pulsar-admin.md#functions-create) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions create \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --inputs test-input-topic \
+  --output persistent://public/default/test-output-topic \
+  --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \
+  --jar /examples/api-examples.jar
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}
+
+
+
+```java
+FunctionConfig functionConfig = new FunctionConfig();
+functionConfig.setTenant(tenant);
+functionConfig.setNamespace(namespace);
+functionConfig.setName(functionName);
+functionConfig.setRuntime(FunctionConfig.Runtime.JAVA);
+functionConfig.setParallelism(1);
+functionConfig.setClassName("org.apache.pulsar.functions.api.examples.ExclamationFunction");
+functionConfig.setProcessingGuarantees(FunctionConfig.ProcessingGuarantees.ATLEAST_ONCE);
+functionConfig.setTopicsPattern(sourceTopicPattern);
+functionConfig.setSubName(subscriptionName);
+functionConfig.setAutoAck(true);
+functionConfig.setOutput(sinkTopic);
+admin.functions().createFunction(functionConfig, fileName);
+```
+
+
+### Update a function
+
+You can update a Pulsar function that has been deployed to a Pulsar cluster using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`update`](reference-pulsar-admin.md#functions-update) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions update \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --output persistent://public/default/update-output-topic \
+  # other options
+```
+
+
+
+{@inject: endpoint|PUT|/admin/v3/functions/{tenant}/{namespace}/{functionName}
+
+
+
+```java
+FunctionConfig functionConfig = new FunctionConfig();
+functionConfig.setTenant(tenant);
+functionConfig.setNamespace(namespace);
+functionConfig.setName(functionName);
+functionConfig.setRuntime(FunctionConfig.Runtime.JAVA);
+functionConfig.setParallelism(1);
+functionConfig.setClassName("org.apache.pulsar.functions.api.examples.ExclamationFunction");
+UpdateOptions updateOptions = new UpdateOptions();
+updateOptions.setUpdateAuthData(updateAuthData);
+admin.functions().updateFunction(functionConfig, userCodeFile, updateOptions);
+```
+
+
+### Start an instance of a function
+
+You can start a stopped function instance with `instance-id` using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`start`](reference-pulsar-admin.md#functions-start) subcommand.
+
+```shell
+$ pulsar-admin functions start \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --instance-id 1
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/{instanceId}/start
+
+
+
+```java
+admin.functions().startFunction(tenant, namespace, functionName, Integer.parseInt(instanceId));
+```
+
+
+### Start all instances of a function
+
+You can start all stopped function instances using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`start`](reference-pulsar-admin.md#functions-start) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions start \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/start
+
+
+
+```java
+admin.functions().startFunction(tenant, namespace, functionName);
+```
+
+
+### Stop an instance of a function
+
+You can stop a function instance with `instance-id` using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`stop`](reference-pulsar-admin.md#functions-stop) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions stop \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --instance-id 1
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/{instanceId}/stop
+
+
+
+```java
+admin.functions().stopFunction(tenant, namespace, functionName, Integer.parseInt(instanceId));
+```
+
+
+### Stop all instances of a function
+
+You can stop all function instances using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`stop`](reference-pulsar-admin.md#functions-stop) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions stop \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/stop
+
+
+
+```java
+admin.functions().stopFunction(tenant, namespace, functionName);
+```
+
+
+### Restart an instance of a function
+
+You can restart a function instance with `instance-id` using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`restart`](reference-pulsar-admin.md#functions-restart) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions restart \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --instance-id 1
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/{instanceId}/restart
+
+
+
+```java
+admin.functions().restartFunction(tenant, namespace, functionName, Integer.parseInt(instanceId));
+```
+
+
+### Restart all instances of a function
+
+You can restart all function instances using Admin CLI, REST API or Java admin API.
+
+
+
+
+Use the [`restart`](reference-pulsar-admin.md#functions-restart) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions restart \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/restart
+
+
+
+```java
+admin.functions().restartFunction(tenant, namespace, functionName);
+```
+
+
+### List all functions
+
+You can list all Pulsar functions running under a specific tenant and namespace using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`list`](reference-pulsar-admin.md#functions-list) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions list \
+  --tenant public \
+  --namespace default
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}
+
+
+
+```java
+admin.functions().getFunctions(tenant, namespace);
+```
+
+
+### Delete a function
+
+You can delete a Pulsar function that is running on a Pulsar cluster using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`delete`](reference-pulsar-admin.md#functions-delete) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions delete \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|DELETE|/admin/v3/functions/{tenant}/{namespace}/{functionName}
+
+
+
+```java
+admin.functions().deleteFunction(tenant, namespace, functionName);
+```
+
+
+### Get info about a function
+
+You can get information about a Pulsar function currently running in cluster mode using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`get`](reference-pulsar-admin.md#functions-get) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions get \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}/{functionName}
+
+
+
+```java
+admin.functions().getFunction(tenant, namespace, functionName);
+```
+
+
+### Get status of an instance of a function
+
+You can get the current status of a Pulsar function instance with `instance-id` using Admin CLI, REST API or Java Admin API.
+
+
+
+Use the [`status`](reference-pulsar-admin.md#functions-status) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions status \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --instance-id 1
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}/{functionName}/{instanceId}/status
+
+
+
+```java
+admin.functions().getFunctionStatus(tenant, namespace, functionName, Integer.parseInt(instanceId));
+```
+
+
+### Get status of all instances of a function
+
+You can get the current status of all instances of a Pulsar function using Admin CLI, REST API or Java Admin API.
+
+
+
+
+Use the [`status`](reference-pulsar-admin.md#functions-status) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions status \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}/{functionName}/status
+
+
+
+```java
+admin.functions().getFunctionStatus(tenant, namespace, functionName);
+```
+
+
+### Get stats of an instance of a function
+
+You can get the current stats of a Pulsar function instance with `instance-id` using Admin CLI, REST API or Java admin API.
+
+
+
+Use the [`stats`](reference-pulsar-admin.md#functions-stats) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions stats \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --instance-id 1
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}/{functionName}/{instanceId}/stats
+
+
+
+```java
+admin.functions().getFunctionStats(tenant, namespace, functionName, Integer.parseInt(instanceId));
+```
+
+
+### Get stats of all instances of a function
+
+You can get the current stats of a Pulsar function using Admin CLI, REST API or Java admin API.
+
+
+
+
+Use the [`stats`](reference-pulsar-admin.md#functions-stats) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions stats \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions)
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}/{functionName}/stats
+
+
+
+```java
+admin.functions().getFunctionStats(tenant, namespace, functionName);
+```
+
+
+### Trigger a function
+
+You can trigger a specified Pulsar function with a supplied value using Admin CLI, REST API or Java admin API.
+
+
+
+
+Use the [`trigger`](reference-pulsar-admin.md#functions-trigger) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions trigger \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --topic (the name of input topic) \
+  --trigger-value \"hello pulsar\"
+  # or --trigger-file (the path of trigger file)
+```
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/trigger
+
+
+
+```java
+admin.functions().triggerFunction(tenant, namespace, functionName, topic, triggerValue, triggerFile);
+```
+
+
+### Put state associated with a function
+
+You can put the state associated with a Pulsar function using Admin CLI, REST API or Java admin API.
+
+
+
+
+Use the [`putstate`](reference-pulsar-admin.md#functions-putstate) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions putstate \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --state "{\"key\":\"pulsar\", \"stringValue\":\"hello pulsar\"}"
+```
+
+
+
+{@inject: endpoint|POST|/admin/v3/functions/{tenant}/{namespace}/{functionName}/state/{key}
+
+
+
+```java
+TypeReference<FunctionState> typeRef = new TypeReference<FunctionState>() {};
+FunctionState stateRepr = ObjectMapperFactory.getThreadLocal().readValue(state, typeRef);
+admin.functions().putFunctionState(tenant, namespace, functionName, stateRepr);
+```
+
+
+### Fetch state associated with a function
+
+You can fetch the current state associated with a Pulsar function using Admin CLI, REST API or Java admin API.
+
+
+
+
+Use the [`querystate`](reference-pulsar-admin.md#functions-querystate) subcommand.
+
+**Example**
+
+```shell
+$ pulsar-admin functions querystate \
+  --tenant public \
+  --namespace default \
+  --name (the name of Pulsar Functions) \
+  --key (the key of state)
+```
+
+
+
+{@inject: endpoint|GET|/admin/v3/functions/{tenant}/{namespace}/{functionName}/state/{key}
+
+
+
+```java
+admin.functions().getFunctionState(tenant, namespace, functionName, key);
+```
+
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-namespaces.md b/site2/website/versioned_docs/version-2.7.0/admin-api-namespaces.md
new file mode 100644
index 00000000000000..b442adf5da1a6a
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/admin-api-namespaces.md
@@ -0,0 +1,886 @@
+---
+id: version-2.7.0-admin-api-namespaces
+title: Managing Namespaces
+sidebar_label: Namespaces
+original_id: admin-api-namespaces
+---
+
+Pulsar [namespaces](reference-terminology.md#namespace) are logical groupings of [topics](reference-terminology.md#topic).
+
+Namespaces can be managed via:
+
+* The [`namespaces`](reference-pulsar-admin.md#namespaces) command of the [`pulsar-admin`](reference-pulsar-admin.md) tool
+* The `/admin/v2/namespaces` endpoint of the admin {@inject: rest:REST:/} API
+* The `namespaces` method of the {@inject: javadoc:PulsarAdmin:/admin/org/apache/pulsar/client/admin/PulsarAdmin} object in the [Java API](client-libraries-java.md)
+
+## Namespaces resources
+
+### Create
+
+You can create new namespaces under a given [tenant](reference-terminology.md#tenant).
+
+
+
+
+Use the [`create`](reference-pulsar-admin.md#namespaces-create) subcommand and specify the namespace by name:
+
+```shell
+$ pulsar-admin namespaces create test-tenant/test-namespace
+```
+
+
+
+{@inject: endpoint|PUT|/admin/v2/namespaces/:tenant/:namespace|operation/createNamespace}
+
+
+
+```java
+admin.namespaces().createNamespace(namespace);
+```
+
+
+### Get policies
+
+You can fetch the current policies associated with a namespace at any time.
+
+
+
+
+Use the [`policies`](reference-pulsar-admin.md#namespaces-policies) subcommand and specify the namespace:
+
+```shell
+$ pulsar-admin namespaces policies test-tenant/test-namespace
+{
+  "auth_policies": {
+    "namespace_auth": {},
+    "destination_auth": {}
+  },
+  "replication_clusters": [],
+  "bundles_activated": true,
+  "bundles": {
+    "boundaries": [
+      "0x00000000",
+      "0xffffffff"
+    ],
+    "numBundles": 1
+  },
+  "backlog_quota_map": {},
+  "persistence": null,
+  "latency_stats_sample_rate": {},
+  "message_ttl_in_seconds": 0,
+  "retention_policies": null,
+  "deleted": false
+}
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace|operation/getPolicies}
+
+
+
+```java
+admin.namespaces().getPolicies(namespace);
+```
+
+
+### List namespaces within a tenant
+
+You can list all namespaces within a given Pulsar [tenant](reference-terminology.md#tenant).
+
+
+
+
+Use the [`list`](reference-pulsar-admin.md#namespaces-list) subcommand and specify the tenant:
+
+```shell
+$ pulsar-admin namespaces list test-tenant
+test-tenant/ns1
+test-tenant/ns2
+```
+
+
+
+{@inject: endpoint|GET|/admin/v2/namespaces/:tenant|operation/getTenantNamespaces}
+
+
+
+```java
+admin.namespaces().getNamespaces(tenant);
+```
+
+
+### Delete
+
+You can delete existing namespaces from a tenant.
+
+
+
+```shell
+$ pulsar-admin namespaces delete test-tenant/ns1
+```
+
+
+
+{@inject: endpoint|DELETE|/admin/v2/namespaces/:tenant/:namespace|operation/deleteNamespace}
+
+
+
+```java
+admin.namespaces().deleteNamespace(namespace);
+```
+
+
+#### Set replication cluster
+
+It sets the replication clusters for a namespace, so that Pulsar can internally replicate published messages from one colocation facility to another.
+
+
+
+
+```
+$ pulsar-admin namespaces set-clusters test-tenant/ns1 \
+  --clusters cl1
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/replication|operation/setNamespaceReplicationClusters}
+```
+
+
+
+```java
+admin.namespaces().setNamespaceReplicationClusters(namespace, clusters);
+```
+
+
+#### Get replication cluster
+
+It lists the replication clusters for a given namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces get-clusters test-tenant/ns1
+```
+
+```
+cl2
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/replication|operation/getNamespaceReplicationClusters}
+```
+
+
+
+```java
+admin.namespaces().getNamespaceReplicationClusters(namespace)
+```
+
+
+#### Set backlog quota policies
+
+A backlog quota helps the broker restrict the bandwidth/storage of a namespace once it reaches a certain threshold. The admin can set the limit and the corresponding action to take when the limit is reached:
+
+ 1. producer_request_hold: the broker holds the producer's request and does not persist the payload
+
+ 2. producer_exception: the broker disconnects the client by throwing an exception
+
+ 3. consumer_backlog_eviction: the broker starts discarding backlog messages
+
+ A backlog quota restriction is applied by defining a restriction on the backlog-quota-type: destination_storage
+
+
+
+
+```
+$ pulsar-admin namespaces set-backlog-quota --limit 10 --policy producer_request_hold test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/backlogQuota|operation/setBacklogQuota}
+```
+
+
+
+```java
+admin.namespaces().setBacklogQuota(namespace, new BacklogQuota(limit, policy))
+```
+
+
+#### Get backlog quota policies
+
+It shows the configured backlog quota for a given namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces get-backlog-quotas test-tenant/ns1
+```
+
+```json
+{
+  "destination_storage": {
+    "limit": 10,
+    "policy": "producer_request_hold"
+  }
+}
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/backlogQuotaMap|operation/getBacklogQuotaMap}
+```
+
+
+
+```java
+admin.namespaces().getBacklogQuotaMap(namespace);
+```
+
+
+#### Remove backlog quota policies
+
+It removes the backlog quota policies for a given namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces remove-backlog-quota test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|DELETE|/admin/v2/namespaces/{tenant}/{namespace}/backlogQuota|operation/removeBacklogQuota}
+```
+
+
+
+```java
+admin.namespaces().removeBacklogQuota(namespace, backlogQuotaType)
+```
+
+
+#### Set persistence policies
+
+Persistence policies allow you to configure the persistence level for all topic messages under a given namespace.
+
+ - Bookkeeper-ack-quorum: Number of acks (guaranteed copies) to wait for each entry, default: 0
+
+ - Bookkeeper-ensemble: Number of bookies to use for a topic, default: 0
+
+ - Bookkeeper-write-quorum: How many writes to make for each entry, default: 0
+
+ - Ml-mark-delete-max-rate: Throttling rate for the mark-delete operation (0 means no throttle), default: 0.0
+
+
+
+
+```
+$ pulsar-admin namespaces set-persistence --bookkeeper-ack-quorum 2 --bookkeeper-ensemble 3 --bookkeeper-write-quorum 2 --ml-mark-delete-max-rate 0 test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/persistence|operation/setPersistence}
+```
+
+
+
+```java
+admin.namespaces().setPersistence(namespace,new PersistencePolicies(bookkeeperEnsemble, bookkeeperWriteQuorum,bookkeeperAckQuorum,managedLedgerMaxMarkDeleteRate))
+```
+
+
+#### Get persistence policies
+
+It shows the configured persistence policies of a given namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces get-persistence test-tenant/ns1
+```
+
+```json
+{
+  "bookkeeperEnsemble": 3,
+  "bookkeeperWriteQuorum": 2,
+  "bookkeeperAckQuorum": 2,
+  "managedLedgerMaxMarkDeleteRate": 0
+}
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/persistence|operation/getPersistence}
+```
+
+
+
+```java
+admin.namespaces().getPersistence(namespace)
+```
+
+
+#### Unload namespace bundle
+
+A namespace bundle is a virtual group of topics that belong to the same namespace. If a broker is overloaded with the number of bundles it serves, this command can unload a bundle from that broker, so it can be served by another, less-loaded broker. Namespace bundle IDs range from 0x00000000 to 0xffffffff.
+
+
+
+
+```
+$ pulsar-admin namespaces unload --bundle 0x00000000_0xffffffff test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|PUT|/admin/v2/namespaces/{tenant}/{namespace}/{bundle}/unload|operation/unloadNamespaceBundle}
+```
+
+
+
+```java
+admin.namespaces().unloadNamespaceBundle(namespace, bundle)
+```
+
+
+#### Set message-ttl
+
+It configures the time-to-live (TTL) duration, in seconds, for messages.
+
+
+
+
+```
+$ pulsar-admin namespaces set-message-ttl --messageTTL 100 test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/messageTTL|operation/setNamespaceMessageTTL}
+```
+
+
+
+```java
+admin.namespaces().setNamespaceMessageTTL(namespace, messageTTL)
+```
+
+
+#### Get message-ttl
+
+It shows the configured message TTL for a namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces get-message-ttl test-tenant/ns1
+```
+
+```
+100
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/messageTTL|operation/getNamespaceMessageTTL}
+```
+
+
+
+```java
+admin.namespaces().getNamespaceMessageTTL(namespace)
+```
+
+
+#### Split bundle
+
+Each namespace bundle can contain multiple topics, and each bundle can be served by only one broker.
+If a single bundle is creating an excessive load on a broker, an admin can split the bundle using this command, permitting one or more of the new bundles to be unloaded and thus spreading the load across the brokers.
+
+
+
+```
+$ pulsar-admin namespaces split-bundle --bundle 0x00000000_0xffffffff test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|PUT|/admin/v2/namespaces/{tenant}/{namespace}/{bundle}/split|operation/splitNamespaceBundle}
+```
+
+
+
+```java
+admin.namespaces().splitNamespaceBundle(namespace, bundle)
+```
+
+
+#### Clear backlog
+
+It clears the message backlog for all the topics that belong to a specific namespace. You can also clear the backlog for a specific subscription.
+
+
+
+
+```
+$ pulsar-admin namespaces clear-backlog --sub my-subscription test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/clearBacklog|operation/clearNamespaceBacklogForSubscription}
+```
+
+
+
+```java
+admin.namespaces().clearNamespaceBacklogForSubscription(namespace, subscription)
+```
+
+
+#### Clear bundle backlog
+
+It clears the message backlog for all the topics that belong to a specific NamespaceBundle. You can also clear the backlog for a specific subscription.
+
+
+
+
+```
+$ pulsar-admin namespaces clear-backlog --bundle 0x00000000_0xffffffff --sub my-subscription test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/{bundle}/clearBacklog|operation/clearNamespaceBundleBacklogForSubscription}
+```
+
+
+
+```java
+admin.namespaces().clearNamespaceBundleBacklogForSubscription(namespace, bundle, subscription)
+```
+
+
+#### Set retention
+
+Each namespace contains multiple topics, and each topic's retention size (storage size) should not exceed a specific threshold, or it should be stored for a certain period. This command helps configure the retention size and time of topics in a given namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces set-retention --size 10 --time 100 test-tenant/ns1
+```
+
+```
+N/A
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/retention|operation/setRetention}
+```
+
+
+
+```java
+admin.namespaces().setRetention(namespace, new RetentionPolicies(retentionTimeInMin, retentionSizeInMB))
+```
+
+
+#### Get retention
+
+It shows the retention information of a given namespace.
+
+
+
+
+```
+$ pulsar-admin namespaces get-retention test-tenant/ns1
+```
+
+```json
+{
+  "retentionTimeInMinutes": 100,
+  "retentionSizeInMB": 10
+}
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/retention|operation/getRetention}
+```
+
+
+
+```java
+admin.namespaces().getRetention(namespace)
+```
+
+
+#### Set dispatch throttling
+
+It sets the message dispatch rate for all the topics under a given namespace.
+The dispatch rate can be restricted by the number of messages per X seconds (`msg-dispatch-rate`) or by the number of message-bytes per X seconds (`byte-dispatch-rate`).
+The rate period X is one second by default and can be configured with `dispatch-rate-period`. The default value of `msg-dispatch-rate` and `byte-dispatch-rate` is -1, which
+disables the throttling.
+
+> **Note**
+> - If neither `clusterDispatchRate` nor `topicDispatchRate` is configured, dispatch throttling is disabled.
+>
+> - If `topicDispatchRate` is not configured, `clusterDispatchRate` takes effect.
+>
+> - If `topicDispatchRate` is configured, `topicDispatchRate` takes effect.
+
+
+
+```
+$ pulsar-admin namespaces set-dispatch-rate test-tenant/ns1 \
+  --msg-dispatch-rate 1000 \
+  --byte-dispatch-rate 1048576 \
+  --dispatch-rate-period 1
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/dispatchRate|operation/setDispatchRate}
+```
+
+
+
+```java
+admin.namespaces().setDispatchRate(namespace, new DispatchRate(1000, 1048576, 1))
+```
+
+
+#### Get configured message-rate
+
+It shows the configured message rate for the namespace (topics under this namespace can dispatch this many messages per second).
+
+
+
+
+```
+$ pulsar-admin namespaces get-dispatch-rate test-tenant/ns1
+```
+
+```json
+{
+  "dispatchThrottlingRatePerTopicInMsg" : 1000,
+  "dispatchThrottlingRatePerTopicInByte" : 1048576,
+  "ratePeriodInSecond" : 1
+}
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/dispatchRate|operation/getDispatchRate}
+```
+
+
+
+```java
+admin.namespaces().getDispatchRate(namespace)
+```
+
+
+#### Set dispatch throttling for subscription
+
+It sets the message dispatch rate for all the subscriptions of topics under a given namespace.
+The dispatch rate can be restricted by the number of messages per X seconds (`msg-dispatch-rate`) or by the number of message-bytes per X seconds (`byte-dispatch-rate`).
+The rate period X is one second by default and can be configured with `dispatch-rate-period`. The default value of `msg-dispatch-rate` and `byte-dispatch-rate` is -1, which
+disables the throttling.
+
+
+
+
+```
+$ pulsar-admin namespaces set-subscription-dispatch-rate test-tenant/ns1 \
+  --msg-dispatch-rate 1000 \
+  --byte-dispatch-rate 1048576 \
+  --dispatch-rate-period 1
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/subscriptionDispatchRate|operation/setDispatchRate}
+```
+
+
+
+```java
+admin.namespaces().setSubscriptionDispatchRate(namespace, new DispatchRate(1000, 1048576, 1))
+```
+
+
+#### Get configured subscription message-rate
+
+It shows the configured subscription message rate for the namespace (subscriptions of topics under this namespace can dispatch this many messages per second).
+
+
+
+
+```
+$ pulsar-admin namespaces get-subscription-dispatch-rate test-tenant/ns1
+```
+
+```json
+{
+  "dispatchThrottlingRatePerTopicInMsg" : 1000,
+  "dispatchThrottlingRatePerTopicInByte" : 1048576,
+  "ratePeriodInSecond" : 1
+}
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/subscriptionDispatchRate|operation/getDispatchRate}
+```
+
+
+
+```java
+admin.namespaces().getSubscriptionDispatchRate(namespace)
+```
+
+
+#### Set dispatch throttling for replicator
+
+It sets the message dispatch rate for all the replicators between replication clusters under a given namespace.
+The dispatch rate can be restricted by the number of messages per X seconds (`msg-dispatch-rate`) or by the number of message-bytes per X seconds (`byte-dispatch-rate`).
+The rate period X is one second by default and can be configured with `dispatch-rate-period`. The default value of `msg-dispatch-rate` and `byte-dispatch-rate` is -1, which
+disables the throttling.
+
+
+
+```
+$ pulsar-admin namespaces set-replicator-dispatch-rate test-tenant/ns1 \
+  --msg-dispatch-rate 1000 \
+  --byte-dispatch-rate 1048576 \
+  --dispatch-rate-period 1
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/replicatorDispatchRate|operation/setDispatchRate}
+```
+
+
+
+```java
+admin.namespaces().setReplicatorDispatchRate(namespace, new DispatchRate(1000, 1048576, 1))
+```
+
+
+#### Get configured replicator message-rate
+
+It shows the configured replicator message rate for the namespace (replicators of topics under this namespace can dispatch this many messages per second).
+
+
+
+
+```
+$ pulsar-admin namespaces get-replicator-dispatch-rate test-tenant/ns1
+```
+
+```json
+{
+  "dispatchThrottlingRatePerTopicInMsg" : 1000,
+  "dispatchThrottlingRatePerTopicInByte" : 1048576,
+  "ratePeriodInSecond" : 1
+}
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/replicatorDispatchRate|operation/getDispatchRate}
+```
+
+
+
+```java
+admin.namespaces().getReplicatorDispatchRate(namespace)
+```
+
+
+#### Get deduplication snapshot interval
+
+It shows the configured `deduplicationSnapshotInterval` for a namespace. Each topic under the namespace takes a deduplication snapshot according to this interval.
+
+
+
+
+```
+$ pulsar-admin namespaces get-deduplication-snapshot-interval test-tenant/ns1
+```
+
+
+
+```
+{@inject: endpoint|GET|/admin/v2/namespaces/{tenant}/{namespace}/deduplicationSnapshotInterval}
+```
+
+
+
+```java
+admin.namespaces().getDeduplicationSnapshotInterval(namespace)
+```
+
+
+#### Set deduplication snapshot interval
+
+Set the `deduplicationSnapshotInterval` for a namespace. Each topic under the namespace takes a deduplication snapshot according to this interval.
+`brokerDeduplicationEnabled` must be set to `true` for this property to take effect.
+
+
+
+
+```
+$ pulsar-admin namespaces set-deduplication-snapshot-interval test-tenant/ns1 --interval 1000
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/deduplicationSnapshotInterval}
+```
+
+```json
+{
+  "interval": 1000
+}
+```
+
+
+
+```java
+admin.namespaces().setDeduplicationSnapshotInterval(namespace, 1000)
+```
+
+
+#### Remove deduplication snapshot interval
+
+Remove the configured `deduplicationSnapshotInterval` of a namespace. Each topic under the namespace takes a deduplication snapshot according to this interval.
+
+
+
+
+```
+$ pulsar-admin namespaces remove-deduplication-snapshot-interval test-tenant/ns1
+```
+
+
+
+```
+{@inject: endpoint|POST|/admin/v2/namespaces/{tenant}/{namespace}/deduplicationSnapshotInterval}
+```
+
+
+
+```java
+admin.namespaces().removeDeduplicationSnapshotInterval(namespace)
+```
+
+
+### Namespace isolation
+
+Coming soon.
+
+### Unloading from a broker
+
+You can unload a namespace, or a [namespace bundle](reference-terminology.md#namespace-bundle), from the Pulsar [broker](reference-terminology.md#broker) that is currently responsible for it.
+
+#### pulsar-admin
+
+Use the [`unload`](reference-pulsar-admin.md#unload) subcommand of the [`namespaces`](reference-pulsar-admin.md#namespaces) command.
+ + + + +```shell +$ pulsar-admin namespaces unload my-tenant/my-ns +``` + + + +``` +{@inject: endpoint|PUT|/admin/v2/namespaces/{tenant}/{namespace}/unload|operation/unloadNamespace} +``` + + + +```java +admin.namespaces().unload(namespace) +``` + \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-non-partitioned-topics.md b/site2/website/versioned_docs/version-2.7.0/admin-api-non-partitioned-topics.md new file mode 100644 index 00000000000000..c20551e6369801 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-non-partitioned-topics.md @@ -0,0 +1,8 @@ +--- +id: version-2.7.0-admin-api-non-partitioned-topics +title: Managing non-partitioned topics +sidebar_label: Non-partitioned topics +original_id: admin-api-non-partitioned-topics +--- + +For details of the content, refer to [manage topics](admin-api-topics.md). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-non-persistent-topics.md b/site2/website/versioned_docs/version-2.7.0/admin-api-non-persistent-topics.md new file mode 100644 index 00000000000000..601d55bab533d8 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-non-persistent-topics.md @@ -0,0 +1,8 @@ +--- +id: version-2.7.0-admin-api-non-persistent-topics +title: Managing non-persistent topics +sidebar_label: Non-Persistent topics +original_id: admin-api-non-persistent-topics +--- + +For details of the content, refer to [manage topics](admin-api-topics.md). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-overview.md b/site2/website/versioned_docs/version-2.7.0/admin-api-overview.md new file mode 100644 index 00000000000000..9930a84f8b6f2e --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-overview.md @@ -0,0 +1,91 @@ +--- +id: version-2.7.0-admin-api-overview +title: The Pulsar admin interface +sidebar_label: Overview +original_id: admin-api-overview +--- + +The Pulsar admin interface enables you to manage all of the important entities in a Pulsar [instance](reference-terminology.md#instance), such as [tenants](reference-terminology.md#tenant), [topics](reference-terminology.md#topic), and [namespaces](reference-terminology.md#namespace). + +You can currently interact with the admin interface via: + +- Making HTTP calls against the admin {@inject: rest:REST:/} API provided by Pulsar [brokers](reference-terminology.md#broker). For some restful apis, they might be redirected to topic owner brokers for serving + with [`307 Temporary Redirect`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/307), hence the HTTP callers should handle `307 Temporary Redirect`. If you are using `curl`, you should specify `-L` + to handle redirections. +- The `pulsar-admin` CLI tool, which is available in the `bin` folder of your [Pulsar installation](getting-started-standalone.md): + +```shell +$ bin/pulsar-admin +``` + +Full documentation for this tool can be found in the [Pulsar command-line tools](reference-pulsar-admin.md) doc. + +- A Java client interface. + +> #### The REST API is the admin interface +> Under the hood, both the `pulsar-admin` CLI tool and the Java client both use the REST API. If you’d like to implement your own admin interface client, you should use the REST API as well. Full documentation can be found here. + +In this document, examples from each of the three available interfaces will be shown. 
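+
+As noted above, some admin REST calls are answered with a `307 Temporary Redirect` pointing at the topic's owner broker, so any HTTP client you use must follow redirects. The following is a minimal sketch of such a call using Java's standard `java.net.http` client (available since JDK 11); the tenant and namespace names are placeholders, and the endpoint is the list-topics call shown later in this document:
+
+```java
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+
+public class AdminRestExample {
+    public static void main(String[] args) throws Exception {
+        // Follow redirects automatically, since the broker may answer with
+        // 307 Temporary Redirect pointing at the topic's owner broker.
+        HttpClient http = HttpClient.newBuilder()
+                .followRedirects(HttpClient.Redirect.NORMAL)
+                .build();
+
+        // List the persistent topics in a namespace (GET /admin/v2/persistent/:tenant/:namespace)
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create("http://localhost:8080/admin/v2/persistent/my-tenant/my-namespace"))
+                .GET()
+                .build();
+
+        HttpResponse<String> response = http.send(request, HttpResponse.BodyHandlers.ofString());
+        System.out.println(response.statusCode() + " " + response.body());
+    }
+}
+```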
+
+## Admin setup
+
+Each of Pulsar's three admin interfaces---the [`pulsar-admin`](reference-pulsar-admin.md) CLI tool, the [Java admin API](/api/admin), and the {@inject: rest:REST:/} API---requires some special setup if you have [authentication](security-overview.md#authentication-providers) enabled in your Pulsar [instance](reference-terminology.md#instance).
+
+
+
+
+If you have [authentication](security-overview.md#authentication-providers) enabled, you need to provide an auth configuration to use the [`pulsar-admin`](reference-pulsar-admin.md) tool. By default, the configuration for the `pulsar-admin` tool is found in the [`conf/client.conf`](reference-configuration.md#client) file. Here are the available parameters:
+
+|Name|Description|Default|
+|----|-----------|-------|
+|webServiceUrl|The web URL for the cluster.|http://localhost:8080/|
+|brokerServiceUrl|The Pulsar protocol URL for the cluster.|pulsar://localhost:6650/|
+|authPlugin|The authentication plugin.| |
+|authParams|The authentication parameters for the cluster, as a comma-separated string.| |
+|useTls|Whether or not TLS authentication is enforced in the cluster.|false|
+|tlsAllowInsecureConnection|Whether to accept untrusted TLS certificates from clients.|false|
+|tlsTrustCertsFilePath|Path for the trusted TLS certificate file.| |
+
+
+
+You can find documentation for the REST API exposed by Pulsar [brokers](reference-terminology.md#broker) in this reference {@inject: rest:document:/}.
+
+
+
+To use the Java admin API, instantiate a {@inject: javadoc:PulsarAdmin:/admin/org/apache/pulsar/client/admin/PulsarAdmin} object, specifying a URL for a Pulsar [broker](reference-terminology.md#broker) and a {@inject: javadoc:PulsarAdminBuilder:/admin/org/apache/pulsar/client/admin/PulsarAdminBuilder}. Here's a minimal example using `localhost`:
+
+```java
+String url = "http://localhost:8080";
+// Pass the auth-plugin class's fully qualified name if Pulsar security is enabled
+String authPluginClassName = "com.org.MyAuthPluginClass";
+// Pass auth-params if the auth-plugin class requires them
+String authParams = "param1=value1";
+boolean useTls = false;
+boolean tlsAllowInsecureConnection = false;
+String tlsTrustCertsFilePath = null;
+PulsarAdmin admin = PulsarAdmin.builder()
+    .authentication(authPluginClassName, authParams)
+    .serviceHttpUrl(url)
+    .tlsTrustCertsFilePath(tlsTrustCertsFilePath)
+    .allowTlsInsecureConnection(tlsAllowInsecureConnection)
+    .build();
+```
+
+If you want the admin client to work with multiple brokers, you can specify a multi-host service URL, in the same format as a multi-host Pulsar service URL.
For example, +```java +String url = "http://localhost:8080,localhost:8081,localhost:8082"; +// Pass auth-plugin class fully-qualified name if Pulsar-security enabled +String authPluginClassName = "com.org.MyAuthPluginClass"; +// Pass auth-param if auth-plugin class requires it +String authParams = "param1=value1"; +boolean useTls = false; +boolean tlsAllowInsecureConnection = false; +String tlsTrustCertsFilePath = null; +PulsarAdmin admin = PulsarAdmin.builder() +.authentication(authPluginClassName,authParams) +.serviceHttpUrl(url) +.tlsTrustCertsFilePath(tlsTrustCertsFilePath) +.allowTlsInsecureConnection(tlsAllowInsecureConnection) +.build(); +``` + diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-partitioned-topics.md b/site2/website/versioned_docs/version-2.7.0/admin-api-partitioned-topics.md new file mode 100644 index 00000000000000..ae6c496a6f20e3 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-partitioned-topics.md @@ -0,0 +1,8 @@ +--- +id: version-2.7.0-admin-api-partitioned-topics +title: Managing partitioned topics +sidebar_label: Partitioned topics +original_id: admin-api-partitioned-topics +--- + +For details of the content, refer to [manage topics](admin-api-topics.md). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-permissions.md b/site2/website/versioned_docs/version-2.7.0/admin-api-permissions.md new file mode 100644 index 00000000000000..0c41937d3f3368 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-permissions.md @@ -0,0 +1,121 @@ +--- +id: version-2.7.0-admin-api-permissions +title: Managing permissions +sidebar_label: Permissions +original_id: admin-api-permissions +--- + +Permissions in Pulsar are managed at the [namespace](reference-terminology.md#namespace) level +(that is, within [tenants](reference-terminology.md#tenant) and [clusters](reference-terminology.md#cluster)). + +## Grant permissions + +You can grant permissions to specific roles for lists of operations such as `produce` and `consume`. + + + + +Use the [`grant-permission`](reference-pulsar-admin.md#grant-permission) subcommand and specify a namespace, actions using the `--actions` flag, and a role using the `--role` flag: + +```shell +$ pulsar-admin namespaces grant-permission test-tenant/ns1 \ + --actions produce,consume \ + --role admin10 +``` + +Wildcard authorization can be performed when `authorizationAllowWildcardsMatching` is set to `true` in `broker.conf`. + +e.g. +```shell +$ pulsar-admin namespaces grant-permission test-tenant/ns1 \ + --actions produce,consume \ + --role 'my.role.*' +``` + +Then, roles `my.role.1`, `my.role.2`, `my.role.foo`, `my.role.bar`, etc. can produce and consume. + +```shell +$ pulsar-admin namespaces grant-permission test-tenant/ns1 \ + --actions produce,consume \ + --role '*.role.my' +``` + +Then, roles `1.role.my`, `2.role.my`, `foo.role.my`, `bar.role.my`, etc. can produce and consume. + +**Note**: A wildcard matching works at **the beginning or end of the role name only**. + +e.g. +```shell +$ pulsar-admin namespaces grant-permission test-tenant/ns1 \ + --actions produce,consume \ + --role 'my.*.role' +``` + +In this case, only the role `my.*.role` has permissions. +Roles `my.1.role`, `my.2.role`, `my.foo.role`, `my.bar.role`, etc. **cannot** produce and consume. 
+ + + +{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/permissions/:role|operation/grantPermissionOnNamespace} + + + +```java +admin.namespaces().grantPermissionOnNamespace(namespace, role, getAuthActions(actions)); +``` + + +## Get permissions + +You can see which permissions have been granted to which roles in a namespace. + + + + +Use the [`permissions`](reference-pulsar-admin#permissions) subcommand and specify a namespace: + +```shell +$ pulsar-admin namespaces permissions test-tenant/ns1 +{ + "admin10": [ + "produce", + "consume" + ] +} +``` + + + +{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace/permissions|operation/getPermissions} + + + +```java +admin.namespaces().getPermissions(namespace); +``` + + +## Revoke permissions + +You can revoke permissions from specific roles, which means that those roles will no longer have access to the specified namespace. + + + + +Use the [`revoke-permission`](reference-pulsar-admin.md#revoke-permission) subcommand and specify a namespace and a role using the `--role` flag: + +```shell +$ pulsar-admin namespaces revoke-permission test-tenant/ns1 \ + --role admin10 +``` + + + +{@inject: endpoint|DELETE|/admin/v2/namespaces/:tenant/:namespace/permissions/:role|operation/revokePermissionsOnNamespace} + + + +```java +admin.namespaces().revokePermissionsOnNamespace(namespace, role); +``` + \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-persistent-topics.md b/site2/website/versioned_docs/version-2.7.0/admin-api-persistent-topics.md new file mode 100644 index 00000000000000..681acb8b4dd578 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-persistent-topics.md @@ -0,0 +1,8 @@ +--- +id: version-2.7.0-admin-api-persistent-topics +title: Managing persistent topics +sidebar_label: Persistent topics +original_id: admin-api-persistent-topics +--- + +For details of the content, refer to [manage topics](admin-api-topics.md). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-tenants.md b/site2/website/versioned_docs/version-2.7.0/admin-api-tenants.md new file mode 100644 index 00000000000000..392c0c5ee0390b --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/admin-api-tenants.md @@ -0,0 +1,157 @@ +--- +id: version-2.7.0-admin-api-tenants +title: Managing Tenants +sidebar_label: Tenants +original_id: admin-api-tenants +--- + +Tenants, like namespaces, can be managed using the [admin API](admin-api-overview.md). There are currently two configurable aspects of tenants: + +* Admin roles +* Allowed clusters + +## Tenant resources + +### List + +You can list all of the tenants associated with an [instance](reference-terminology.md#instance). + + + + +Use the [`list`](reference-pulsar-admin.md#tenants-list) subcommand. + +```shell +$ pulsar-admin tenants list +my-tenant-1 +my-tenant-2 +``` + + + +{@inject: endpoint|GET|/admin/v2/tenants|operation/getTenants} + + + +```java +admin.tenants().getTenants(); +``` + + +### Create + +You can create a new tenant. + + + + +Use the [`create`](reference-pulsar-admin.md#tenants-create) subcommand: + +```shell +$ pulsar-admin tenants create my-tenant +``` + +When creating a tenant, you can assign admin roles using the `-r`/`--admin-roles` flag. You can specify multiple roles as a comma-separated list. 
Here are some examples:
+
+```shell
+$ pulsar-admin tenants create my-tenant \
+  --admin-roles role1,role2,role3
+
+$ pulsar-admin tenants create my-tenant \
+  -r role1
+```
+
+
+{@inject: endpoint|PUT|/admin/v2/tenants/:tenant|operation/createTenant}
+
+
+
+```java
+admin.tenants().createTenant(tenantName, tenantInfo);
+```
+
+
+
+### Get configuration
+
+You can fetch the [configuration](reference-configuration.md) for an existing tenant at any time.
+
+
+
+
+Use the [`get`](reference-pulsar-admin.md#tenants-get) subcommand and specify the name of the tenant. Here's an example:
+
+```shell
+$ pulsar-admin tenants get my-tenant
+{
+  "adminRoles": [
+    "admin1",
+    "admin2"
+  ],
+  "allowedClusters": [
+    "cl1",
+    "cl2"
+  ]
+}
+```
+
+
+{@inject: endpoint|GET|/admin/v2/tenants/:tenant|operation/getTenant}
+
+
+
+```java
+admin.tenants().getTenantInfo(tenantName);
+```
+
+
+
+### Delete
+
+Tenants can be deleted from a Pulsar [instance](reference-terminology.md#instance).
+
+
+
+
+Use the [`delete`](reference-pulsar-admin.md#tenants-delete) subcommand and specify the name of the tenant.
+
+```shell
+$ pulsar-admin tenants delete my-tenant
+```
+
+
+
+{@inject: endpoint|DELETE|/admin/v2/tenants/:tenant|operation/deleteTenant}
+
+
+
+```java
+admin.tenants().deleteTenant(tenantName);
+```
+
+
+### Update
+
+You can update a tenant's configuration.
+
+
+
+
+Use the [`update`](reference-pulsar-admin.md#tenants-update) subcommand.
+
+```shell
+$ pulsar-admin tenants update my-tenant
+```
+
+
+
+{@inject: endpoint|POST|/admin/v2/tenants/:tenant|operation/updateTenant}
+
+
+
+```java
+admin.tenants().updateTenant(tenantName, tenantInfo);
+```
+
+
diff --git a/site2/website/versioned_docs/version-2.7.0/admin-api-topics.md b/site2/website/versioned_docs/version-2.7.0/admin-api-topics.md
new file mode 100644
index 00000000000000..7d71bf57791bd1
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/admin-api-topics.md
@@ -0,0 +1,1206 @@
+---
+id: version-2.7.0-admin-api-topics
+title: Manage topics
+sidebar_label: Topics
+original_id: admin-api-topics
+---
+
+Pulsar has persistent and non-persistent topics. A persistent topic is a logical endpoint for publishing and consuming messages. The topic name structure for persistent topics is:
+
+```shell
+persistent://tenant/namespace/topic
+```
+
+Non-persistent topics are used in applications that only consume real-time published messages and do not need a persistence guarantee. This reduces message-publish latency by removing the overhead of persisting messages. The topic name structure for non-persistent topics is:
+
+```shell
+non-persistent://tenant/namespace/topic
+```
+## Manage topic resources
+Whether a topic is persistent or non-persistent, you can manage its resources with the `pulsar-admin` tool, the REST API, and Java.
+
+> **Note**
+> In the REST API, `:schema` stands for persistent or non-persistent. `:tenant`, `:namespace`, and `:x` are variables; replace them with the real tenant, namespace, and `x` names when using them.
+> Take {@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace|operation/getList} as an example. To get the list of persistent topics in the REST API, use `https://pulsar.apache.org/admin/v2/persistent/my-tenant/my-namespace`. To get the list of non-persistent topics, use `https://pulsar.apache.org/admin/v2/non-persistent/my-tenant/my-namespace`.
+
+### List of topics
+
+You can get the list of topics under a given namespace in the following ways.
+ + + + +```shell +$ pulsar-admin topics list \ + my-tenant/my-namespace +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace|operation/getList} + + +```java +String namespace = "my-tenant/my-namespace"; +admin.topics().getList(namespace); +``` + + +### Grant permission + +You can grant permissions on a client role to perform specific actions on a given topic in the following ways. + + + +```shell +$ pulsar-admin topics grant-permission \ + --actions produce,consume --role application1 \ + persistent://test-tenant/ns1/tp1 \ +``` + + +{@inject: endpoint|POST|/admin/v2/:schema/:tenant/:namespace/:topic/permissions/:role|operation/grantPermissionsOnTopic} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +String role = "test-role"; +Set actions = Sets.newHashSet(AuthAction.produce, AuthAction.consume); +admin.topics().grantPermission(topic, role, actions); +``` + + + +### Get permission + +You can fetch permission in the following ways. + + + +```shell +$ pulsar-admin topics permissions \ + persistent://test-tenant/ns1/tp1 \ + +{ + "application1": [ + "consume", + "produce" + ] +} +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/permissions|operation/getPermissionsOnTopic} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().getPermissions(topic); +``` + + + +### Revoke permission + +You can revoke a permission granted on a client role in the following ways. + + +```shell +$ pulsar-admin topics revoke-permission \ + --role application1 \ + persistent://test-tenant/ns1/tp1 \ + +{ + "application1": [ + "consume", + "produce" + ] +} +``` + + +{@inject: endpoint|DELETE|/admin/v2/:schema/:tenant/:namespace/:topic/permissions/:role|operation/revokePermissionsOnTopic} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +String role = "test-role"; +admin.topics().revokePermissions(topic, role); +``` + + + +### Delete topic + +You can delete a topic in the following ways. You cannot delete a topic if any active subscription or producers is connected to the topic. + + + +```shell +$ pulsar-admin topics delete \ + persistent://test-tenant/ns1/tp1 \ +``` + + +{@inject: endpoint|DELETE|/admin/v2/:schema/:tenant/:namespace/:topic|operation/deleteTopic} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().delete(topic); +``` + + + +### Unload topic + +You can unload a topic in the following ways. + + +```shell +$ pulsar-admin topics unload \ + persistent://test-tenant/ns1/tp1 \ +``` + + +{@inject: endpoint|PUT|/admin/v2/:schema/:tenant/:namespace/:topic/unload|operation/unloadTopic} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().unload(topic); +``` + + + +### Get stats + +You can check the following statistics of a given non-partitioned topic. + + - **msgRateIn**: The sum of all local and replication publishers' publish rates (msg/s). + + - **msgThroughputIn**: The sum of all local and replication publishers' publish rates (bytes/s). + + - **msgRateOut**: The sum of all local and replication consumers' dispatch rates(msg/s). + + - **msgThroughputOut**: The sum of all local and replication consumers' dispatch rates (bytes/s). + + - **averageMsgSize**: The average size (in bytes) of messages published within the last interval. + + - **storageSize**: The sum of the ledgers' storage size for this topic. The space used to store the messages for the topic. 
+ + - **publishers**: The list of all local publishers into the topic. The list ranges from zero to thousands. + + - **msgRateIn**: The total rate of messages (msg/s) published by this publisher. + + - **msgThroughputIn**: The total throughput (bytes/s) of the messages published by this publisher. + + - **averageMsgSize**: The average message size in bytes from this publisher within the last interval. + + - **producerId**: The internal identifier for this producer on this topic. + + - **producerName**: The internal identifier for this producer, generated by the client library. + + - **address**: The IP address and source port for the connection of this producer. + + - **connectedSince**: The timestamp when this producer is created or reconnected last time. + + - **subscriptions**: The list of all local subscriptions to the topic. + + - **my-subscription**: The name of this subscription. It is defined by the client. + + - **msgRateOut**: The total rate of messages (msg/s) delivered on this subscription. + + - **msgThroughputOut**: The total throughput (bytes/s) delivered on this subscription. + + - **msgBacklog**: The number of messages in the subscription backlog. + + - **type**: The subscription type. + + - **msgRateExpired**: The rate at which messages were discarded instead of dispatched from this subscription due to TTL. + + - **lastExpireTimestamp**: The timestamp of the last message expire execution. + + - **lastConsumedFlowTimestamp**: The timestamp of the last flow command received. + + - **lastConsumedTimestamp**: The latest timestamp of all the consumed timestamp of the consumers. + + - **lastAckedTimestamp**: The latest timestamp of all the acked timestamp of the consumers. + + - **consumers**: The list of connected consumers for this subscription. + + - **msgRateOut**: The total rate of messages (msg/s) delivered to the consumer. + + - **msgThroughputOut**: The total throughput (bytes/s) delivered to the consumer. + + - **consumerName**: The internal identifier for this consumer, generated by the client library. + + - **availablePermits**: The number of messages that the consumer has space for in the client library's listen queue. `0` means the client library's queue is full and `receive()` isn't being called. A non-zero value means this consumer is ready for dispatched messages. + + - **unackedMessages**: The number of unacknowledged messages for the consumer. + + - **blockedConsumerOnUnackedMsgs**: The flag used to verify if the consumer is blocked due to reaching threshold of the unacknowledged messages. + + - **lastConsumedTimestamp**: The timestamp when the consumer reads a message the last time. + + - **lastAckedTimestamp**: The timestamp when the consumer acknowledges a message the last time. + + - **replication**: This section gives the stats for cross-colo replication of this topic + + - **msgRateIn**: The total rate (msg/s) of messages received from the remote cluster. + + - **msgThroughputIn**: The total throughput (bytes/s) received from the remote cluster. + + - **msgRateOut**: The total rate of messages (msg/s) delivered to the replication-subscriber. + + - **msgThroughputOut**: The total throughput (bytes/s) delivered to the replication-subscriber. + + - **msgRateExpired**: The total rate of messages (msg/s) expired. + + - **replicationBacklog**: The number of messages pending to be replicated to remote cluster. + + - **connected**: Whether the outbound replicator is connected. 
+ + - **replicationDelayInSeconds**: How long the oldest message has been waiting to be sent through the connection, if connected is `true`. + + - **inboundConnection**: The IP and port of the broker in the remote cluster's publisher connection to this broker. + + - **inboundConnectedSince**: The TCP connection being used to publish messages to the remote cluster. If there are no local publishers connected, this connection is automatically closed after a minute. + + - **outboundConnection**: The address of the outbound replication connection. + + - **outboundConnectedSince**: The timestamp of establishing outbound connection. + +The following is an example of a topic status. + +```json +{ + "msgRateIn": 4641.528542257553, + "msgThroughputIn": 44663039.74947473, + "msgRateOut": 0, + "msgThroughputOut": 0, + "averageMsgSize": 1232439.816728665, + "storageSize": 135532389160, + "publishers": [ + { + "msgRateIn": 57.855383881403576, + "msgThroughputIn": 558994.7078932219, + "averageMsgSize": 613135, + "producerId": 0, + "producerName": null, + "address": null, + "connectedSince": null + } + ], + "subscriptions": { + "my-topic_subscription": { + "msgRateOut": 0, + "msgThroughputOut": 0, + "msgBacklog": 116632, + "type": null, + "msgRateExpired": 36.98245516804671, + "consumers": [] + } + }, + "replication": {} +} +``` +To get the status of a topic, you can use the following ways. + + + +```shell +$ pulsar-admin topics stats \ + persistent://test-tenant/ns1/tp1 \ +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/stats|operation/getStats} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().getStats(topic); +``` + + + +### Get internal stats + +You can get the detailed statistics of a topic. + + - **entriesAddedCounter**: Messages published since this broker loaded this topic. + + - **numberOfEntries**: The total number of messages being tracked. + + - **totalSize**: The total storage size in bytes of all messages. + + - **currentLedgerEntries**: The count of messages written to the ledger that is currently open for writing. + + - **currentLedgerSize**: The size in bytes of messages written to the ledger that is currently open for writing. + + - **lastLedgerCreatedTimestamp**: The time when the last ledger is created. + + - **lastLedgerCreationFailureTimestamp:** The time when the last ledger failed. + + - **waitingCursorsCount**: The number of cursors that are "caught up" and waiting for a new message to be published. + + - **pendingAddEntriesCount**: The number of messages that complete (asynchronous) write requests. + + - **lastConfirmedEntry**: The ledgerid:entryid of the last message that is written successfully. If the entryid is `-1`, then the ledger is open, yet no entries are written. + + - **state**: The state of this ledger for writing. The state `LedgerOpened` means that a ledger is open for saving published messages. + + - **ledgers**: The ordered list of all ledgers for this topic holding messages. + + - **ledgerId**: The ID of this ledger. + + - **entries**: The total number of entries belong to this ledger. + + - **size**: The size of messages written to this ledger (in bytes). + + - **offloaded**: Whether this ledger is offloaded. + + - **compactedLedger**: The ledgers holding un-acked messages after topic compaction. + + - **ledgerId**: The ID of this ledger. + + - **entries**: The total number of entries belong to this ledger. + + - **size**: The size of messages written to this ledger (in bytes). 
+ + - **offloaded**: Whether this ledger is offloaded. The value is `false` for the compacted topic ledger. + + - **cursors**: The list of all cursors on this topic. Each subscription in the topic stats has a cursor. + + - **markDeletePosition**: All messages before the markDeletePosition are acknowledged by the subscriber. + + - **readPosition**: The latest position of subscriber for reading message. + + - **waitingReadOp**: This is true when the subscription has read the latest message published to the topic and is waiting for new messages to be published. + + - **pendingReadOps**: The counter for how many outstanding read requests to the BookKeepers in progress. + + - **messagesConsumedCounter**: The number of messages this cursor has acked since this broker loaded this topic. + + - **cursorLedger**: The ledger being used to persistently store the current markDeletePosition. + + - **cursorLedgerLastEntry**: The last entryid used to persistently store the current markDeletePosition. + + - **individuallyDeletedMessages**: If acknowledges are being done out of order, the ranges of messages acknowledged between the markDeletePosition and the read-position shows. + + - **lastLedgerSwitchTimestamp**: The last time the cursor ledger is rolled over. + + - **state**: The state of the cursor ledger: `Open` means you have a cursor ledger for saving updates of the markDeletePosition. + +The following is an example of the detailed statistics of a topic. + +```json +{ + "entriesAddedCounter": 20449518, + "numberOfEntries": 3233, + "totalSize": 331482, + "currentLedgerEntries": 3233, + "currentLedgerSize": 331482, + "lastLedgerCreatedTimestamp": "2016-06-29 03:00:23.825", + "lastLedgerCreationFailureTimestamp": null, + "waitingCursorsCount": 1, + "pendingAddEntriesCount": 0, + "lastConfirmedEntry": "324711539:3232", + "state": "LedgerOpened", + "ledgers": [ + { + "ledgerId": 324711539, + "entries": 0, + "size": 0, + "offloaded": true + } + ], + "compactedLedger": { + "ledgerId": 324711540, + "entries": 10, + "size": 100, + "offloaded": false + }, + "cursors": { + "my-subscription": { + "markDeletePosition": "324711539:3133", + "readPosition": "324711539:3233", + "waitingReadOp": true, + "pendingReadOps": 0, + "messagesConsumedCounter": 20449501, + "cursorLedger": 324702104, + "cursorLedgerLastEntry": 21, + "individuallyDeletedMessages": "[(324711539:3134‥324711539:3136], (324711539:3137‥324711539:3140], ]", + "lastLedgerSwitchTimestamp": "2016-06-29 01:30:19.313", + "state": "Open" + } + } +} +``` +To get the internal status of a topic, you can use the following ways. + + +```shell +$ pulsar-admin topics stats-internal \ + persistent://test-tenant/ns1/tp1 \ +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/internalStats|operation/getInternalStats} + + +```java +String topic = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().getInternalStats(topic); +``` + + + +### Peek messages + +You can peek a number of messages for a specific subscription of a given topic in the following ways. 
+
+
+```shell
+$ pulsar-admin topics peek-messages \
+  --count 10 --subscription my-subscription \
+  persistent://test-tenant/ns1/tp1
+
+Message ID: 315674752:0
+Properties: { "X-Pulsar-publish-time" : "2015-07-13 17:40:28.451" }
+msg-payload
+```
+
+
+{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/subscription/:subName/position/:messagePosition|operation/peekNthMessage}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+String subName = "my-subscription";
+int numMessages = 1;
+admin.topics().peekMessages(topic, subName, numMessages);
+```
+
+
+
+### Get message by ID
+
+You can fetch the message with the given ledger ID and entry ID in the following ways.
+
+
+
+```shell
+$ ./bin/pulsar-admin topics get-message-by-id \
+  persistent://public/default/my-topic \
+  -l 10 -e 0
+```
+
+
+{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/ledger/:ledgerId/entry/:entryId|operation/getMessageById}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+long ledgerId = 10;
+long entryId = 10;
+admin.topics().getMessageById(topic, ledgerId, entryId);
+```
+
+
+
+### Skip messages
+
+You can skip a number of messages for a specific subscription of a given topic in the following ways.
+
+
+
+```shell
+$ pulsar-admin topics skip \
+  --count 10 --subscription my-subscription \
+  persistent://test-tenant/ns1/tp1
+```
+
+
+{@inject: endpoint|POST|/admin/v2/:schema/:tenant/:namespace/:topic/subscription/:subName/skip/:numMessages|operation/skipMessages}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+String subName = "my-subscription";
+int numMessages = 1;
+admin.topics().skipMessages(topic, subName, numMessages);
+```
+
+
+
+### Skip all messages
+
+You can skip all the old messages for a specific subscription of a given topic.
+
+
+
+```shell
+$ pulsar-admin topics skip-all \
+  --subscription my-subscription \
+  persistent://test-tenant/ns1/tp1
+```
+
+
+{@inject: endpoint|POST|/admin/v2/:schema/:tenant/:namespace/:topic/subscription/:subName/skip_all|operation/skipAllMessages}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+String subName = "my-subscription";
+admin.topics().skipAllMessages(topic, subName);
+```
+
+
+
+### Reset cursor
+
+You can reset a subscription cursor to the position that was recorded X minutes earlier. In effect, Pulsar calculates the time and cursor position from X minutes ago and resets the cursor to that position. You can reset the cursor in the following ways.
+
+
+
+```shell
+$ pulsar-admin topics reset-cursor \
+  --subscription my-subscription --time 10 \
+  persistent://test-tenant/ns1/tp1
+```
+
+
+{@inject: endpoint|POST|/admin/v2/:schema/:tenant/:namespace/:topic/subscription/:subName/resetcursor/:timestamp|operation/resetCursor}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+String subName = "my-subscription";
+long timestamp = 2342343L;
+admin.topics().resetCursor(topic, subName, timestamp);
+```
+
+
+
+### Lookup of topic
+
+You can locate the broker URL which is serving the given topic in the following ways.
+
+
+
+```shell
+$ pulsar-admin topics lookup \
+  persistent://test-tenant/ns1/tp1
+
+ "pulsar://broker1.org.com:4480"
+```
+
+
+{@inject: endpoint|GET|/lookup/v2/topic/:schema/:tenant/:namespace/:topic|/}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+admin.lookups().lookupTopic(topic);
+```
+
+
+
+### Get bundle
+
+You can check the range of the bundle which contains a given topic in the following ways.
+
+
+
+```shell
+$ pulsar-admin topics bundle-range \
+  persistent://test-tenant/ns1/tp1
+
+ "0x00000000_0xffffffff"
+```
+
+
+{@inject: endpoint|GET|/lookup/v2/topic/:topic_domain/:tenant/:namespace/:topic/bundle|/}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+admin.lookups().getBundleRange(topic);
+```
+
+
+
+### Get subscriptions
+
+You can check all subscription names for a given topic in the following ways.
+
+
+
+```shell
+$ pulsar-admin topics subscriptions \
+  persistent://test-tenant/ns1/tp1
+
+ my-subscription
+```
+
+
+{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/subscriptions|operation/getSubscriptions}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+admin.topics().getSubscriptions(topic);
+```
+
+
+
+### Unsubscribe
+
+When a subscription no longer processes messages, you can unsubscribe it in the following ways.
+
+
+
+```shell
+$ pulsar-admin topics unsubscribe \
+  --subscription my-subscription \
+  persistent://test-tenant/ns1/tp1
+```
+
+
+{@inject: endpoint|DELETE|/admin/v2/:schema/:tenant/:namespace/:topic/subscription/:subscription|operation/deleteSubscription}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+String subscriptionName = "my-subscription";
+admin.topics().deleteSubscription(topic, subscriptionName);
+```
+
+
+
+### Last message ID
+
+You can get the last committed message ID for a persistent topic. This feature is available since the 2.3.0 release.
+
+
+
+```shell
+pulsar-admin topics last-message-id topic-name
+```
+
+
+{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/lastMessageId}
+
+
+```java
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+admin.topics().getLastMessageId(topic);
+```
+
+
+
+## Manage non-partitioned topics
+You can use the Pulsar [admin API](admin-api-overview.md) to create, delete, and check the status of non-partitioned topics.
+
+### Create
+Non-partitioned topics must be explicitly created. When creating a new non-partitioned topic, you need to provide a name for the topic.
+
+By default, 60 seconds after creation, topics are considered inactive and deleted automatically to avoid generating trash data. To disable this feature, set `brokerDeleteInactiveTopicsEnabled` to `false`. To change the frequency of checking inactive topics, set `brokerDeleteInactiveTopicsFrequencySeconds` to a specific value.
+
+For more information about the two parameters, see [here](reference-configuration.md#broker).
+
+You can create non-partitioned topics in the following ways.
+
+
+When you create non-partitioned topics with the [`create`](reference-pulsar-admin.md#create-3) command, you need to specify the topic name as an argument.
+ +```shell +$ bin/pulsar-admin topics create \ + persistent://my-tenant/my-namespace/my-topic +``` +> **Note** +> When you create a non-partitioned topic with the suffix '-partition-' followed by numeric value like 'xyz-topic-partition-x' for the topic name, if a partitioned topic with same suffix 'xyz-topic-partition-y' exists, then the numeric value(x) for the non-partitioned topic must be larger than the number of partitions(y) of the partitioned topic. Otherwise, you cannot create such a non-partitioned topic. + + +{@inject: endpoint|PUT|/admin/v2/:schema/:tenant/:namespace/:topic|operation/createNonPartitionedTopic} + + +```java +String topicName = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().createNonPartitionedTopic(topicName); +``` + + + +### Delete +You can delete non-partitioned topics in the following ways. + + +```shell +$ bin/pulsar-admin topics delete \ + persistent://my-tenant/my-namespace/my-topic +``` + + +{@inject: endpoint|DELETE|/admin/v2/:schema/:tenant/:namespace/:topic|operation/deleteTopic} + + +```java +admin.topics().delete(topic); +``` + + + +### List + +You can get the list of topics under a given namespace in the following ways. + + +```shell +$ pulsar-admin topics list tenant/namespace +persistent://tenant/namespace/topic1 +persistent://tenant/namespace/topic2 +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace|operation/getList} + + +```java +admin.topics().getList(namespace); +``` + + + +### Stats + +You can check the current statistics of a given topic. The following is an example. For description of each stats, refer to [get stats](#get-stats). + +```json +{ + "msgRateIn": 4641.528542257553, + "msgThroughputIn": 44663039.74947473, + "msgRateOut": 0, + "msgThroughputOut": 0, + "averageMsgSize": 1232439.816728665, + "storageSize": 135532389160, + "publishers": [ + { + "msgRateIn": 57.855383881403576, + "msgThroughputIn": 558994.7078932219, + "averageMsgSize": 613135, + "producerId": 0, + "producerName": null, + "address": null, + "connectedSince": null + } + ], + "subscriptions": { + "my-topic_subscription": { + "msgRateOut": 0, + "msgThroughputOut": 0, + "msgBacklog": 116632, + "type": null, + "msgRateExpired": 36.98245516804671, + "consumers": [] + } + }, + "replication": {} +} +``` +You can check the current statistics of a given topic and its connected producers and consumers in the following ways. + + +```shell +$ pulsar-admin topics stats \ + persistent://test-tenant/namespace/topic \ + --get-precise-backlog +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/stats|operation/getStats} + + +```java +admin.topics().getStats(topic, false /* is precise backlog */); +``` + + +## Manage partitioned topics +You can use Pulsar [admin API](admin-api-overview.md) to create, update, delete and check status of partitioned topics. + +### Create + +Partitioned topics must be explicitly created. When creating a new partitioned topic, you need to provide a name and the number of partitions for the topic. + +By default, 60 seconds after creation, topics are considered inactive and deleted automatically to avoid generating trash data. To disable this feature, set `brokerDeleteInactiveTopicsEnabled` to `false`. To change the frequency of checking inactive topics, set `brokerDeleteInactiveTopicsFrequencySeconds` to a specific value. + +For more information about the two parameters, see [here](reference-configuration.md#broker). + +You can create partitioned topics in the following ways. 
+ + +When you create partitioned topics with the [`create-partitioned-topic`](reference-pulsar-admin.md#create-partitioned-topic) +command, you need to specify the topic name as an argument and the number of partitions using the `-p` or `--partitions` flag. + +```shell +$ bin/pulsar-admin topics create-partitioned-topic \ + persistent://my-tenant/my-namespace/my-topic \ + --partitions 4 +``` + +> **Note** +> If a non-partitioned topic with the suffix '-partition-' followed by a numeric value like 'xyz-topic-partition-10', you can not create a partitioned topic with name 'xyz-topic', because the partitions of the partitioned topic could override the existing non-partitioned topic. To create such partitioned topic, you have to delete that non-partitioned topic first. + + +{@inject: endpoint|PUT|/admin/v2/:schema/:tenant/:namespace/:topic/partitions|operation/createPartitionedTopic} + + +```java +String topicName = "persistent://my-tenant/my-namespace/my-topic"; +int numPartitions = 4; +admin.topics().createPartitionedTopic(topicName, numPartitions); +``` + + + +### Create missed partitions + +When topic auto-creation is disabled, and you have a partitioned topic without any partitions, you can use the [`create-missed-partitions`](reference-pulsar-admin.md#create-missed-partitions) command to create partitions for the topic. + + + +You can create missed partitions with the [`create-missed-partitions`](reference-pulsar-admin.md#create-missed-partitions) command and specify the topic name as an argument. + +```shell +$ bin/pulsar-admin topics create-missed-partitions \ + persistent://my-tenant/my-namespace/my-topic \ +``` + + +{@inject: endpoint|POST|/admin/v2/:schema/:tenant/:namespace/:topic|operation/createMissedPartitions} + + +```java +String topicName = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().createMissedPartitions(topicName); +``` + + + +### Get metadata + +Partitioned topics are associated with metadata, you can view it as a JSON object. The following metadata field is available. + +Field | Description +:-----|:------- +`partitions` | The number of partitions into which the topic is divided. + + + +You can check the number of partitions in a partitioned topic with the [`get-partitioned-topic-metadata`](reference-pulsar-admin.md#get-partitioned-topic-metadata) subcommand. + +```shell +$ pulsar-admin topics get-partitioned-topic-metadata \ + persistent://my-tenant/my-namespace/my-topic +{ + "partitions": 4 +} +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/partitions|operation/getPartitionedMetadata} + + +```java +String topicName = "persistent://my-tenant/my-namespace/my-topic"; +admin.topics().getPartitionedTopicMetadata(topicName); +``` + + + +### Update + +You can update the number of partitions for an existing partitioned topic *if* the topic is non-global. However, you can only add the partition number. Decrementing the number of partitions would delete the topic, which is not supported in Pulsar. + +Producers and consumers can find the newly created partitions automatically. + + + +You can update partitioned topics with the [`update-partitioned-topic`](reference-pulsar-admin.md#update-partitioned-topic) command. 
+ +```shell +$ pulsar-admin topics update-partitioned-topic \ + persistent://my-tenant/my-namespace/my-topic \ + --partitions 8 +``` + + +{@inject: endpoint|POST|/admin/v2/:schema/:tenant/:cluster/:namespace/:destination/partitions|operation/updatePartitionedTopic} + + +```java +admin.topics().updatePartitionedTopic(topic, numPartitions); +``` + + + +### Delete +You can delete partitioned topics with the [`delete-partitioned-topic`](reference-pulsar-admin.md#delete-partitioned-topic) command, REST API and Java. + + + +```shell +$ bin/pulsar-admin topics delete-partitioned-topic \ + persistent://my-tenant/my-namespace/my-topic +``` + + +{@inject: endpoint|DELETE|/admin/v2/:schema/:topic/:namespace/:destination/partitions|operation/deletePartitionedTopic} + + +```java +admin.topics().delete(topic); +``` + + + +### List +You can get the list of topics under a given namespace in the following ways. + + +```shell +$ pulsar-admin topics list tenant/namespace +persistent://tenant/namespace/topic1 +persistent://tenant/namespace/topic2 +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace|operation/getPartitionedTopicList} + + +```java +admin.topics().getList(namespace); +``` + + + +### Stats + +You can check the current statistics of a given partitioned topic. The following is an example. For description of each stats, refer to [get stats](#get-stats). + +```json +{ + "msgRateIn" : 999.992947159793, + "msgThroughputIn" : 1070918.4635439808, + "msgRateOut" : 0.0, + "msgThroughputOut" : 0.0, + "bytesInCounter" : 270318763, + "msgInCounter" : 252489, + "bytesOutCounter" : 0, + "msgOutCounter" : 0, + "averageMsgSize" : 1070.926056966454, + "msgChunkPublished" : false, + "storageSize" : 270316646, + "backlogSize" : 200921133, + "publishers" : [ { + "msgRateIn" : 999.992947159793, + "msgThroughputIn" : 1070918.4635439808, + "averageMsgSize" : 1070.3333333333333, + "chunkedMessageRate" : 0.0, + "producerId" : 0 + } ], + "subscriptions" : { + "test" : { + "msgRateOut" : 0.0, + "msgThroughputOut" : 0.0, + "bytesOutCounter" : 0, + "msgOutCounter" : 0, + "msgRateRedeliver" : 0.0, + "chuckedMessageRate" : 0, + "msgBacklog" : 144318, + "msgBacklogNoDelayed" : 144318, + "blockedSubscriptionOnUnackedMsgs" : false, + "msgDelayed" : 0, + "unackedMessages" : 0, + "msgRateExpired" : 0.0, + "lastExpireTimestamp" : 0, + "lastConsumedFlowTimestamp" : 0, + "lastConsumedTimestamp" : 0, + "lastAckedTimestamp" : 0, + "consumers" : [ ], + "isDurable" : true, + "isReplicated" : false + } + }, + "replication" : { }, + "metadata" : { + "partitions" : 3 + }, + "partitions" : { } +} +``` + +You can check the current statistics of a given partitioned topic and its connected producers and consumers in the following ways. + + + +```shell +$ pulsar-admin topics partitioned-stats \ + persistent://test-tenant/namespace/topic \ + --per-partition +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/partitioned-stats|operation/getPartitionedStats} + + +```java +admin.topics().getPartitionedStats(topic, true /* per partition */, false /* is precise backlog */); +``` + + + +### Internal stats + +You can check the detailed statistics of a topic. The following is an example. For description of each stats, refer to [get internal stats](#get-internal-stats). 
+ +```json +{ + "entriesAddedCounter": 20449518, + "numberOfEntries": 3233, + "totalSize": 331482, + "currentLedgerEntries": 3233, + "currentLedgerSize": 331482, + "lastLedgerCreatedTimestamp": "2016-06-29 03:00:23.825", + "lastLedgerCreationFailureTimestamp": null, + "waitingCursorsCount": 1, + "pendingAddEntriesCount": 0, + "lastConfirmedEntry": "324711539:3232", + "state": "LedgerOpened", + "ledgers": [ + { + "ledgerId": 324711539, + "entries": 0, + "size": 0 + } + ], + "cursors": { + "my-subscription": { + "markDeletePosition": "324711539:3133", + "readPosition": "324711539:3233", + "waitingReadOp": true, + "pendingReadOps": 0, + "messagesConsumedCounter": 20449501, + "cursorLedger": 324702104, + "cursorLedgerLastEntry": 21, + "individuallyDeletedMessages": "[(324711539:3134‥324711539:3136], (324711539:3137‥324711539:3140], ]", + "lastLedgerSwitchTimestamp": "2016-06-29 01:30:19.313", + "state": "Open" + } + } +} +``` + +You can get the internal stats for the partitioned topic in the following ways. + + + +```shell +$ pulsar-admin topics stats-internal \ + persistent://test-tenant/namespace/topic +``` + + +{@inject: endpoint|GET|/admin/v2/:schema/:tenant/:namespace/:topic/internalStats|operation/getInternalStats} + + +```java +admin.topics().getInternalStats(topic); +``` + + + +## Publish to partitioned topics + +By default, Pulsar topics are served by a single broker, which limits the maximum throughput of a topic. *Partitioned topics* can span multiple brokers and thus allow for higher throughput. + +You can publish to partitioned topics using Pulsar client libraries. When publishing to partitioned topics, you must specify a routing mode. If you do not specify any routing mode when you create a new producer, the round robin routing mode is used. + +### Routing mode + +You can specify the routing mode in the ProducerConfiguration object that you use to configure your producer. The routing mode determines which partition(internal topic) that each message should be published to. + +The following {@inject: javadoc:MessageRoutingMode:/client/org/apache/pulsar/client/api/MessageRoutingMode} options are available. + +Mode | Description +:--------|:------------ +`RoundRobinPartition` | If no key is provided, the producer publishes messages across all partitions in round-robin policy to achieve the maximum throughput. Round-robin is not done per individual message, round-robin is set to the same boundary of batching delay to ensure that batching is effective. If a key is specified on the message, the partitioned producer hashes the key and assigns message to a particular partition. This is the default mode. +`SinglePartition` | If no key is provided, the producer picks a single partition randomly and publishes all messages into that partition. If a key is specified on the message, the partitioned producer hashes the key and assigns message to a particular partition. +`CustomPartition` | Use custom message router implementation that is called to determine the partition for a particular message. You can create a custom routing mode by using the Java client and implementing the {@inject: javadoc:MessageRouter:/client/org/apache/pulsar/client/api/MessageRouter} interface. 
+
+The following is an example:
+
+```java
+String pulsarBrokerRootUrl = "pulsar://localhost:6650";
+String topic = "persistent://my-tenant/my-namespace/my-topic";
+
+PulsarClient pulsarClient = PulsarClient.builder().serviceUrl(pulsarBrokerRootUrl).build();
+Producer<byte[]> producer = pulsarClient.newProducer()
+        .topic(topic)
+        .messageRoutingMode(MessageRoutingMode.SinglePartition)
+        .create();
+producer.send("Partitioned topic message".getBytes());
+```
+
+### Custom message router
+
+To use a custom message router, you need to provide an implementation of the {@inject: javadoc:MessageRouter:/client/org/apache/pulsar/client/api/MessageRouter} interface, which has just one `choosePartition` method:
+
+```java
+public interface MessageRouter extends Serializable {
+    int choosePartition(Message msg);
+}
+```
+
+The following router routes every message to partition 10:
+
+```java
+public class AlwaysTenRouter implements MessageRouter {
+    public int choosePartition(Message msg) {
+        return 10;
+    }
+}
+```
+
+With that implementation in place, you can send messages as follows:
+
+```java
+String pulsarBrokerRootUrl = "pulsar://localhost:6650";
+String topic = "persistent://my-tenant/my-cluster-my-namespace/my-topic";
+
+PulsarClient pulsarClient = PulsarClient.builder().serviceUrl(pulsarBrokerRootUrl).build();
+Producer<byte[]> producer = pulsarClient.newProducer()
+        .topic(topic)
+        .messageRouter(new AlwaysTenRouter())
+        .create();
+producer.send("Partitioned topic message".getBytes());
+```
+
+### How to choose partitions when using a key
+If a message has a key, it supersedes the round robin routing policy. The following example illustrates how a partition is chosen when a key is present.
+
+```java
+// If the message has a key, it supersedes the round robin routing policy
+    if (msg.hasKey()) {
+        return signSafeMod(hash.makeHash(msg.getKey()), topicMetadata.numPartitions());
+    }
+
+    if (isBatchingEnabled) { // if batching is enabled, choose partition on `partitionSwitchMs` boundary.
+        long currentMs = clock.millis();
+        return signSafeMod(currentMs / partitionSwitchMs + startPtnIdx, topicMetadata.numPartitions());
+    } else {
+        return signSafeMod(PARTITION_INDEX_UPDATER.getAndIncrement(this), topicMetadata.numPartitions());
+    }
+```
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/administration-proxy.md b/site2/website/versioned_docs/version-2.7.0/administration-proxy.md
new file mode 100644
index 00000000000000..c93471f64fd8e4
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/administration-proxy.md
@@ -0,0 +1,76 @@
+---
+id: version-2.7.0-administration-proxy
+title: Pulsar proxy
+sidebar_label: Pulsar proxy
+original_id: administration-proxy
+---
+
+The Pulsar proxy is an optional gateway that you can use when direct connections between clients and Pulsar brokers are either infeasible or undesirable. For example, when you run Pulsar in a cloud environment or on [Kubernetes](https://kubernetes.io) or an analogous platform, you can run the Pulsar proxy.
+
+## Configure the proxy
+
+Before using the proxy, you need to configure it with the broker addresses in the cluster. You can configure the proxy to connect directly to service discovery, or specify a broker URL in the configuration.
+
+### Use service discovery
+
+Pulsar uses [ZooKeeper](https://zookeeper.apache.org) for service discovery. To connect the proxy to ZooKeeper, specify the following in `conf/proxy.conf`.
+```properties
+zookeeperServers=zk-0,zk-1,zk-2
+configurationStoreServers=zk-0:2184,zk-remote:2184
+```
+
+> To use service discovery, you need to open the network ACLs so that the proxy can connect to the ZooKeeper nodes through the ZooKeeper client port (port `2181`) and the configuration store client port (port `2184`).
+
+> However, using service discovery is not secure: with the network ACL open, anyone who compromises a proxy gains full access to ZooKeeper.
+
+### Use broker URLs
+
+It is more secure to specify a URL to connect to the brokers.
+
+Proxy authorization requires access to ZooKeeper, so if you use these broker URLs to connect to the brokers, you need to disable authorization at the proxy level. Brokers still authorize requests after the proxy forwards them.
+
+You can configure the broker URLs in `conf/proxy.conf` as follows.
+
+```properties
+brokerServiceURL=pulsar://brokers.example.com:6650
+brokerWebServiceURL=http://brokers.example.com:8080
+functionWorkerWebServiceURL=http://function-workers.example.com:8080
+```
+
+If you use TLS, configure the broker URLs in the following way:
+```properties
+brokerServiceURLTLS=pulsar+ssl://brokers.example.com:6651
+brokerWebServiceURLTLS=https://brokers.example.com:8443
+functionWorkerWebServiceURL=https://function-workers.example.com:8443
+```
+
+The hostname in the provided URLs should be a DNS entry that points to multiple brokers, or a virtual IP address backed by multiple broker IP addresses, so that the proxy does not lose connectivity to the Pulsar cluster if a single broker becomes unavailable.
+
+The ports used to connect to the brokers (6650 and 8080, or in the case of TLS, 6651 and 8443) should be open in the network ACLs.
+
+Note that if you do not use functions, you do not need to configure `functionWorkerWebServiceURL`.
+
+## Start the proxy
+
+To start the proxy:
+
+```bash
+$ cd /path/to/pulsar/directory
+$ bin/pulsar proxy
+```
+
+> You can run multiple instances of the Pulsar proxy in a cluster.
+
+## Stop the proxy
+
+The Pulsar proxy runs in the foreground by default. To stop the proxy, stop the process in which it is running.
+
+## Proxy frontends
+
+You can run the Pulsar proxy behind some kind of load-distributing frontend, such as an [HAProxy](https://www.digitalocean.com/community/tutorials/an-introduction-to-haproxy-and-load-balancing-concepts) load balancer.
+
+## Use Pulsar clients with the proxy
+
+Once your Pulsar proxy is up and running, preferably behind a load-distributing [frontend](#proxy-frontends), clients can connect to the proxy via whichever address the frontend uses. If the address is the DNS address `pulsar.cluster.default`, for example, the connection URL for clients is `pulsar://pulsar.cluster.default:6650`, as shown in the sketch below.
+
+For more information on proxy configuration, refer to [Pulsar proxy](reference-configuration.md#pulsar-proxy).
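+
+For instance, here is a minimal Java producer sketch that connects through the proxy. It assumes the `pulsar.cluster.default` frontend address from the example above; the tenant, namespace, and topic names are placeholders:
+
+```java
+import org.apache.pulsar.client.api.Producer;
+import org.apache.pulsar.client.api.PulsarClient;
+
+public class ProxyClientExample {
+    public static void main(String[] args) throws Exception {
+        // Connect to the proxy (or its load-balancing frontend), not to an individual broker.
+        PulsarClient client = PulsarClient.builder()
+                .serviceUrl("pulsar://pulsar.cluster.default:6650")
+                .build();
+
+        Producer<byte[]> producer = client.newProducer()
+                .topic("persistent://my-tenant/my-namespace/my-topic")
+                .create();
+
+        producer.send("Hello from behind the proxy".getBytes());
+
+        producer.close();
+        client.close();
+    }
+}
+```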
diff --git a/site2/website/versioned_docs/version-2.7.0/administration-pulsar-manager.md b/site2/website/versioned_docs/version-2.7.0/administration-pulsar-manager.md new file mode 100644 index 00000000000000..8fddeec97b5a60 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/administration-pulsar-manager.md @@ -0,0 +1,183 @@ +--- +id: version-2.7.0-administration-pulsar-manager +title: Pulsar Manager +sidebar_label: Pulsar Manager +original_id: administration-pulsar-manager +--- + +Pulsar Manager is a web-based GUI management and monitoring tool that helps administrators and users manage and monitor tenants, namespaces, topics, subscriptions, brokers, clusters, and so on, and supports dynamic configuration of multiple environments. + +> Note +> If you monitor your current stats with [Pulsar dashboard](administration-dashboard.md), you can try to use Pulsar Manager instead. Pulsar dashboard is deprecated. + +## Install + +The easiest way to use the Pulsar Manager is to run it inside a [Docker](https://www.docker.com/products/docker) container. + + +```shell +docker pull apachepulsar/pulsar-manager:v0.2.0 +docker run -it \ + -p 9527:9527 -p 7750:7750 \ + -e SPRING_CONFIGURATION_FILE=/pulsar-manager/pulsar-manager/application.properties \ + apachepulsar/pulsar-manager:v0.2.0 +``` + +* `SPRING_CONFIGURATION_FILE`: Default configuration file for spring. + +#### Set administrator account and password + + ```shell +CSRF_TOKEN=$(curl http://localhost:7750/pulsar-manager/csrf-token) +curl \ + -H 'X-XSRF-TOKEN: $CSRF_TOKEN' \ + -H 'Cookie: XSRF-TOKEN=$CSRF_TOKEN;' \ + -H "Content-Type: application/json" \ + -X PUT http://localhost:7750/pulsar-manager/users/superuser \ + -d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}' +``` + +You can find the docker image in the [Docker Hub](https://github.com/apache/pulsar-manager/tree/master/docker) directory and build an image from the source code as well: + +``` +git clone https://github.com/apache/pulsar-manager +cd pulsar-manager/front-end +npm install --save +npm run build:prod +cd .. +./gradlew build -x test +cd .. +docker build -f docker/Dockerfile --build-arg BUILD_DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` --build-arg VCS_REF=`latest` --build-arg VERSION=`latest` -t apachepulsar/pulsar-manager . +``` + +### Use custom databases + +If you have a large amount of data, you can use a custom database. The following is an example of PostgreSQL. + +1. Initialize database and table structures using the [file](https://github.com/apache/pulsar-manager/tree/master/src/main/resources/META-INF/sql/postgresql-schema.sql). + +2. Modify the [configuration file](https://github.com/apache/pulsar-manager/blob/master/src/main/resources/application.properties) and add PostgreSQL configuration. + +``` +spring.datasource.driver-class-name=org.postgresql.Driver +spring.datasource.url=jdbc:postgresql://127.0.0.1:5432/pulsar_manager +spring.datasource.username=postgres +spring.datasource.password=postgres +``` + +3. Compile to generate a new executable jar package. + +``` +./gradlew build -x test +``` + +### Enable JWT authentication + +If you want to turn on JWT authentication, configure the following parameters: + +* `backend.jwt.token`: token for the superuser. You need to configure this parameter during cluster initialization. +* `jwt.broker.token.mode`: two modes of generating token, SECRET and PRIVATE. +* `jwt.broker.public.key`: configure this option if you are using the PRIVATE mode. 
+* `jwt.broker.private.key`: configure this option if you are using the PRIVATE mode.
+* `jwt.broker.secret.key`: configure this option if you are using the SECRET mode.
+
+For more information, see [Token Authentication Admin of Pulsar](http://pulsar.apache.org/docs/en/security-token-admin/).
+
+
+If you want to enable JWT authentication, use one of the following methods.
+
+
+* Method 1: use the command-line tool
+
+```
+wget https://dist.apache.org/repos/dist/release/pulsar/pulsar-manager/apache-pulsar-manager-0.2.0/apache-pulsar-manager-0.2.0-bin.tar.gz
+tar -zxvf apache-pulsar-manager-0.2.0-bin.tar.gz
+cd pulsar-manager
+tar -zxvf pulsar-manager.tar
+cd pulsar-manager
+cp -r ../dist ui
+./bin/pulsar-manager --redirect.host=http://localhost --redirect.port=9527 --insert.stats.interval=600000 --backend.jwt.token=token --jwt.broker.token.mode=PRIVATE --jwt.broker.private.key=file:///path/broker-private.key --jwt.broker.public.key=file:///path/broker-public.key
+```
+
+First, [set the administrator account and password](#set-administrator-account-and-password).
+
+Then, log in to Pulsar Manager through http://localhost:7750/ui/index.html.
+
+* Method 2: configure the application.properties file
+
+```
+backend.jwt.token=token
+
+jwt.broker.token.mode=PRIVATE
+jwt.broker.public.key=file:///path/broker-public.key
+jwt.broker.private.key=file:///path/broker-private.key
+
+# Or, for the SECRET mode:
+jwt.broker.token.mode=SECRET
+jwt.broker.secret.key=file:///path/broker-secret.key
+```
+
+* Method 3: use Docker and enable token authentication.
+
+```
+export JWT_TOKEN="your-token"
+docker run -it -p 9527:9527 -p 7750:7750 -e REDIRECT_HOST=http://localhost -e REDIRECT_PORT=9527 -e DRIVER_CLASS_NAME=org.postgresql.Driver -e URL='jdbc:postgresql://127.0.0.1:5432/pulsar_manager' -e USERNAME=pulsar -e PASSWORD=pulsar -e LOG_LEVEL=DEBUG -e JWT_TOKEN=$JWT_TOKEN -v $PWD:/data apachepulsar/pulsar-manager:v0.2.0 /bin/sh
+```
+
+* `JWT_TOKEN`: the token of the superuser configured for the broker. It is generated by the `bin/pulsar tokens create --secret-key` or `bin/pulsar tokens create --private-key` command.
+* `REDIRECT_HOST`: the IP address of the front-end server.
+* `REDIRECT_PORT`: the port of the front-end server.
+* `DRIVER_CLASS_NAME`: the driver class name of the PostgreSQL database.
+* `URL`: the JDBC URL of your PostgreSQL database, such as jdbc:postgresql://127.0.0.1:5432/pulsar_manager. The Docker image automatically starts a local instance of the PostgreSQL database.
+* `USERNAME`: the username of PostgreSQL.
+* `PASSWORD`: the password of PostgreSQL.
+* `LOG_LEVEL`: the log level.
+
+* Method 4: use Docker and turn on **token authentication** and **token management** with a private key and public key.
+
+```
+export JWT_TOKEN="your-token"
+export PRIVATE_KEY="file:///pulsar-manager/secret/my-private.key"
+export PUBLIC_KEY="file:///pulsar-manager/secret/my-public.key"
+docker run -it -p 9527:9527 -p 7750:7750 -e REDIRECT_HOST=http://localhost -e REDIRECT_PORT=9527 -e DRIVER_CLASS_NAME=org.postgresql.Driver -e URL='jdbc:postgresql://127.0.0.1:5432/pulsar_manager' -e USERNAME=pulsar -e PASSWORD=pulsar -e LOG_LEVEL=DEBUG -e JWT_TOKEN=$JWT_TOKEN -e PRIVATE_KEY=$PRIVATE_KEY -e PUBLIC_KEY=$PUBLIC_KEY -v $PWD:/data -v $PWD/secret:/pulsar-manager/secret apachepulsar/pulsar-manager:v0.2.0 /bin/sh
+```
+
+* `JWT_TOKEN`: the token of the superuser configured for the broker. It is generated by the `bin/pulsar tokens create --private-key` command.
+* `PRIVATE_KEY`: private key path mounted in the container, generated by the `bin/pulsar tokens create-key-pair` command.
+* `PUBLIC_KEY`: public key path mounted in the container, generated by the `bin/pulsar tokens create-key-pair` command.
+* `$PWD/secret`: the local folder where the private key and public key generated by the `bin/pulsar tokens create-key-pair` command are placed.
+* `REDIRECT_HOST`: the IP address of the front-end server.
+* `REDIRECT_PORT`: the port of the front-end server.
+* `DRIVER_CLASS_NAME`: the driver class name of the PostgreSQL database.
+* `URL`: the JDBC URL of your PostgreSQL database, such as jdbc:postgresql://127.0.0.1:5432/pulsar_manager. The Docker image automatically starts a local instance of the PostgreSQL database.
+* `USERNAME`: the username of PostgreSQL.
+* `PASSWORD`: the password of PostgreSQL.
+* `LOG_LEVEL`: the log level.
+
+* Method 5: use Docker and turn on **token authentication** and **token management** with a secret key.
+
+
+```
+export JWT_TOKEN="your-token"
+export SECRET_KEY="file:///pulsar-manager/secret/my-secret.key"
+docker run -it -p 9527:9527 -p 7750:7750 -e REDIRECT_HOST=http://localhost -e REDIRECT_PORT=9527 -e DRIVER_CLASS_NAME=org.postgresql.Driver -e URL='jdbc:postgresql://127.0.0.1:5432/pulsar_manager' -e USERNAME=pulsar -e PASSWORD=pulsar -e LOG_LEVEL=DEBUG -e JWT_TOKEN=$JWT_TOKEN -e SECRET_KEY=$SECRET_KEY -v $PWD:/data -v $PWD/secret:/pulsar-manager/secret apachepulsar/pulsar-manager:v0.2.0 /bin/sh
+```
+
+* `JWT_TOKEN`: the token of the superuser configured for the broker. It is generated by the `bin/pulsar tokens create --secret-key` command.
+* `SECRET_KEY`: secret key path mounted in the container, generated by the `bin/pulsar tokens create-secret-key` command.
+* `$PWD/secret`: the local folder where the secret key generated by the `bin/pulsar tokens create-secret-key` command is placed.
+* `REDIRECT_HOST`: the IP address of the front-end server.
+* `REDIRECT_PORT`: the port of the front-end server.
+* `DRIVER_CLASS_NAME`: the driver class name of the PostgreSQL database.
+* `URL`: the JDBC URL of your PostgreSQL database, such as jdbc:postgresql://127.0.0.1:5432/pulsar_manager. The Docker image automatically starts a local instance of the PostgreSQL database.
+* `USERNAME`: the username of PostgreSQL.
+* `PASSWORD`: the password of PostgreSQL.
+* `LOG_LEVEL`: the log level.
+
+* For more information about backend configurations, see [here](https://github.com/apache/pulsar-manager/blob/master/src/README.md).
+* For more information about frontend configurations, see [here](https://github.com/apache/pulsar-manager/tree/master/front-end).
+
+## Log in
+
+[Set the administrator account and password](#set-administrator-account-and-password).
+
+Visit http://localhost:9527 to log in.
diff --git a/site2/website/versioned_docs/version-2.7.0/administration-zk-bk.md b/site2/website/versioned_docs/version-2.7.0/administration-zk-bk.md
new file mode 100644
index 00000000000000..76401f610e6310
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/administration-zk-bk.md
@@ -0,0 +1,348 @@
+---
+id: version-2.7.0-administration-zk-bk
+title: ZooKeeper and BookKeeper administration
+sidebar_label: ZooKeeper and BookKeeper
+original_id: administration-zk-bk
+---
+
+Pulsar relies on two external systems for essential tasks:
+
+* [ZooKeeper](https://zookeeper.apache.org/) is responsible for a wide variety of configuration-related and coordination-related tasks.
+* [BookKeeper](http://bookkeeper.apache.org/) is responsible for [persistent storage](concepts-architecture-overview.md#persistent-storage) of message data.
+
+ZooKeeper and BookKeeper are both open-source [Apache](https://www.apache.org/) projects.
+
+> Skip to the [How Pulsar uses ZooKeeper and BookKeeper](#how-pulsar-uses-zookeeper-and-bookkeeper) section below for a more schematic explanation of the role of these two systems in Pulsar.
+
+
+## ZooKeeper
+
+Each Pulsar instance relies on two separate ZooKeeper quorums.
+
+* [Local ZooKeeper](#deploy-local-zookeeper) operates at the cluster level and provides cluster-specific configuration management and coordination. Each Pulsar cluster needs to have a dedicated ZooKeeper cluster.
+* [Configuration Store](#deploy-configuration-store) operates at the instance level and provides configuration management for the entire system (and thus across clusters). The configuration store quorum can be provided by an independent cluster of machines or by the same machines that local ZooKeeper uses.
+
+### Deploy local ZooKeeper
+
+ZooKeeper manages a variety of essential coordination-related and configuration-related tasks for Pulsar.
+
+To deploy a Pulsar instance, you need to stand up one local ZooKeeper cluster *per Pulsar cluster*.
+
+To begin, add all ZooKeeper servers to the quorum configuration specified in the [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) file. Add a `server.N` line for each node in the cluster to the configuration, where `N` is the number of the ZooKeeper node. The following is an example for a three-node cluster:
+
+```properties
+server.1=zk1.us-west.example.com:2888:3888
+server.2=zk2.us-west.example.com:2888:3888
+server.3=zk3.us-west.example.com:2888:3888
+```
+
+On each host, you need to specify the node ID in the `myid` file of each node, which is in the `data/zookeeper` folder of each server by default (you can change the file location via the [`dataDir`](reference-configuration.md#zookeeper-dataDir) parameter).
+
+> See the [Multi-server setup guide](https://zookeeper.apache.org/doc/r3.4.10/zookeeperAdmin.html#sc_zkMulitServerSetup) in the ZooKeeper documentation for detailed information on `myid` and more.
+
+
+On a ZooKeeper server at `zk1.us-west.example.com`, for example, you can set the `myid` value like this:
+
+```shell
+$ mkdir -p data/zookeeper
+$ echo 1 > data/zookeeper/myid
+```
+
+On `zk2.us-west.example.com` the command is `echo 2 > data/zookeeper/myid` and so on.
+
+Once you add each server to the `zookeeper.conf` configuration and each server has the appropriate `myid` entry, you can start ZooKeeper on all hosts (in the background, using nohup) with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool:
+
+```shell
+$ bin/pulsar-daemon start zookeeper
+```
+
+### Deploy configuration store
+
+The ZooKeeper cluster configured and started up in the section above is a *local* ZooKeeper cluster that you can use to manage a single Pulsar cluster. In addition to a local cluster, however, a full Pulsar instance also requires a configuration store for handling some instance-level configuration and coordination tasks.
+
+If you deploy a [single-cluster](#single-cluster-pulsar-instance) instance, you do not need a separate cluster for the configuration store. If, however, you deploy a [multi-cluster](#multi-cluster-pulsar-instance) instance, you need to stand up a separate ZooKeeper cluster for configuration tasks.
+
+#### Single-cluster Pulsar instance
+
+If your Pulsar instance consists of just one cluster, then you can deploy a configuration store on the same machines as the local ZooKeeper quorum but run on different TCP ports.
+
+To deploy a ZooKeeper configuration store in a single-cluster instance, add the same ZooKeeper servers that the local quorum uses to the configuration file in [`conf/global_zookeeper.conf`](reference-configuration.md#configuration-store) using the same method for [local ZooKeeper](#local-zookeeper), but make sure to use a different port (2181 is the default for ZooKeeper). The following is an example that uses port 2184 for a three-node ZooKeeper cluster:
+
+```properties
+clientPort=2184
+server.1=zk1.us-west.example.com:2185:2186
+server.2=zk2.us-west.example.com:2185:2186
+server.3=zk3.us-west.example.com:2185:2186
+```
+
+As before, create the `myid` files for each server in `data/global-zookeeper/myid`.
+
+#### Multi-cluster Pulsar instance
+
+When you deploy a global Pulsar instance, with clusters distributed across different geographical regions, the configuration store serves as a highly available and strongly consistent metadata store that can tolerate failures and partitions spanning whole regions.
+
+The key here is to make sure the ZK quorum members are spread across at least 3 regions and that other regions run as observers.
+
+Again, given the very low expected load on the configuration store servers, you can share the same hosts used for the local ZooKeeper quorum.
+
+For example, assume a Pulsar instance with the following clusters: `us-west`, `us-east`, `us-central`, `eu-central`, and `ap-south`. Also assume that each cluster has its own local ZK servers named like this:
+
+```
+zk[1-3].${CLUSTER}.example.com
+```
+
+In this scenario, you want to pick the quorum participants from a few clusters and let all the others be ZK observers. For example, to form a 7-server quorum, you can pick 3 servers from `us-west`, 2 from `us-central`, and 2 from `us-east`.
+
+This guarantees that writes to the configuration store are possible even if one of these regions is unreachable.
+
+The ZK configuration in all the servers looks like:
+
+```properties
+clientPort=2184
+server.1=zk1.us-west.example.com:2185:2186
+server.2=zk2.us-west.example.com:2185:2186
+server.3=zk3.us-west.example.com:2185:2186
+server.4=zk1.us-central.example.com:2185:2186
+server.5=zk2.us-central.example.com:2185:2186
+server.6=zk3.us-central.example.com:2185:2186:observer
+server.7=zk1.us-east.example.com:2185:2186
+server.8=zk2.us-east.example.com:2185:2186
+server.9=zk3.us-east.example.com:2185:2186:observer
+server.10=zk1.eu-central.example.com:2185:2186:observer
+server.11=zk2.eu-central.example.com:2185:2186:observer
+server.12=zk3.eu-central.example.com:2185:2186:observer
+server.13=zk1.ap-south.example.com:2185:2186:observer
+server.14=zk2.ap-south.example.com:2185:2186:observer
+server.15=zk3.ap-south.example.com:2185:2186:observer
+```
+
+Additionally, ZK observers need to have:
+
+```properties
+peerType=observer
+```
+
+##### Start the service
+
+Once your configuration store configuration is in place, you can start up the service using [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon):
+
+```shell
+$ bin/pulsar-daemon start configuration-store
+```
+
+
+
+### ZooKeeper configuration
+
+In Pulsar, ZooKeeper configuration is handled by two separate configuration files in the `conf` directory of your Pulsar installation: `conf/zookeeper.conf` for [local ZooKeeper](#local-zookeeper) and `conf/global-zookeeper.conf` for the [configuration store](#configuration-store).
+
+#### Local ZooKeeper
+
+The [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) file handles the configuration for local ZooKeeper. The table below shows the available parameters:
+
+|Name|Description|Default|
+|---|---|---|
+|tickTime| The tick is the basic unit of time in ZooKeeper, measured in milliseconds and used to regulate things like heartbeats and timeouts. tickTime is the length of a single tick. |2000|
+|initLimit| The maximum time, in ticks, that the leader ZooKeeper server allows follower ZooKeeper servers to successfully connect and sync. The tick time is set in milliseconds using the tickTime parameter. |10|
+|syncLimit| The maximum time, in ticks, that a follower ZooKeeper server is allowed to sync with other ZooKeeper servers. The tick time is set in milliseconds using the tickTime parameter. |5|
+|dataDir| The location where ZooKeeper stores in-memory database snapshots as well as the transaction log of updates to the database. |data/zookeeper|
+|clientPort| The port on which the ZooKeeper server listens for connections. |2181|
+|autopurge.snapRetainCount| In ZooKeeper, auto purge determines how many recent snapshots of the database stored in dataDir to retain within the time interval specified by autopurge.purgeInterval (while deleting the rest). |3|
+|autopurge.purgeInterval| The time interval, in hours, which triggers the ZooKeeper database purge task. Setting to a non-zero number enables auto purge; setting to 0 disables. Read this guide before enabling auto purge. |1|
+|maxClientCnxns| The maximum number of client connections. Increase this if you need to handle more ZooKeeper clients. |60|
+
+
+#### Configuration Store
+
+The [`conf/global-zookeeper.conf`](reference-configuration.md#configuration-store) file handles the configuration for the configuration store. It accepts the same ZooKeeper parameters listed above for local ZooKeeper.
+
+
+## BookKeeper
+
+BookKeeper stores all durable messages in Pulsar.
+BookKeeper is a distributed [write-ahead log (WAL)](https://en.wikipedia.org/wiki/Write-ahead_logging) system that guarantees read consistency of independent message logs, called *ledgers*. Individual BookKeeper servers are also called *bookies*.
+
+> To manage message persistence, retention, and expiry in Pulsar, refer to the [retention and expiry cookbook](cookbooks-retention-expiry.md).
+
+### Hardware requirements
+
+Bookie hosts store message data on disk. To provide optimal performance, ensure that the bookies have a suitable hardware configuration. The following are two key dimensions of bookie hardware capacity:
+
+- Disk I/O capacity (read/write)
+- Storage capacity
+
+By default, message entries written to bookies are always synced to disk before an acknowledgement is returned to the Pulsar broker. To ensure low write latency, BookKeeper is designed to use multiple devices:
+
+- A **journal** to ensure durability. For sequential writes, it is critical to have fast [fsync](https://linux.die.net/man/2/fsync) operations on bookie hosts. Typically, small and fast [solid-state drives](https://en.wikipedia.org/wiki/Solid-state_drive) (SSDs) should suffice, or [hard disk drives](https://en.wikipedia.org/wiki/Hard_disk_drive) (HDDs) with a [RAID](https://en.wikipedia.org/wiki/RAID) controller and a battery-backed write cache. Both solutions can reach fsync latency of ~0.4 ms.
+- A **ledger storage device** to store data. Writes happen in the background, so write I/O is not a big concern. Reads happen sequentially most of the time, and the backlog is read only when consumers fall behind. To store large amounts of data, a typical configuration involves multiple HDDs with a RAID controller.
+
+### Configure BookKeeper
+
+You can configure BookKeeper bookies using the [`conf/bookkeeper.conf`](reference-configuration.md#bookkeeper) configuration file. When you configure each bookie, ensure that the [`zkServers`](reference-configuration.md#bookkeeper-zkServers) parameter is set to the connection string for the local ZooKeeper quorum of the Pulsar cluster.
+
+The minimum configuration changes required in `conf/bookkeeper.conf` are as follows:
+
+```properties
+# Change to point to journal disk mount point
+journalDirectory=data/bookkeeper/journal
+
+# Point to ledger storage disk mount point
+ledgerDirectories=data/bookkeeper/ledgers
+
+# Point to local ZK quorum
+zkServers=zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181
+
+# Change the ledger manager type
+ledgerManagerType=hierarchical
+```
+
+To change the ZooKeeper root path that BookKeeper uses, set `zkLedgersRootPath=/MY-PREFIX/ledgers` rather than appending a chroot path to the connection string (as in `zkServers=localhost:2181/MY-PREFIX`).
+
+> For more information about BookKeeper, refer to the official [BookKeeper docs](http://bookkeeper.apache.org).
+
+### Deploy BookKeeper
+
+BookKeeper provides [persistent message storage](concepts-architecture-overview.md#persistent-storage) for Pulsar. Each Pulsar cluster needs its own cluster of bookies. The BookKeeper cluster shares a local ZooKeeper quorum with the Pulsar cluster.
+
+### Start bookies manually
+
+You can start a bookie in the foreground or as a background daemon.
+
+To start a bookie in the foreground, use the [`bookkeeper`](reference-cli-tools.md#bookkeeper) CLI tool:
+
+```bash
+$ bin/bookkeeper bookie
+```
+
+To start a bookie in the background, use the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool:
+
+```bash
+$ bin/pulsar-daemon start bookie
+```
+
+You can verify whether the bookie works properly with the `bookiesanity` command for the [BookKeeper shell](reference-cli-tools.md#bookkeeper-shell):
+
+```shell
+$ bin/bookkeeper shell bookiesanity
+```
+
+This command creates a new ledger on the local bookie, writes a few entries, reads them back, and finally deletes the ledger.
+
+### Decommission bookies cleanly
+
+Before you decommission a bookie, you need to check your environment and meet the following requirements.
+
+1. Ensure the state of your cluster supports decommissioning the target bookie. Check whether `EnsembleSize >= Write Quorum >= Ack Quorum` still holds with one less bookie.
+
+2. Ensure the target bookie is listed in the output of the `listbookies` command.
+
+3. Ensure that no other process is ongoing (such as an upgrade).
+
+Then you can decommission the bookie safely. To decommission a bookie, complete the following steps.
+
+1. Log in to the bookie node and check whether there are under-replicated ledgers. The decommission command forces replication of any under-replicated ledgers.
+`$ bin/bookkeeper shell listunderreplicated`
+
+2. Stop the bookie by killing the bookie process. Make sure that no liveness/readiness probes are set up that would spin the bookie back up if you deploy it in a Kubernetes environment.
+
+3. Run the decommission command.
+   - If you have logged in to the node to be decommissioned, you do not need to provide `-bookieid`.
+   - If you are running the decommission command for the target bookie node from another bookie node, you should pass the target bookie ID in the arguments for `-bookieid`.
+   `$ bin/bookkeeper shell decommissionbookie`
+   or
+   `$ bin/bookkeeper shell decommissionbookie -bookieid <bookieid>`
+
+4. Validate that no ledgers are on the decommissioned bookie.
+`$ bin/bookkeeper shell listledgers -bookieid <bookieid>`
+
+You can run the following command to check if the bookie you have decommissioned is listed in the bookies list:
+
+```bash
+./bookkeeper shell listbookies -rw -h
+./bookkeeper shell listbookies -ro -h
+```
+
+## BookKeeper persistence policies
+
+In Pulsar, you can set *persistence policies* at the namespace level, which determine how BookKeeper handles persistent storage of messages. Policies determine four things:
+
+* The number of acks (guaranteed copies) to wait for on each ledger entry.
+* The number of bookies to use for a topic.
+* The number of writes to make for each ledger entry.
+* The throttling rate for mark-delete operations.
+
+### Set persistence policies
+
+You can set persistence policies for BookKeeper at the [namespace](reference-terminology.md#namespace) level.
+
+#### Pulsar-admin
+
+Use the [`set-persistence`](reference-pulsar-admin.md#namespaces-set-persistence) subcommand and specify a namespace as well as any policies that you want to apply.
The available flags are:
+
+Flag | Description | Default
+:----|:------------|:-------
+`-a`, `--bookkeeper-ack-quorum` | The number of acks (guaranteed copies) to wait on for each entry | 0
+`-e`, `--bookkeeper-ensemble` | The number of [bookies](reference-terminology.md#bookie) to use for topics in the namespace | 0
+`-w`, `--bookkeeper-write-quorum` | The number of writes to make for each entry | 0
+`-r`, `--ml-mark-delete-max-rate` | Throttling rate for mark-delete operations (0 means no throttle) | 0
+
+The following is an example. Note that `ensemble >= write quorum >= ack quorum` must hold:
+
+```shell
+$ pulsar-admin namespaces set-persistence my-tenant/my-ns \
+  --bookkeeper-ensemble 3 \
+  --bookkeeper-write-quorum 2 \
+  --bookkeeper-ack-quorum 2
+```
+
+#### REST API
+
+{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/persistence|operation/setPersistence}
+
+#### Java
+
+```java
+int bkEnsemble = 3;
+int bkWriteQuorum = 2;
+int bkAckQuorum = 2;
+double markDeleteRate = 0.7;
+PersistencePolicies policies =
+  new PersistencePolicies(bkEnsemble, bkWriteQuorum, bkAckQuorum, markDeleteRate);
+admin.namespaces().setPersistence(namespace, policies);
+```
+
+### List persistence policies
+
+You can see which persistence policy currently applies to a namespace.
+
+#### Pulsar-admin
+
+Use the [`get-persistence`](reference-pulsar-admin.md#namespaces-get-persistence) subcommand and specify the namespace.
+
+The following is an example:
+
+```shell
+$ pulsar-admin namespaces get-persistence my-tenant/my-ns
+{
+  "bookkeeperEnsemble": 1,
+  "bookkeeperWriteQuorum": 1,
+  "bookkeeperAckQuorum": 1,
+  "managedLedgerMaxMarkDeleteRate": 0
+}
+```
+
+#### REST API
+
+{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace/persistence|operation/getPersistence}
+
+#### Java
+
+```java
+PersistencePolicies policies = admin.namespaces().getPersistence(namespace);
+```
+
+## How Pulsar uses ZooKeeper and BookKeeper
+
+This diagram illustrates the role of ZooKeeper and BookKeeper in a Pulsar cluster:
+
+![ZooKeeper and BookKeeper](assets/pulsar-system-architecture.png)
+
+Each Pulsar cluster consists of one or more message brokers. Each broker relies on an ensemble of bookies.
diff --git a/site2/website/versioned_docs/version-2.7.0/client-libraries-java.md b/site2/website/versioned_docs/version-2.7.0/client-libraries-java.md
new file mode 100644
index 00000000000000..e118136394bfde
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/client-libraries-java.md
@@ -0,0 +1,882 @@
+---
+id: version-2.7.0-client-libraries-java
+title: Pulsar Java client
+sidebar_label: Java
+original_id: client-libraries-java
+---
+
+You can use the Pulsar Java client to create Java [producers](#producer), [consumers](#consumer), and [readers](#reader) of messages and to perform [administrative tasks](admin-api-overview.md). The current version of the Java client is **{{pulsar:version}}**.
+
+All the methods in the [producer](#producer), [consumer](#consumer), and [reader](#reader) interfaces of the Java client are thread-safe.
+
+Javadoc for the Pulsar client is divided into two domains by package as follows.
+
+Package | Description | Maven Artifact
+:-------|:------------|:--------------
+[`org.apache.pulsar.client.api`](/api/client) | The producer and consumer API | [org.apache.pulsar:pulsar-client:{{pulsar:version}}](http://search.maven.org/#artifactdetails%7Corg.apache.pulsar%7Cpulsar-client%7C{{pulsar:version}}%7Cjar)
+[`org.apache.pulsar.client.admin`](/api/admin) | The Java [admin API](admin-api-overview.md) | [org.apache.pulsar:pulsar-client-admin:{{pulsar:version}}](http://search.maven.org/#artifactdetails%7Corg.apache.pulsar%7Cpulsar-client-admin%7C{{pulsar:version}}%7Cjar)
+
+This document focuses only on the client API for producing and consuming messages on Pulsar topics. For how to use the Java admin client, see [Pulsar admin interface](admin-api-overview.md).
+
+## Installation
+
+The latest version of the Pulsar Java client library is available via [Maven Central](http://search.maven.org/#artifactdetails%7Corg.apache.pulsar%7Cpulsar-client%7C{{pulsar:version}}%7Cjar). To use the latest version, add the `pulsar-client` library to your build configuration.
+
+### Maven
+
+If you use Maven, add the following information to the `pom.xml` file.
+
+```xml
+<!-- in your <properties> block -->
+<pulsar.version>{{pulsar:version}}</pulsar.version>
+
+<!-- in your <dependencies> block -->
+<dependency>
+  <groupId>org.apache.pulsar</groupId>
+  <artifactId>pulsar-client</artifactId>
+  <version>${pulsar.version}</version>
+</dependency>
+```
+
+### Gradle
+
+If you use Gradle, add the following information to the `build.gradle` file.
+
+```groovy
+def pulsarVersion = '{{pulsar:version}}'
+
+dependencies {
+    compile group: 'org.apache.pulsar', name: 'pulsar-client', version: pulsarVersion
+}
+```
+
+## Connection URLs
+
+To connect to Pulsar using client libraries, you need to specify a [Pulsar protocol](developing-binary-protocol.md) URL.
+
+You can assign Pulsar protocol URLs to specific clusters and use the `pulsar` scheme. The default port is `6650`. The following is an example of `localhost`.
+
+```http
+pulsar://localhost:6650
+```
+
+If you have multiple brokers, the URL is as follows.
+
+```http
+pulsar://localhost:6650,localhost:6651,localhost:6652
+```
+
+A URL for a production Pulsar cluster is as follows.
+
+```http
+pulsar://pulsar.us-west.example.com:6650
+```
+
+If you use [TLS](security-tls-authentication.md) authentication, the URL is as follows.
+
+```http
+pulsar+ssl://pulsar.us-west.example.com:6651
+```
+
+## Client
+
+You can instantiate a {@inject: javadoc:PulsarClient:/client/org/apache/pulsar/client/api/PulsarClient} object using just a URL for the target Pulsar [cluster](reference-terminology.md#cluster) like this:
+
+```java
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+```
+
+If you have multiple brokers, you can initialize a PulsarClient like this:
+
+```java
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650,localhost:6651,localhost:6652")
+        .build();
+```
+
+> ### Default broker URLs for standalone clusters
+> If you run a cluster in [standalone mode](getting-started-standalone.md), the broker is available at the `pulsar://localhost:6650` URL by default.
+
+When you create a client, you can use the `loadConf` configuration. The following parameters are available in `loadConf`.
+
+| Type | Name | Description | Default
+|---|---|---|---
+String | `serviceUrl` | Service URL provider for Pulsar service | None
+String | `authPluginClassName` | Name of the authentication plugin | None
+String | `authParams` | Parameters for the authentication plugin<br><br>**Example**<br>key1:val1,key2:val2 | None
+long | `operationTimeoutMs` | Operation timeout | 30000
+long | `statsIntervalSeconds` | Interval between each stats info<br><br>Stats is activated with a positive `statsInterval`<br><br>Set `statsIntervalSeconds` to at least 1 second | 60
+int | `numIoThreads` | The number of threads used for handling connections to brokers | 1
+int | `numListenerThreads` | The number of threads used for handling message listeners | 1
+boolean | `useTcpNoDelay` | Whether to use the TCP no-delay flag on the connection to disable the Nagle algorithm | true
+boolean | `useTls` | Whether to use TLS encryption on the connection | false
+String | `tlsTrustCertsFilePath` | Path to the trusted TLS certificate file | None
+boolean | `tlsAllowInsecureConnection` | Whether the Pulsar client accepts an untrusted TLS certificate from the broker | false
+boolean | `tlsHostnameVerificationEnable` | Whether to enable TLS hostname verification | false
+int | `concurrentLookupRequest` | The number of concurrent lookup requests allowed to be sent on each broker connection, to prevent overloading the broker | 5000
+int | `maxLookupRequest` | The maximum number of lookup requests allowed on each broker connection, to prevent overloading the broker | 50000
+int | `maxNumberOfRejectedRequestPerConnection` | The maximum number of rejected requests from a broker in a certain time frame (30 seconds), after which the current connection is closed and the client creates a new connection to a different broker | 50
+int | `keepAliveIntervalSeconds` | Keep-alive interval for each client-broker connection, in seconds | 30
+int | `connectionTimeoutMs` | Duration to wait for a connection to a broker to be established<br><br>If the duration passes without a response from the broker, the connection attempt is dropped | 10000
+int | `requestTimeoutMs` | Maximum duration for completing a request | 60000
+int | `defaultBackoffIntervalNanos` | Default duration for a backoff interval | TimeUnit.MILLISECONDS.toNanos(100)
+long | `maxBackoffIntervalNanos` | Maximum duration for a backoff interval | TimeUnit.SECONDS.toNanos(30)
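+
+Instead of calling the individual builder methods, you can pass these parameters programmatically as a map. The following is a minimal sketch, assuming a local standalone broker; `loadConf` takes a `Map<String, Object>` keyed by the parameter names in the table above.
+
+```java
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.pulsar.client.api.PulsarClient;
+
+// Build the configuration map using the loadConf parameter names.
+Map<String, Object> config = new HashMap<>();
+config.put("serviceUrl", "pulsar://localhost:6650");
+config.put("operationTimeoutMs", 30000);
+config.put("numIoThreads", 4);
+
+// loadConf applies the whole map to the client builder at once.
+PulsarClient client = PulsarClient.builder()
+        .loadConf(config)
+        .build();
+```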
+
+Check out the Javadoc for the {@inject: javadoc:PulsarClient:/client/org/apache/pulsar/client/api/PulsarClient} class for a full list of configurable parameters.
+
+> In addition to client-level configuration, you can also apply [producer](#configure-producer) and [consumer](#configure-consumer) specific configuration as described in the sections below.
+
+## Producer
+
+In Pulsar, producers write messages to topics. Once you have instantiated a {@inject: javadoc:PulsarClient:/client/org/apache/pulsar/client/api/PulsarClient} object (as in the [Client](#client) section above), you can create a {@inject: javadoc:Producer:/client/org/apache/pulsar/client/api/Producer} for a specific Pulsar [topic](reference-terminology.md#topic).
+
+```java
+Producer<byte[]> producer = client.newProducer()
+        .topic("my-topic")
+        .create();
+
+// You can then send messages to the broker and topic you specified:
+producer.send("My message".getBytes());
+```
+
+By default, producers produce messages that consist of byte arrays. You can produce different types by specifying a message [schema](#schemas).
+
+```java
+Producer<String> stringProducer = client.newProducer(Schema.STRING)
+        .topic("my-topic")
+        .create();
+stringProducer.send("My message");
+```
+
+> Make sure that you close your producers, consumers, and clients when you do not need them.
+> ```java
+> producer.close();
+> consumer.close();
+> client.close();
+> ```
+>
+> Close operations can also be asynchronous:
+> ```java
+> producer.closeAsync()
+>    .thenRun(() -> System.out.println("Producer closed"))
+>    .exceptionally((ex) -> {
+>        System.err.println("Failed to close producer: " + ex);
+>        return null;
+>    });
+> ```
+
+### Configure producer
+
+If you instantiate a `Producer` object by specifying only a topic name, as in the example above, the producer uses the default configuration.
+
+When you create a producer, you can use the `loadConf` configuration. The following parameters are available in `loadConf`.
+
+Type | Name | Description | Default
+|---|---|---|---
+String | `topicName` | Topic name | null
+String | `producerName` | Producer name | null
+long | `sendTimeoutMs` | Message send timeout in ms.<br><br>If a message is not acknowledged by a server before the `sendTimeout` expires, an error occurs. | 30000
+boolean | `blockIfQueueFull` | If it is set to `true`, when the outgoing message queue is full, the `Send` and `SendAsync` methods of the producer block, rather than failing and throwing errors.<br><br>If it is set to `false`, when the outgoing message queue is full, the `Send` and `SendAsync` methods of the producer fail and `ProducerQueueIsFullError` exceptions occur.<br><br>The `MaxPendingMessages` parameter determines the size of the outgoing message queue. | false
+int | `maxPendingMessages` | The maximum size of a queue holding pending messages.<br><br>For example, a message waiting to receive an acknowledgment from a [broker](reference-terminology.md#broker).<br><br>By default, when the queue is full, all calls to the `Send` and `SendAsync` methods fail **unless** you set `BlockIfQueueFull` to `true`. | 1000
+int | `maxPendingMessagesAcrossPartitions` | The maximum number of pending messages across partitions.<br><br>Use the setting to lower the max pending messages for each partition ({@link #setMaxPendingMessages(int)}) if the total number exceeds the configured value. | 50000
+MessageRoutingMode | `messageRoutingMode` | Message routing logic for producers on [partitioned topics](concepts-architecture-overview.md#partitioned-topics).<br><br>This logic is applied only when no key is set on messages.<br><br>Available options are as follows:<br><br>• `pulsar.RoundRobinDistribution`: round robin<br><br>• `pulsar.UseSinglePartition`: publish all messages to a single partition<br><br>• `pulsar.CustomPartition`: a custom partitioning scheme | `pulsar.RoundRobinDistribution`
+HashingScheme | `hashingScheme` | Hashing function determining the partition where you publish a particular message (**partitioned topics only**).<br><br>Available options are as follows:<br><br>• `pulsar.JavaStringHash`: the equivalent of `String.hashCode()` in Java<br><br>• `pulsar.Murmur3_32Hash`: applies the [Murmur3](https://en.wikipedia.org/wiki/MurmurHash) hashing function<br><br>• `pulsar.BoostHash`: applies the hashing function from C++'s [Boost](https://www.boost.org/doc/libs/1_62_0/doc/html/hash.html) library | `HashingScheme.JavaStringHash`
+ProducerCryptoFailureAction | `cryptoFailureAction` | The action the producer takes when encryption fails.<br><br>• **FAIL**: if encryption fails, unencrypted messages fail to send.<br><br>• **SEND**: if encryption fails, unencrypted messages are sent. | `ProducerCryptoFailureAction.FAIL`
+long | `batchingMaxPublishDelayMicros` | Batching time period of sending messages. | TimeUnit.MILLISECONDS.toMicros(1)
+int | `batchingMaxMessages` | The maximum number of messages permitted in a batch. | 1000
+boolean | `batchingEnabled` | Enable batching of messages. | true
+CompressionType | `compressionType` | Message data compression type used by a producer.<br><br>Available options:<br>• [`LZ4`](https://github.com/lz4/lz4)<br>• [`ZLIB`](https://zlib.net/)<br>• [`ZSTD`](https://facebook.github.io/zstd/)<br>• [`SNAPPY`](https://google.github.io/snappy/) | No compression
+
+You can configure parameters if you do not want to use the default configuration.
+
+For a full list, see the Javadoc for the {@inject: javadoc:ProducerBuilder:/client/org/apache/pulsar/client/api/ProducerBuilder} class. The following is an example.
+
+```java
+Producer<byte[]> producer = client.newProducer()
+    .topic("my-topic")
+    .batchingMaxPublishDelay(10, TimeUnit.MILLISECONDS)
+    .sendTimeout(10, TimeUnit.SECONDS)
+    .blockIfQueueFull(true)
+    .create();
+```
+
+### Message routing
+
+When using partitioned topics, you can specify the routing mode whenever you publish messages using a producer. For more information on specifying a routing mode using the Java client, see the [Partitioned Topics](cookbooks-partitioned.md) cookbook.
+
+### Async send
+
+You can publish messages [asynchronously](concepts-messaging.md#send-modes) using the Java client. With async send, the producer puts the message in a blocking queue and returns immediately. Then the client library sends the message to the broker in the background. If the queue is full (the maximum size is configurable), the producer is blocked or fails immediately when calling the API, depending on the arguments passed to the producer.
+
+The following is an example.
+
+```java
+producer.sendAsync("my-async-message".getBytes()).thenAccept(msgId -> {
+    System.out.printf("Message with ID %s successfully sent", msgId);
+});
+```
+
+As you can see from the example above, async send operations return a {@inject: javadoc:MessageId:/client/org/apache/pulsar/client/api/MessageId} wrapped in a [`CompletableFuture`](http://www.baeldung.com/java-completablefuture).
+
+### Configure messages
+
+In addition to a value, you can set additional items on a given message:
+
+```java
+producer.newMessage()
+    .key("my-message-key")
+    .value("my-async-message".getBytes())
+    .property("my-key", "my-value")
+    .property("my-other-key", "my-other-value")
+    .send();
+```
+
+You can terminate the builder chain with `sendAsync()` and get a future return.
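+
+For example, the following is a minimal sketch of the same builder chain terminated with `sendAsync()`; the callback handling shown is standard `CompletableFuture` usage, not a Pulsar-specific API.
+
+```java
+// Terminate the message builder chain with sendAsync() and handle the
+// returned CompletableFuture<MessageId>.
+producer.newMessage()
+    .key("my-message-key")
+    .value("my-async-message".getBytes())
+    .sendAsync()
+    .thenAccept(msgId -> System.out.println("Published with ID " + msgId))
+    .exceptionally(ex -> {
+        System.err.println("Publish failed: " + ex);
+        return null;
+    });
+```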
+
+## Consumer
+
+In Pulsar, consumers subscribe to topics and handle messages that producers publish to those topics. You can instantiate a new [consumer](reference-terminology.md#consumer) by first instantiating a {@inject: javadoc:PulsarClient:/client/org/apache/pulsar/client/api/PulsarClient} object and passing it a URL for a Pulsar broker (as in the [Client](#client) section above).
+
+Once you've instantiated a {@inject: javadoc:PulsarClient:/client/org/apache/pulsar/client/api/PulsarClient} object, you can create a {@inject: javadoc:Consumer:/client/org/apache/pulsar/client/api/Consumer} by specifying a [topic](reference-terminology.md#topic) and a [subscription](concepts-messaging.md#subscription-modes).
+
+```java
+Consumer consumer = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscribe();
+```
+
+The `subscribe` method automatically subscribes the consumer to the specified topic and subscription. One way to make the consumer listen on the topic is to set up a `while` loop. In this example loop, the consumer listens for messages, prints the contents of any received message, and then [acknowledges](reference-terminology.md#acknowledgment-ack) that the message has been processed. If the processing logic fails, you can use [negative acknowledgement](reference-terminology.md#acknowledgment-ack) to redeliver the message later.
+
+```java
+while (true) {
+  // Wait for a message
+  Message msg = consumer.receive();
+
+  try {
+      // Do something with the message
+      System.out.printf("Message received: %s", new String(msg.getData()));
+
+      // Acknowledge the message so that it can be deleted by the message broker
+      consumer.acknowledge(msg);
+  } catch (Exception e) {
+      // Message failed to process, redeliver later
+      consumer.negativeAcknowledge(msg);
+  }
+}
+```
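+
+As an alternative to polling with `receive` in a loop, you can register a message listener that the client invokes for each incoming message. The following is a minimal sketch using the `MessageListener` interface of the client API; the handler body is illustrative.
+
+```java
+Consumer<byte[]> listenerConsumer = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        // The listener is called for every incoming message; acknowledge
+        // (or negatively acknowledge) inside the callback.
+        .messageListener((consumer, msg) -> {
+            try {
+                System.out.printf("Message received: %s%n", new String(msg.getData()));
+                consumer.acknowledge(msg);
+            } catch (Exception e) {
+                consumer.negativeAcknowledge(msg);
+            }
+        })
+        .subscribe();
+```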
+
+### Configure consumer
+
+If you instantiate a `Consumer` object by specifying only a topic and subscription name, as in the example above, the consumer uses the default configuration.
+
+When you create a consumer, you can use the `loadConf` configuration. The following parameters are available in `loadConf`.
+
+Type | Name | Description | Default
+|---|---|---|---
+Set<String> | `topicNames` | Topic name | Sets.newTreeSet()
+Pattern | `topicsPattern` | Topic pattern | None
+String | `subscriptionName` | Subscription name | None
+SubscriptionType | `subscriptionType` | Subscription type<br><br>Three subscription types are available:<br>• Exclusive<br>• Failover<br>• Shared | SubscriptionType.Exclusive
+int | `receiverQueueSize` | Size of a consumer's receiver queue.<br><br>For example, the number of messages accumulated by a consumer before an application calls `Receive`.<br><br>A value higher than the default value increases consumer throughput, though at the expense of more memory utilization. | 1000
+long | `acknowledgementsGroupTimeMicros` | Group a consumer acknowledgment for a specified time.<br><br>By default, a consumer uses a 100ms grouping time to send out acknowledgments to a broker.<br><br>Setting a group time of 0 sends out acknowledgments immediately.<br><br>A longer ack group time is more efficient at the expense of a slight increase in message re-deliveries after a failure. | TimeUnit.MILLISECONDS.toMicros(100)
+long | `negativeAckRedeliveryDelayMicros` | Delay to wait before redelivering messages that failed to be processed.<br><br>When an application uses {@link Consumer#negativeAcknowledge(Message)}, failed messages are redelivered after a fixed timeout. | TimeUnit.MINUTES.toMicros(1)
+int | `maxTotalReceiverQueueSizeAcrossPartitions` | The max total receiver queue size across partitions.<br><br>This setting reduces the receiver queue size for individual partitions if the total receiver queue size exceeds this value. | 50000
+String | `consumerName` | Consumer name | null
+long | `ackTimeoutMillis` | Timeout of unacked messages | 0
+long | `tickDurationMillis` | Granularity of the ack-timeout redelivery.<br><br>Using a higher `tickDurationMillis` reduces the memory overhead to track messages when setting the ack-timeout to a bigger value (for example, 1 hour). | 1000
+int | `priorityLevel` | Priority level for a consumer to which a broker gives more priority while dispatching messages in the shared subscription mode.<br><br>The broker follows descending priorities. For example, 0=max-priority, 1, 2,...<br><br>In shared subscription mode, the broker **first dispatches messages to the consumers with the highest priority level if they have permits**. Otherwise, the broker considers consumers at the next priority level.<br><br>**Example 1**<br><br>If a subscription has consumerA with `priorityLevel` 0 and consumerB with `priorityLevel` 1, then the broker **only dispatches messages to consumerA until it runs out of permits** and then starts dispatching messages to consumerB.<br><br>**Example 2**<br><br>Consumer Priority, Level, Permits<br>C1, 0, 2<br>C2, 0, 1<br>C3, 0, 1<br>C4, 1, 2<br>C5, 1, 1<br><br>The order in which the broker dispatches messages to consumers is: C1, C2, C3, C1, C4, C5, C4. | 0
+ConsumerCryptoFailureAction | `cryptoFailureAction` | The action the consumer takes when it receives a message that cannot be decrypted.<br><br>• **FAIL**: this is the default option to fail messages until crypto succeeds.<br><br>• **DISCARD**: silently acknowledge and do not deliver the message to the application.<br><br>• **CONSUME**: deliver encrypted messages to applications. It is the application's responsibility to decrypt the message.<br><br>Note that decompression of an encrypted message fails.<br><br>If messages contain batch messages, the client is not able to retrieve individual messages in the batch.<br><br>A delivered encrypted message contains an {@link EncryptionContext} with the encryption and compression information needed for the application to decrypt the consumed message payload. | ConsumerCryptoFailureAction.FAIL
+SortedMap | `properties` | A name or value property of this consumer.<br><br>`properties` is application-defined metadata attached to a consumer.<br><br>When getting topic stats, this metadata is associated with the consumer stats for easier identification. | new TreeMap<>()
+boolean | `readCompacted` | If `readCompacted` is enabled, a consumer reads messages from a compacted topic rather than reading the full message backlog of a topic.<br><br>A consumer only sees the latest value for each key in the compacted topic, up until the point in the topic message backlog where compaction has run. Beyond that point, messages are sent as normal.<br><br>Only enable `readCompacted` on subscriptions to persistent topics that have a single active consumer (like failover or exclusive subscriptions).<br><br>Attempting to enable it on subscriptions to non-persistent topics or on shared subscriptions leads to a subscription call throwing a `PulsarClientException`. | false
+SubscriptionInitialPosition | `subscriptionInitialPosition` | Initial position at which to set the cursor when subscribing to a topic for the first time. | SubscriptionInitialPosition.Latest
+int | `patternAutoDiscoveryPeriod` | Topic auto-discovery period when using a pattern for the topic's consumer.<br><br>The default and minimum value is 1 minute. | 1
+RegexSubscriptionMode | `regexSubscriptionMode` | When subscribing to a topic using a regular expression, you can pick a certain type of topics.<br><br>• **PersistentOnly**: only subscribe to persistent topics.<br><br>• **NonPersistentOnly**: only subscribe to non-persistent topics.<br><br>• **AllTopics**: subscribe to both persistent and non-persistent topics. | RegexSubscriptionMode.PersistentOnly
+DeadLetterPolicy | `deadLetterPolicy` | Dead letter policy for consumers.<br><br>By default, some messages are probably redelivered many times, possibly without end.<br><br>By using the dead letter mechanism, messages have a max redelivery count. **When the maximum number of redeliveries is exceeded, messages are sent to the dead letter topic and acknowledged automatically**.<br><br>You can enable the dead letter mechanism by setting `deadLetterPolicy`.<br><br>**Example**<br><br>client.newConsumer()<br>.deadLetterPolicy(DeadLetterPolicy.builder().maxRedeliverCount(10).build())<br>.subscribe();<br><br>The default dead letter topic name is `{TopicName}-{Subscription}-DLQ`.<br><br>To set a custom dead letter topic name:<br>client.newConsumer()<br>.deadLetterPolicy(DeadLetterPolicy.builder().maxRedeliverCount(10)<br>.deadLetterTopic("your-topic-name").build())<br>.subscribe();<br><br>When you specify the dead letter policy without specifying `ackTimeoutMillis`, the ack timeout is set to 30000 milliseconds. | None
+boolean | `autoUpdatePartitions` | If `autoUpdatePartitions` is enabled, the consumer automatically subscribes to newly added partitions.<br><br>**Note**: this applies only to partitioned consumers. | true
+boolean | `replicateSubscriptionState` | If `replicateSubscriptionState` is enabled, the subscription state is replicated to geo-replicated clusters. | false
+
+You can configure parameters if you do not want to use the default configuration. For a full list, see the Javadoc for the {@inject: javadoc:ConsumerBuilder:/client/org/apache/pulsar/client/api/ConsumerBuilder} class.
+
+The following is an example.
+
+```java
+Consumer consumer = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .ackTimeout(10, TimeUnit.SECONDS)
+        .subscriptionType(SubscriptionType.Exclusive)
+        .subscribe();
+```
+
+### Async receive
+
+The `receive` method receives messages synchronously (the consumer process is blocked until a message is available). You can also use [async receive](concepts-messaging.md#receive-modes), which returns a [`CompletableFuture`](http://www.baeldung.com/java-completablefuture) object immediately once a new message is available.
+
+The following is an example.
+
+```java
+CompletableFuture<Message> asyncMessage = consumer.receiveAsync();
+```
+
+Async receive operations return a {@inject: javadoc:Message:/client/org/apache/pulsar/client/api/Message} wrapped inside of a [`CompletableFuture`](http://www.baeldung.com/java-completablefuture).
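+
+For instance, the following minimal sketch chains processing and acknowledgment onto the returned future; it assumes a typed `Consumer<byte[]>` and a hypothetical subscription name, and the chaining shown is standard `CompletableFuture` usage rather than a Pulsar-specific API.
+
+```java
+Consumer<byte[]> asyncConsumer = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-async-subscription") // hypothetical name
+        .subscribe();
+
+// Receive asynchronously, then acknowledge once the message is processed.
+asyncConsumer.receiveAsync().thenAccept(msg -> {
+    System.out.printf("Message received asynchronously: %s%n", new String(msg.getData()));
+    asyncConsumer.acknowledgeAsync(msg);
+});
+```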
+
+### Batch receive
+
+Use `batchReceive` to receive multiple messages for each call.
+
+The following is an example.
+
+```java
+Messages messages = consumer.batchReceive();
+for (Object message : messages) {
+  // do something
+}
+consumer.acknowledge(messages);
+```
+
+> Note:
+>
+> The batch receive policy limits the number and bytes of messages in a single batch. You can specify a timeout to wait for enough messages.
+>
+> The batch receive is completed if any of the following conditions is met: enough number of messages, enough bytes of messages, or wait timeout.
+>
+> ```java
+> Consumer consumer = client.newConsumer()
+>         .topic("my-topic")
+>         .subscriptionName("my-subscription")
+>         .batchReceivePolicy(BatchReceivePolicy.builder()
+>                 .maxNumMessages(100)
+>                 .maxNumBytes(1024 * 1024)
+>                 .timeout(200, TimeUnit.MILLISECONDS)
+>                 .build())
+>         .subscribe();
+> ```
+> The default batch receive policy is:
+> ```java
+> BatchReceivePolicy.builder()
+>         .maxNumMessages(-1)
+>         .maxNumBytes(10 * 1024 * 1024)
+>         .timeout(100, TimeUnit.MILLISECONDS)
+>         .build();
+> ```
+
+### Multi-topic subscriptions
+
+In addition to subscribing a consumer to a single Pulsar topic, you can also subscribe to multiple topics simultaneously using [multi-topic subscriptions](concepts-messaging.md#multi-topic-subscriptions). To use multi-topic subscriptions, you can supply either a regular expression (regex) or a `List` of topics. If you select topics via regex, all topics must be within the same Pulsar namespace.
+
+The following are some examples.
+
+```java
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.PulsarClient;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+ConsumerBuilder consumerBuilder = pulsarClient.newConsumer()
+        .subscriptionName(subscription);
+
+// Subscribe to all topics in a namespace
+Pattern allTopicsInNamespace = Pattern.compile("public/default/.*");
+Consumer allTopicsConsumer = consumerBuilder
+        .topicsPattern(allTopicsInNamespace)
+        .subscribe();
+
+// Subscribe to a subset of topics in a namespace, based on regex
+Pattern someTopicsInNamespace = Pattern.compile("public/default/foo.*");
+Consumer someTopicsConsumer = consumerBuilder
+        .topicsPattern(someTopicsInNamespace)
+        .subscribe();
+```
+
+In the above example, the consumer subscribes to the `persistent` topics that match the topic name pattern. If you want the consumer to subscribe to all `persistent` and `non-persistent` topics that match the topic name pattern, set `subscriptionTopicsMode` to `RegexSubscriptionMode.AllTopics`.
+
+```java
+Pattern pattern = Pattern.compile("public/default/.*");
+pulsarClient.newConsumer()
+        .subscriptionName("my-sub")
+        .topicsPattern(pattern)
+        .subscriptionTopicsMode(RegexSubscriptionMode.AllTopics)
+        .subscribe();
+```
+
+> #### Note
+>
+> By default, the `subscriptionTopicsMode` of the consumer is `PersistentOnly`. Available options of `subscriptionTopicsMode` are `PersistentOnly`, `NonPersistentOnly`, and `AllTopics`.
+
+You can also subscribe to an explicit list of topics (across namespaces if you wish):
+
+```java
+List<String> topics = Arrays.asList(
+        "topic-1",
+        "topic-2",
+        "topic-3"
+);
+
+Consumer multiTopicConsumer = consumerBuilder
+        .topics(topics)
+        .subscribe();
+
+// Alternatively:
+Consumer multiTopicConsumer = consumerBuilder
+        .topic(
+            "topic-1",
+            "topic-2",
+            "topic-3"
+        )
+        .subscribe();
+```
+
+You can also subscribe to multiple topics asynchronously using the `subscribeAsync` method rather than the synchronous `subscribe` method. The following is an example.
+
+```java
+Pattern allTopicsInNamespace = Pattern.compile("persistent://public/default.*");
+consumerBuilder
+        .topics(topics)
+        .subscribeAsync()
+        .thenAccept(this::receiveMessageFromConsumer);
+
+private void receiveMessageFromConsumer(Object consumer) {
+    ((Consumer)consumer).receiveAsync().thenAccept(message -> {
+        // Do something with the received message
+        receiveMessageFromConsumer(consumer);
+    });
+}
+```
+
+### Subscription modes
+
+Pulsar has various [subscription modes](concepts-messaging#subscription-modes) to match different scenarios. A topic can have multiple subscriptions with different subscription modes. However, a subscription can have only one subscription mode at a time.
+
+A subscription is identified by its subscription name, and it can specify only one subscription mode at a time. You cannot change the subscription mode unless all existing consumers of this subscription are offline.
+
+Different subscription modes have different message distribution semantics. This section describes the differences between subscription modes and how to use them.
+
+In order to better describe their differences, assume you have a topic named "my-topic", and the producer has published 10 messages.
+
+```java
+Producer<String> producer = client.newProducer(Schema.STRING)
+        .topic("my-topic")
+        .enableBatching(false)
+        .create();
+// 3 messages with "key-1", 3 messages with "key-2", 2 messages with "key-3" and 2 messages with "key-4"
+producer.newMessage().key("key-1").value("message-1-1").send();
+producer.newMessage().key("key-1").value("message-1-2").send();
+producer.newMessage().key("key-1").value("message-1-3").send();
+producer.newMessage().key("key-2").value("message-2-1").send();
+producer.newMessage().key("key-2").value("message-2-2").send();
+producer.newMessage().key("key-2").value("message-2-3").send();
+producer.newMessage().key("key-3").value("message-3-1").send();
+producer.newMessage().key("key-3").value("message-3-2").send();
+producer.newMessage().key("key-4").value("message-4-1").send();
+producer.newMessage().key("key-4").value("message-4-2").send();
+```
+
+#### Exclusive
+
+Create a new consumer and subscribe with the `Exclusive` subscription mode.
+
+```java
+Consumer consumer = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Exclusive)
+        .subscribe();
+```
+
+Only the first consumer is allowed to attach to the subscription; other consumers receive an error. The first consumer receives all 10 messages, and the consuming order is the same as the producing order.
+
+> Note:
+>
+> If the topic is a partitioned topic, the first consumer subscribes to all of its partitions; other consumers are not assigned partitions and receive an error.
+
+#### Failover
+
+Create new consumers and subscribe with the `Failover` subscription mode.
+
+```java
+Consumer consumer1 = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Failover)
+        .subscribe();
+Consumer consumer2 = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Failover)
+        .subscribe();
+// consumer1 is the active consumer, consumer2 is the standby consumer.
+// consumer1 receives 5 messages and then crashes; consumer2 takes over as the active consumer.
+```
+
+Multiple consumers can attach to the same subscription, yet only the first consumer is active, and the others are standby. When the active consumer is disconnected, messages are dispatched to one of the standby consumers, which then becomes the active consumer.
+
+If the first active consumer is disconnected after receiving 5 messages, the standby consumer takes over. Consumer1 receives:
+
+```
+("key-1", "message-1-1")
+("key-1", "message-1-2")
+("key-1", "message-1-3")
+("key-2", "message-2-1")
+("key-2", "message-2-2")
+```
+
+Consumer2 receives:
+
+```
+("key-2", "message-2-3")
+("key-3", "message-3-1")
+("key-3", "message-3-2")
+("key-4", "message-4-1")
+("key-4", "message-4-2")
+```
+
+> Note:
+>
+> If a topic is a partitioned topic, each partition has only one active consumer: messages of one partition are distributed to only one consumer, and messages of multiple partitions are distributed to multiple consumers.
+
+#### Shared
+
+Create new consumers and subscribe with the `Shared` subscription mode:
+
+```java
+Consumer consumer1 = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Shared)
+        .subscribe();
+
+Consumer consumer2 = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Shared)
+        .subscribe();
+// Both consumer1 and consumer2 are active consumers.
+```
+
+In shared subscription mode, multiple consumers can attach to the same subscription and messages are delivered in a round-robin distribution across consumers.
+
+If the broker dispatches only one message at a time, consumer1 receives the following messages.
+
+```
+("key-1", "message-1-1")
+("key-1", "message-1-3")
+("key-2", "message-2-2")
+("key-3", "message-3-1")
+("key-4", "message-4-1")
+```
+
+consumer2 receives the following messages.
+
+```
+("key-1", "message-1-2")
+("key-2", "message-2-1")
+("key-2", "message-2-3")
+("key-3", "message-3-2")
+("key-4", "message-4-2")
+```
+
+`Shared` subscription differs from the `Exclusive` and `Failover` subscription modes: it provides better flexibility, but cannot provide ordering guarantees.
+
+#### Key_shared
+
+`Key_Shared` is a subscription mode introduced in the 2.4.0 release. Create new consumers and subscribe with the `Key_Shared` subscription mode.
+
+```java
+Consumer consumer1 = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Key_Shared)
+        .subscribe();
+
+Consumer consumer2 = client.newConsumer()
+        .topic("my-topic")
+        .subscriptionName("my-subscription")
+        .subscriptionType(SubscriptionType.Key_Shared)
+        .subscribe();
+// Both consumer1 and consumer2 are active consumers.
+```
+
+`Key_Shared` subscription is like `Shared` subscription in that all consumers can attach to the same subscription. But it differs from `Shared` subscription in that messages with the same key are delivered to only one consumer, in order. By default, you do not know in advance which keys are assigned to which consumer, but a given key is assigned to only one consumer at a time. One possible distribution of messages between the two consumers follows.
+
+consumer1 receives the following messages.
+
+```
+("key-1", "message-1-1")
+("key-1", "message-1-2")
+("key-1", "message-1-3")
+("key-3", "message-3-1")
+("key-3", "message-3-2")
+```
+
+consumer2 receives the following messages.
+
+```
+("key-2", "message-2-1")
+("key-2", "message-2-2")
+("key-2", "message-2-3")
+("key-4", "message-4-1")
+("key-4", "message-4-2")
+```
+
+If batching is enabled on the producer side, messages with different keys are added to the same batch by default. The broker dispatches the batch to the consumer, so the default batch mechanism may break the message distribution semantics that the Key_Shared subscription guarantees. In this case, the producer needs to use the `KeyBasedBatcher`:
+
+```java
+Producer producer = client.newProducer()
+        .topic("my-topic")
+        .batcherBuilder(BatcherBuilder.KEY_BASED)
+        .create();
+```
+Or the producer can disable batching:
+
+```java
+Producer producer = client.newProducer()
+        .topic("my-topic")
+        .enableBatching(false)
+        .create();
+```
+> Note:
+>
+> If the message key is not specified, messages without a key are dispatched to one consumer in order by default.
+
+## Reader
+
+With the [reader interface](concepts-clients.md#reader-interface), Pulsar clients can "manually position" themselves within a topic and read all messages from a specified message onward. The Pulsar API for Java enables you to create {@inject: javadoc:Reader:/client/org/apache/pulsar/client/api/Reader} objects by specifying a topic and a {@inject: javadoc:MessageId:/client/org/apache/pulsar/client/api/MessageId}.
+
+The following is an example.
+
+```java
+byte[] msgIdBytes = // Some message ID byte array
+MessageId id = MessageId.fromByteArray(msgIdBytes);
+Reader reader = pulsarClient.newReader()
+        .topic(topic)
+        .startMessageId(id)
+        .create();
+
+while (true) {
+    Message message = reader.readNext();
+    // Process message
+}
+```
+
+In the example above, a `Reader` object is instantiated for a specific topic and message (by ID); the reader iterates over each message in the topic after the message identified by `msgIdBytes` (how that value is obtained depends on the application).
+
+The code sample above shows pointing the `Reader` object to a specific message (by ID), but you can also use `MessageId.earliest` to point to the earliest available message on the topic or `MessageId.latest` to point to the most recent available message.
+
+When you create a reader, you can use the `loadConf` configuration. The following parameters are available in `loadConf`.
+
+| Type | Name | Description | Default
+|---|---|---|---
+String|`topicName`|Topic name. |None
+int|`receiverQueueSize`|Size of a consumer's receiver queue. For example, the number of messages that can be accumulated by a consumer before an application calls `Receive`. A value higher than the default value increases consumer throughput, though at the expense of more memory utilization.|1000
+ReaderListener<T>|`readerListener`|A listener that is called for each message received.|None
+String|`readerName`|Reader name.|null
+String|`subscriptionRolePrefix`|Prefix of subscription role. |null
+CryptoKeyReader|`cryptoKeyReader`|Interface that abstracts the access to a key store.|null
+ConsumerCryptoFailureAction|`cryptoFailureAction`|The action the consumer takes when it receives a message that cannot be decrypted.<br/><br/>**FAIL**: this is the default option, to fail messages until crypto succeeds.<br/><br/>**DISCARD**: silently acknowledge and do not deliver the message to the application.<br/><br/>**CONSUME**: deliver encrypted messages to the application. It is the application's responsibility to decrypt the message.<br/><br/>If a message cannot be decrypted, message decompression fails, and if the message contains batched messages, the client is not able to retrieve the individual messages in the batch.<br/><br/>A delivered encrypted message contains an `EncryptionContext` with the encryption and compression information that the application can use to decrypt the consumed message payload.|ConsumerCryptoFailureAction.FAIL
+boolean|`readCompacted`|If enabling `readCompacted`, a consumer reads messages from a compacted topic rather than a full message backlog of a topic.<br/><br/>A consumer only sees the latest value for each key in the compacted topic, up until the point in the topic message backlog that has been compacted. Beyond that point, messages are sent as normal.<br/><br/>`readCompacted` can only be enabled on subscriptions to persistent topics, which have a single active consumer (for example, failover or exclusive subscriptions).<br/><br/>Attempting to enable it on subscriptions to non-persistent topics or on shared subscriptions leads to a subscription call throwing a `PulsarClientException`.|false
+boolean|`resetIncludeHead`|If set to true, the first message to be returned is the one specified by `messageId`.<br/><br/>If set to false, the first message to be returned is the one next to the message specified by `messageId`.|false
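+
+As a concrete sketch of `loadConf`, the same parameters from the table can be supplied as a map when building the reader (this reuses the `pulsarClient` from the example above; the values are illustrative, and `Map`/`HashMap` come from `java.util`):
+
+```java
+Map<String, Object> readerConfig = new HashMap<>();
+readerConfig.put("topicName", "my-topic");
+readerConfig.put("receiverQueueSize", 2000);
+readerConfig.put("readerName", "my-reader");
+
+Reader<byte[]> reader = pulsarClient.newReader()
+        .loadConf(readerConfig)
+        .startMessageId(MessageId.earliest)
+        .create();
+```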
+
+### Sticky key range reader
+
+With a sticky key range reader, the broker only dispatches messages whose message-key hash falls within the specified key hash range. Multiple key hash ranges can be specified on a reader.
+
+The following is an example to create a sticky key range reader.
+
+```java
+pulsarClient.newReader()
+        .topic(topic)
+        .startMessageId(MessageId.earliest)
+        .keyHashRange(Range.of(0, 10000), Range.of(20001, 30000))
+        .create();
+```
+
+The total hash range size is 65536, so the max end of a range should be less than or equal to 65535.
+
+## Schema
+
+In Pulsar, all message data consists of byte arrays "under the hood." [Message schemas](schema-get-started.md) enable you to use other types of data when constructing and handling messages (from simple types like strings to more complex, application-specific types). If you construct, say, a [producer](#producers) without specifying a schema, then the producer can only produce messages of type `byte[]`. The following is an example.
+
+```java
+Producer<byte[]> producer = client.newProducer()
+        .topic(topic)
+        .create();
+```
+
+The producer above is equivalent to a `Producer<byte[]>` (in fact, you should *always* explicitly specify the type). If you'd like to use a producer for a different type of data, you'll need to specify a **schema** that informs Pulsar which data type will be transmitted over the [topic](reference-terminology.md#topic).
+
+### Schema example
+
+Let's say that you have a `SensorReading` class that you'd like to transmit over a Pulsar topic:
+
+```java
+public class SensorReading {
+    public float temperature;
+
+    public SensorReading(float temperature) {
+        this.temperature = temperature;
+    }
+
+    // A no-arg constructor is required
+    public SensorReading() {
+    }
+
+    public float getTemperature() {
+        return temperature;
+    }
+
+    public void setTemperature(float temperature) {
+        this.temperature = temperature;
+    }
+}
+```
+
+You could then create a `Producer<SensorReading>` (or `Consumer<SensorReading>`) like this:
+
+```java
+Producer<SensorReading> producer = client.newProducer(JSONSchema.of(SensorReading.class))
+        .topic("sensor-readings")
+        .create();
+```
+
+The following schema formats are currently available for Java:
+
+* No schema or the byte array schema (which can be applied using `Schema.BYTES`):
+
+  ```java
+  Producer<byte[]> bytesProducer = client.newProducer(Schema.BYTES)
+          .topic("some-raw-bytes-topic")
+          .create();
+  ```
+
+  Or, equivalently:
+
+  ```java
+  Producer<byte[]> bytesProducer = client.newProducer()
+          .topic("some-raw-bytes-topic")
+          .create();
+  ```
+
+* `String` for normal UTF-8-encoded string data. Apply the schema using `Schema.STRING`:
+
+  ```java
+  Producer<String> stringProducer = client.newProducer(Schema.STRING)
+          .topic("some-string-topic")
+          .create();
+  ```
+
+* Create JSON schemas for POJOs using `Schema.JSON`. The following is an example.
+
+  ```java
+  Producer<MyPojo> pojoProducer = client.newProducer(Schema.JSON(MyPojo.class))
+          .topic("some-pojo-topic")
+          .create();
+  ```
+
+* Generate Protobuf schemas using `Schema.PROTOBUF`. The following example shows how to create the Protobuf schema and use it to instantiate a new producer:
+
+  ```java
+  Producer<MyProtobuf> protobufProducer = client.newProducer(Schema.PROTOBUF(MyProtobuf.class))
+          .topic("some-protobuf-topic")
+          .create();
+  ```
+
+* Define Avro schemas with `Schema.AVRO`. The following code snippet demonstrates how to create and use an Avro schema.
+
+  ```java
+  Producer<MyAvro> avroProducer = client.newProducer(Schema.AVRO(MyAvro.class))
+          .topic("some-avro-topic")
+          .create();
+  ```
+
+## Authentication
+
+Pulsar currently supports three authentication schemes: [TLS](security-tls-authentication.md), [Athenz](security-athenz.md), and [Oauth2](security-oauth2.md). You can use the Pulsar Java client with all of them.
+
+### TLS Authentication
+
+To use [TLS](security-tls-authentication.md), you need to enable TLS (for example, with the `enableTls` method shown below), point your Pulsar client to a TLS cert path, and provide paths to cert and key files.
+
+The following is an example.
+
+```java
+Map<String, String> authParams = new HashMap<>();
+authParams.put("tlsCertFile", "/path/to/client-cert.pem");
+authParams.put("tlsKeyFile", "/path/to/client-key.pem");
+
+Authentication tlsAuth = AuthenticationFactory
+        .create(AuthenticationTls.class.getName(), authParams);
+
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar+ssl://my-broker.com:6651")
+        .enableTls(true)
+        .tlsTrustCertsFilePath("/path/to/cacert.pem")
+        .authentication(tlsAuth)
+        .build();
+```
+
+### Athenz
+
+To use [Athenz](security-athenz.md) as an authentication provider, you need to [use TLS](#tls-authentication) and provide values for four parameters in a hash:
+
+* `tenantDomain`
+* `tenantService`
+* `providerDomain`
+* `privateKey`
+
+You can also set an optional `keyId`. The following is an example.
+
+```java
+Map<String, String> authParams = new HashMap<>();
+authParams.put("tenantDomain", "shopping"); // Tenant domain name
+authParams.put("tenantService", "some_app"); // Tenant service name
+authParams.put("providerDomain", "pulsar"); // Provider domain name
+authParams.put("privateKey", "file:///path/to/private.pem"); // Tenant private key path
+authParams.put("keyId", "v1"); // Key id for the tenant private key (optional, default: "0")
+
+Authentication athenzAuth = AuthenticationFactory
+        .create(AuthenticationAthenz.class.getName(), authParams);
+
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar+ssl://my-broker.com:6651")
+        .enableTls(true)
+        .tlsTrustCertsFilePath("/path/to/cacert.pem")
+        .authentication(athenzAuth)
+        .build();
+```
+
+> #### Supported pattern formats
+> The `privateKey` parameter supports the following three pattern formats:
+> * `file:///path/to/file`
+> * `file:/path/to/file`
+> * `data:application/x-pem-file;base64,<base64-encoded value>`
+
+### Oauth2
+
+The following example shows how to use [Oauth2](security-oauth2.md) as an authentication provider for the Pulsar Java client.
+
+You can use the factory method to configure authentication for the Pulsar Java client.
+
+```java
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://broker.example.com:6650/")
+        .authentication(
+            AuthenticationFactoryOAuth2.clientCredentials(this.issuerUrl, this.credentialsUrl, this.audience))
+        .build();
+```
+
+In addition, you can also use the encoded parameters to configure authentication for the Pulsar Java client.
+
+```java
+Authentication auth = AuthenticationFactory
+        .create(AuthenticationOAuth2.class.getName(), "{\"type\":\"client_credentials\",\"privateKey\":\"...\",\"issuerUrl\":\"...\",\"audience\":\"...\"}");
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://broker.example.com:6650/")
+        .authentication(auth)
+        .build();
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/client-libraries-node.md b/site2/website/versioned_docs/version-2.7.0/client-libraries-node.md
new file mode 100644
index 00000000000000..9cc75851c3eae6
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/client-libraries-node.md
@@ -0,0 +1,431 @@
+---
+id: version-2.7.0-client-libraries-node
+title: The Pulsar Node.js client
+sidebar_label: Node.js
+original_id: client-libraries-node
+---
+
+The Pulsar Node.js client can be used to create Pulsar [producers](#producers), [consumers](#consumers), and [readers](#readers) in Node.js.
+
+All the methods in [producers](#producers), [consumers](#consumers), and [readers](#readers) of a Node.js client are thread-safe.
+
+## Installation
+
+You can install the [`pulsar-client`](https://www.npmjs.com/package/pulsar-client) library via [npm](https://www.npmjs.com/).
+
+### Requirements
+
+The Pulsar Node.js client library is based on the C++ client library. Follow [these instructions](client-libraries-cpp.md#compilation) to install the Pulsar C++ client library.
+
+### Compatibility
+
+Compatibility between each version of the Node.js client and the C++ client is as follows:
+
+| Node.js client | C++ client     |
+| :------------- | :------------- |
+| 1.0.0          | 2.3.0 or later |
+| 1.1.0          | 2.4.0 or later |
+| 1.2.0          | 2.5.0 or later |
+
+If an incompatible version of the C++ client is installed, you may fail to build or run this library.
+
+### Installation using npm
+
+Install the `pulsar-client` library via [npm](https://www.npmjs.com/):
+
+```shell
+$ npm install pulsar-client
+```
+
+> #### Note
+>
+> Also, this library works only in Node.js 10.x or later because it uses the [`node-addon-api`](https://github.com/nodejs/node-addon-api) module to wrap the C++ library.
+
+## Connection URLs
+
+To connect to Pulsar using client libraries, you need to specify a [Pulsar protocol](developing-binary-protocol.md) URL.
+
+Pulsar protocol URLs are assigned to specific clusters, use the `pulsar` scheme and have a default port of 6650. Here is an example for `localhost`:
+
+```http
+pulsar://localhost:6650
+```
+
+A URL for a production Pulsar cluster may look something like this:
+
+```http
+pulsar://pulsar.us-west.example.com:6650
+```
+
+If you are using [TLS encryption](security-tls-transport.md) or [TLS Authentication](security-tls-authentication.md), the URL will look something like this:
+
+```http
+pulsar+ssl://pulsar.us-west.example.com:6651
+```
+
+## Create a client
+
+In order to interact with Pulsar, you will first need a client object. You can create a client instance using a `new` operator and the `Client` method, passing in a client options object (more on configuration [below](#client-configuration)).
+
+Here is an example:
+
+```JavaScript
+const Pulsar = require('pulsar-client');
+
+(async () => {
+  const client = new Pulsar.Client({
+    serviceUrl: 'pulsar://localhost:6650',
+  });
+
+  await client.close();
+})();
+```
+
+### Client configuration
+
+The following configurable parameters are available for Pulsar clients:
+
+| Parameter | Description | Default |
+| :-------- | :---------- | :------ |
+| `serviceUrl` | The connection URL for the Pulsar cluster. See [above](#connection-urls) for more info. | |
+| `authentication` | Configure the authentication provider (default: no authentication). See [TLS Authentication](security-tls-authentication.md) for more info. | |
+| `operationTimeoutSeconds` | The timeout for Node.js client operations (creating producers, subscribing to and unsubscribing from [topics](reference-terminology.md#topic)). Retries occur until this threshold is reached, at which point the operation fails. | 30 |
+| `ioThreads` | The number of threads to use for handling connections to Pulsar [brokers](reference-terminology.md#broker). | 1 |
+| `messageListenerThreads` | The number of threads used by message listeners ([consumers](#consumers) and [readers](#readers)). | 1 |
+| `concurrentLookupRequest` | The number of concurrent lookup requests that can be sent on each broker connection. Setting a maximum helps to keep from overloading brokers. You should set values over the default of 50000 only if the client needs to produce and/or subscribe to thousands of Pulsar topics. | 50000 |
+| `tlsTrustCertsFilePath` | The file path for the trusted TLS certificate. | |
+| `tlsValidateHostname` | Whether to enable TLS hostname verification. | `false` |
+| `tlsAllowInsecureConnection` | Whether the Pulsar client accepts an untrusted TLS certificate from the broker. | `false` |
+| `statsIntervalInSeconds` | Interval between each stats report. Stats are activated when the interval is positive. The value should be at least 1 second. | 600 |
+| `log` | A function that is used for logging. | `console.log` |
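+
+For instance, a client that tunes a few of the options above might look like the following sketch (the values shown are illustrative, not recommendations):
+
+```JavaScript
+const Pulsar = require('pulsar-client');
+
+(async () => {
+  const client = new Pulsar.Client({
+    serviceUrl: 'pulsar://localhost:6650',
+    operationTimeoutSeconds: 60,  // allow slower broker operations
+    ioThreads: 4,                 // more threads for broker connections
+    statsIntervalInSeconds: 60,   // report client stats every minute
+  });
+
+  await client.close();
+})();
+```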
+
+## Producers
+
+Pulsar producers publish messages to Pulsar topics. You can [configure](#producer-configuration) Node.js producers using a producer configuration object.
+
+Here is an example:
+
+```JavaScript
+const producer = await client.createProducer({
+  topic: 'my-topic',
+});
+
+await producer.send({
+  data: Buffer.from("Hello, Pulsar"),
+});
+
+await producer.close();
+```
+
+> #### Promise operation
+> When you create a new Pulsar producer, the operation returns a `Promise` object through which you get the producer instance or an error via the executor function.
+> This example uses the `await` operator instead of an executor function.
+
+### Producer operations
+
+Pulsar Node.js producers have the following methods available:
+
+| Method | Description | Return type |
+| :----- | :---------- | :---------- |
+| `send(Object)` | Publishes a [message](#messages) to the producer's topic. When the message is successfully acknowledged by the Pulsar broker, or an error is thrown, the `Promise` object runs its executor function. | `Promise` |
+| `flush()` | Sends messages from the send queue to the Pulsar broker. When the messages are successfully acknowledged by the Pulsar broker, or an error is thrown, the `Promise` object runs its executor function. | `Promise` |
+| `close()` | Closes the producer and releases all resources allocated to it. Once `close()` is called, no more messages are accepted from the publisher. This method returns a `Promise` object that runs its executor function once all pending publish requests have been persisted by Pulsar. If an error is thrown, no pending writes are retried. | `Promise` |
+
+### Producer configuration
+
+| Parameter | Description | Default |
+| :-------- | :---------- | :------ |
+| `topic` | The Pulsar [topic](reference-terminology.md#topic) to which the producer will publish messages. | |
+| `producerName` | A name for the producer. If you do not explicitly assign a name, Pulsar will automatically generate a globally unique name. If you choose to explicitly assign a name, it will need to be unique across *all* Pulsar clusters, otherwise the creation operation will throw an error. | |
+| `sendTimeoutMs` | When publishing a message to a topic, the producer will wait for an acknowledgment from the responsible Pulsar [broker](reference-terminology.md#broker). If a message is not acknowledged within the threshold set by this parameter, an error will be thrown. If you set `sendTimeoutMs` to -1, the timeout will be set to infinity (and thus removed). Removing the send timeout is recommended when using Pulsar's [message de-duplication](cookbooks-deduplication.md) feature. | 30000 |
+| `initialSequenceId` | The initial sequence ID of the message. When the producer sends a message, the sequence ID is attached to it and incremented for each subsequent send. | |
+| `maxPendingMessages` | The maximum size of the queue holding pending messages (i.e. messages waiting to receive an acknowledgment from the [broker](reference-terminology.md#broker)). By default, when the queue is full all calls to the `send` method will fail *unless* `blockIfQueueFull` is set to `true`. | 1000 |
+| `maxPendingMessagesAcrossPartitions` | The maximum size of the sum of the partitions' pending queues. | 50000 |
+| `blockIfQueueFull` | If set to `true`, the producer's `send` method will wait when the outgoing message queue is full rather than failing and throwing an error (the size of that queue is dictated by the `maxPendingMessages` parameter); if set to `false` (the default), `send` operations will fail and throw an error when the queue is full. | `false` |
+| `messageRoutingMode` | The message routing logic (for producers on [partitioned topics](concepts-messaging.md#partitioned-topics)). This logic is applied only when no key is set on messages. The available options are: round robin (`RoundRobinDistribution`), or publishing all messages to a single partition (`UseSinglePartition`, the default). | `UseSinglePartition` |
+| `hashingScheme` | The hashing function that determines the partition on which a particular message is published (partitioned topics only). The available options are: `JavaStringHash` (the equivalent of `String.hashCode()` in Java), `Murmur3_32Hash` (applies the [Murmur3](https://en.wikipedia.org/wiki/MurmurHash) hashing function), or `BoostHash` (applies the hashing function from C++'s [Boost](https://www.boost.org/doc/libs/1_62_0/doc/html/hash.html) library). | `BoostHash` |
+| `compressionType` | The message data compression type used by the producer. The available options are [`LZ4`](https://github.com/lz4/lz4), and [`Zlib`](https://zlib.net/). | Compression None |
+| `batchingEnabled` | If set to `true`, the producer sends messages in batches. | `true` |
+| `batchingMaxPublishDelayMs` | The maximum delay before a batch of messages is sent. | 10 |
+| `batchingMaxMessages` | The maximum number of messages in a batch. | 1000 |
+| `properties` | The metadata of the producer. | |
+
+### Producer example
+
+This example creates a Node.js producer for the `my-topic` topic and sends 10 messages to that topic:
+
+```JavaScript
+const Pulsar = require('pulsar-client');
+
+(async () => {
+  // Create a client
+  const client = new Pulsar.Client({
+    serviceUrl: 'pulsar://localhost:6650',
+  });
+
+  // Create a producer
+  const producer = await client.createProducer({
+    topic: 'my-topic',
+  });
+
+  // Send messages
+  for (let i = 0; i < 10; i += 1) {
+    const msg = `my-message-${i}`;
+    producer.send({
+      data: Buffer.from(msg),
+    });
+    console.log(`Sent message: ${msg}`);
+  }
+  await producer.flush();
+
+  await producer.close();
+  await client.close();
+})();
+```
+
+## Consumers
+
+Pulsar consumers subscribe to one or more Pulsar topics and listen for incoming messages produced on them. You can [configure](#consumer-configuration) Node.js consumers using a consumer configuration object.
+
+Here is an example:
+
+```JavaScript
+const consumer = await client.subscribe({
+  topic: 'my-topic',
+  subscription: 'my-subscription',
+});
+
+const msg = await consumer.receive();
+console.log(msg.getData().toString());
+consumer.acknowledge(msg);
+
+await consumer.close();
+```
+
+> #### Promise operation
+> When you create a new Pulsar consumer, the operation returns a `Promise` object through which you get the consumer instance or an error via the executor function.
+> This example uses the `await` operator instead of an executor function.
+
+### Consumer operations
+
+Pulsar Node.js consumers have the following methods available:
+
+| Method | Description | Return type |
+| :----- | :---------- | :---------- |
+| `receive()` | Receives a single message from the topic. When the message is available, the `Promise` object runs its executor function and returns the message object. | `Promise` |
+| `receive(Number)` | Receives a single message from the topic with a specific timeout in milliseconds. | `Promise` |
+| `acknowledge(Object)` | [Acknowledges](reference-terminology.md#acknowledgment-ack) a message to the Pulsar [broker](reference-terminology.md#broker) by message object. | `void` |
+| `acknowledgeId(Object)` | [Acknowledges](reference-terminology.md#acknowledgment-ack) a message to the Pulsar [broker](reference-terminology.md#broker) by message ID object. | `void` |
+| `acknowledgeCumulative(Object)` | [Acknowledges](reference-terminology.md#acknowledgment-ack) *all* the messages in the stream, up to and including the specified message. The `acknowledgeCumulative` method returns void and sends the ack to the broker asynchronously. After that, the messages are *not* redelivered to the consumer. Cumulative acking cannot be used with a [shared](concepts-messaging.md#shared) subscription type. | `void` |
+| `acknowledgeCumulativeId(Object)` | [Acknowledges](reference-terminology.md#acknowledgment-ack) *all* the messages in the stream, up to and including the specified message ID. | `void` |
+| `negativeAcknowledge(Message)`| [Negatively acknowledges](reference-terminology.md#negative-acknowledgment-nack) a message to the Pulsar broker by message object. | `void` |
+| `negativeAcknowledgeId(MessageId)` | [Negatively acknowledges](reference-terminology.md#negative-acknowledgment-nack) a message to the Pulsar broker by message ID object. | `void` |
+| `close()` | Closes the consumer, disabling its ability to receive messages from the broker. | `Promise` |
+
+### Consumer configuration
+
+| Parameter | Description | Default |
+| :-------- | :---------- | :------ |
+| `topic` | The Pulsar [topic](reference-terminology.md#topic) on which the consumer will establish a subscription and listen for messages. | |
+| `subscription` | The subscription name for this consumer. | |
+| `subscriptionType` | Available options are `Exclusive`, `Shared`, and `Failover`. | `Exclusive` |
+| `subscriptionInitialPosition` | Initial position at which to set the cursor when subscribing to a topic for the first time. | `SubscriptionInitialPosition.Latest` |
+| `ackTimeoutMs` | Acknowledge timeout in milliseconds. | 0 |
+| `nAckRedeliverTimeoutMs` | Delay to wait before redelivering messages that failed to be processed. | 60000 |
+| `receiverQueueSize` | Sets the size of the consumer's receiver queue, i.e. the number of messages that can be accumulated by the consumer before the application calls `receive`. A value higher than the default of 1000 could increase consumer throughput, though at the expense of more memory utilization. | 1000 |
+| `receiverQueueSizeAcrossPartitions` | Set the max total receiver queue size across partitions. This setting will be used to reduce the receiver queue size for individual partitions if the total exceeds this value. | 50000 |
+| `consumerName` | The name of the consumer. Currently (v2.4.1), [failover](concepts-messaging.md#failover) mode uses the consumer name for ordering. | |
+| `properties` | The metadata of the consumer. | |
+| `listener`| A listener that is called for each message received. | |
+| `readCompacted`| If enabling `readCompacted`, a consumer reads messages from a compacted topic rather than reading a full message backlog of a topic. A consumer only sees the latest value for each key in the compacted topic, up until the point in the topic message backlog that has been compacted. Beyond that point, messages are sent as normal. `readCompacted` can only be enabled on subscriptions to persistent topics, which have a single active consumer (like failover or exclusive subscriptions). Attempting to enable it on subscriptions to non-persistent topics or on shared subscriptions leads to a subscription call throwing a `PulsarClientException`. | false |
+
+### Consumer example
+
+This example creates a Node.js consumer with the `my-subscription` subscription on the `my-topic` topic, then receives 10 messages, prints the content of each, and acknowledges each message to the Pulsar broker:
+
+```JavaScript
+const Pulsar = require('pulsar-client');
+
+(async () => {
+  // Create a client
+  const client = new Pulsar.Client({
+    serviceUrl: 'pulsar://localhost:6650',
+  });
+
+  // Create a consumer
+  const consumer = await client.subscribe({
+    topic: 'my-topic',
+    subscription: 'my-subscription',
+    subscriptionType: 'Exclusive',
+  });
+
+  // Receive messages
+  for (let i = 0; i < 10; i += 1) {
+    const msg = await consumer.receive();
+    console.log(msg.getData().toString());
+    consumer.acknowledge(msg);
+  }
+
+  await consumer.close();
+  await client.close();
+})();
+```
+
+Alternatively, a consumer can be created with a `listener` to process messages.
+
+```JavaScript
+// Create a consumer
+const consumer = await client.subscribe({
+  topic: 'my-topic',
+  subscription: 'my-subscription',
+  subscriptionType: 'Exclusive',
+  listener: (msg, msgConsumer) => {
+    console.log(msg.getData().toString());
+    msgConsumer.acknowledge(msg);
+  },
+});
+```
+
+## Readers
+
+Pulsar readers process messages from Pulsar topics. Readers are different from consumers because with readers you need to explicitly specify which message in the stream you want to begin with (consumers, on the other hand, automatically begin with the most recently unacked message). You can [configure](#reader-configuration) Node.js readers using a reader configuration object.
+
+Here is an example:
+
+```JavaScript
+const reader = await client.createReader({
+  topic: 'my-topic',
+  startMessageId: Pulsar.MessageId.earliest(),
+});
+
+const msg = await reader.readNext();
+console.log(msg.getData().toString());
+
+await reader.close();
+```
+
+### Reader operations
+
+Pulsar Node.js readers have the following methods available:
+
+| Method | Description | Return type |
+| :----- | :---------- | :---------- |
+| `readNext()` | Receives the next message on the topic (analogous to the `receive` method for [consumers](#consumer-operations)). When the message is available, the `Promise` object runs its executor function and returns the message object. | `Promise` |
+| `readNext(Number)` | Receives a single message from the topic with a specific timeout in milliseconds. | `Promise` |
+| `hasNext()` | Returns whether the broker has a next message in the target topic. | `Boolean` |
+| `close()` | Closes the reader, disabling its ability to receive messages from the broker. | `Promise` |
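+
+For example, `hasNext()` can be combined with `readNext()` to drain whatever the topic currently holds and then stop, as in this short sketch (it reuses the `reader` from the example above and the documented `Boolean` return type of `hasNext()`):
+
+```JavaScript
+// Read until the reader catches up with the last published message
+while (reader.hasNext()) {
+  const msg = await reader.readNext();
+  console.log(msg.getData().toString());
+}
+```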
+
+### Reader configuration
+
+| Parameter | Description | Default |
+| :-------- | :---------- | :------ |
+| `topic` | The Pulsar [topic](reference-terminology.md#topic) on which the reader will establish a subscription and listen for messages. | |
+| `startMessageId` | The initial reader position, i.e. the message at which the reader begins processing messages. The options are `Pulsar.MessageId.earliest` (the earliest available message on the topic), `Pulsar.MessageId.latest` (the latest available message on the topic), or a message ID object for a position that is not earliest or latest. | |
+| `receiverQueueSize` | Sets the size of the reader's receiver queue, i.e. the number of messages that can be accumulated by the reader before the application calls `readNext`. A value higher than the default of 1000 could increase reader throughput, though at the expense of more memory utilization. | 1000 |
+| `readerName` | The name of the reader. | |
+| `subscriptionRolePrefix` | The subscription role prefix. | |
+| `readCompacted` | If enabling `readCompacted`, a reader reads messages from a compacted topic rather than reading a full message backlog of a topic. A reader only sees the latest value for each key in the compacted topic, up until the point in the topic message backlog that has been compacted. Beyond that point, messages are sent as normal. `readCompacted` can only be enabled on subscriptions to persistent topics, which have a single active consumer (like failover or exclusive subscriptions). Attempting to enable it on subscriptions to non-persistent topics or on shared subscriptions leads to a subscription call throwing a `PulsarClientException`. | `false` |
+
+
+### Reader example
+
+This example creates a Node.js reader on the `my-topic` topic, then reads 10 messages and prints their content:
+
+```JavaScript
+const Pulsar = require('pulsar-client');
+
+(async () => {
+  // Create a client
+  const client = new Pulsar.Client({
+    serviceUrl: 'pulsar://localhost:6650',
+    operationTimeoutSeconds: 30,
+  });
+
+  // Create a reader
+  const reader = await client.createReader({
+    topic: 'my-topic',
+    startMessageId: Pulsar.MessageId.earliest(),
+  });
+
+  // read messages
+  for (let i = 0; i < 10; i += 1) {
+    const msg = await reader.readNext();
+    console.log(msg.getData().toString());
+  }
+
+  await reader.close();
+  await client.close();
+})();
+```
+
+## Messages
+
+In the Pulsar Node.js client, you construct a producer message object for the producer to send.
+
+Here is an example message:
+
+```JavaScript
+const msg = {
+  data: Buffer.from('Hello, Pulsar'),
+  partitionKey: 'key1',
+  properties: {
+    'foo': 'bar',
+  },
+  eventTimestamp: Date.now(),
+  replicationClusters: [
+    'cluster1',
+    'cluster2',
+  ],
+}
+
+await producer.send(msg);
+```
+
+The following keys are available for producer message objects:
+
+| Parameter | Description |
+| :-------- | :---------- |
+| `data` | The actual data payload of the message. |
+| `properties` | An object for any application-specific metadata attached to the message. |
+| `eventTimestamp` | The timestamp associated with the message. |
+| `sequenceId` | The sequence ID of the message. |
+| `partitionKey` | The optional key associated with the message (particularly useful for things like topic compaction). |
+| `replicationClusters` | The clusters to which this message will be replicated. Pulsar brokers handle message replication automatically; you should only change this setting if you want to override the broker default. |
+
+### Message object operations
+
+In the Pulsar Node.js client, you receive (or read) message objects as a consumer (or reader).
+
+The message object has the following methods available:
+
+| Method | Description | Return type |
+| :----- | :---------- | :---------- |
+| `getTopicName()` | Getter method of topic name. | `String` |
+| `getProperties()` | Getter method of properties. | `Array` |
+| `getData()` | Getter method of message data. | `Buffer` |
+| `getMessageId()` | Getter method of the [message ID object](#message-id-object-operations). | `Object` |
+| `getPublishTimestamp()` | Getter method of publish timestamp. | `Number` |
+| `getEventTimestamp()` | Getter method of event timestamp. | `Number` |
+| `getRedeliveryCount()` | Getter method of redelivery count. | `Number` |
+| `getPartitionKey()` | Getter method of partition key. | `String` |
+
+### Message ID object operations
+
+In the Pulsar Node.js client, you can get a message ID object from a message object.
+
+The message ID object has the following methods available:
+
+| Method | Description | Return type |
+| :----- | :---------- | :---------- |
+| `serialize()` | Serialize the message ID into a Buffer for storing. | `Buffer` |
+| `toString()` | Get the message ID as a String. | `String` |
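+
+Together with the static `deserialize()` method listed below, `serialize()` makes it possible to checkpoint a position outside Pulsar and resume from it later. A small sketch (how the buffer is persisted is up to the application):
+
+```JavaScript
+// Persist the position of the last processed message...
+const idBuffer = msg.getMessageId().serialize();
+
+// ...and later restore it, e.g. to start a reader from that point
+const restoredId = Pulsar.MessageId.deserialize(idBuffer);
+const reader = await client.createReader({
+  topic: 'my-topic',
+  startMessageId: restoredId,
+});
+```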
+
+The client also provides static methods on the message ID object. You can access them as `Pulsar.MessageId.someStaticMethod`.
+
+The following static methods are available for the message ID object:
+
+| Method | Description | Return type |
+| :----- | :---------- | :---------- |
+| `earliest()` | MessageId representing the earliest, or oldest available message stored in the topic. | `Object` |
+| `latest()` | MessageId representing the latest, or last published message in the topic. | `Object` |
+| `deserialize(Buffer)` | Deserialize a message ID object from a Buffer. | `Object` |
+
diff --git a/site2/website/versioned_docs/version-2.7.0/client-libraries-python.md b/site2/website/versioned_docs/version-2.7.0/client-libraries-python.md
new file mode 100644
index 00000000000000..d79a44aaf22107
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/client-libraries-python.md
@@ -0,0 +1,291 @@
+---
+id: version-2.7.0-client-libraries-python
+title: Pulsar Python client
+sidebar_label: Python
+original_id: client-libraries-python
+---
+
+The Pulsar Python client library is a wrapper over the existing [C++ client library](client-libraries-cpp.md) and exposes all of the [same features](/api/cpp). You can find the code in the [`python` subdirectory](https://github.com/apache/pulsar/tree/master/pulsar-client-cpp/python) of the C++ client code.
+
+All the methods in producer, consumer, and reader of a Python client are thread-safe.
+
+[pdoc](https://github.com/BurntSushi/pdoc)-generated API docs for the Python client are available [here](/api/python).
+
+## Install
+
+You can install the [`pulsar-client`](https://pypi.python.org/pypi/pulsar-client) library either via [PyPi](https://pypi.python.org/pypi), using [pip](#install-using-pip), or by building the library from source.
+
+### Install using pip
+
+To install the `pulsar-client` library as a pre-built package using the [pip](https://pip.pypa.io/en/stable/) package manager:
+
+```shell
+$ pip install pulsar-client=={{pulsar:version_number}}
+```
+
+Installation via PyPi is available for the following Python versions:
+
+Platform | Supported Python versions
+:--------|:-------------------------
+MacOS 10.13 (High Sierra), 10.14 (Mojave) | 2.7, 3.7
+Linux | 2.7, 3.4, 3.5, 3.6, 3.7
+
+### Install from source
+
+To install the `pulsar-client` library by building from source, follow [these instructions](client-libraries-cpp.md#compilation) and compile the Pulsar C++ client library. That builds the Python binding for the library.
+
+To install the built Python bindings:
+
+```shell
+$ git clone https://github.com/apache/pulsar
+$ cd pulsar/pulsar-client-cpp/python
+$ sudo python setup.py install
+```
+
+## API Reference
+
+The complete Python API reference is available at [api/python](/api/python).
+
+## Examples
+
+You can find a variety of Python code examples for the `pulsar-client` library below.
+
+### Producer example
+
+The following example creates a Python producer for the `my-topic` topic and sends 10 messages on that topic:
+
+```python
+import pulsar
+
+client = pulsar.Client('pulsar://localhost:6650')
+
+producer = client.create_producer('my-topic')
+
+for i in range(10):
+    producer.send(('Hello-%d' % i).encode('utf-8'))
+
+client.close()
+```
+
+### Consumer example
+
+The following example creates a consumer with the `my-subscription` subscription name on the `my-topic` topic, receives incoming messages, prints the content and ID of messages that arrive, and acknowledges each message to the Pulsar broker.
+
+```python
+consumer = client.subscribe('my-topic', 'my-subscription')
+
+while True:
+    msg = consumer.receive()
+    try:
+        print("Received message '{}' id='{}'".format(msg.data(), msg.message_id()))
+        # Acknowledge successful processing of the message
+        consumer.acknowledge(msg)
+    except:
+        # Message failed to be processed
+        consumer.negative_acknowledge(msg)
+
+client.close()
+```
+
+This example shows how to configure negative acknowledgement.
+
+```python
+from pulsar import Client, schema
+client = Client('pulsar://localhost:6650')
+consumer = client.subscribe('negative_acks', 'test', schema=schema.StringSchema())
+producer = client.create_producer('negative_acks', schema=schema.StringSchema())
+for i in range(10):
+    print('send msg "hello-%d"' % i)
+    producer.send_async('hello-%d' % i, callback=None)
+producer.flush()
+for i in range(10):
+    msg = consumer.receive()
+    consumer.negative_acknowledge(msg)
+    print('receive and nack msg "%s"' % msg.data())
+for i in range(10):
+    msg = consumer.receive()
+    consumer.acknowledge(msg)
+    print('receive and ack msg "%s"' % msg.data())
+try:
+    # No more messages expected
+    msg = consumer.receive(100)
+except:
+    print("no more msg")
+    pass
+```
+
+### Reader interface example
+
+You can use the Pulsar Python API to use the Pulsar [reader interface](concepts-clients.md#reader-interface). Here's an example:
+
+```python
+# MessageId taken from a previously fetched message
+msg_id = msg.message_id()
+
+reader = client.create_reader('my-topic', msg_id)
+
+while True:
+    msg = reader.read_next()
+    print("Received message '{}' id='{}'".format(msg.data(), msg.message_id()))
+    # No acknowledgment
+```
+
+### Multi-topic subscriptions
+
+In addition to subscribing a consumer to a single Pulsar topic, you can also subscribe to multiple topics simultaneously. To use multi-topic subscriptions, you can supply a regular expression (regex) or a `List` of topics. If you select topics via regex, all topics must be within the same Pulsar namespace.
+
+The following is an example.
+
+```python
+import re
+consumer = client.subscribe(re.compile('persistent://public/default/topic-*'), 'my-subscription')
+while True:
+    msg = consumer.receive()
+    try:
+        print("Received message '{}' id='{}'".format(msg.data(), msg.message_id()))
+        # Acknowledge successful processing of the message
+        consumer.acknowledge(msg)
+    except:
+        # Message failed to be processed
+        consumer.negative_acknowledge(msg)
+client.close()
+```
+
+## Schema
+
+### Declare and validate schema
+
+You can declare a schema by passing a class that inherits
+from `pulsar.schema.Record` and defines the fields as
+class variables. For example:
+
+```python
+from pulsar.schema import *
+
+class Example(Record):
+    a = String()
+    b = Integer()
+    c = Boolean()
+```
+
+With this simple schema definition, you can create producer, consumer, and reader instances that refer to it.
+
+```python
+producer = client.create_producer(
+                    topic='my-topic',
+                    schema=AvroSchema(Example) )
+
+producer.send(Example(a='Hello', b=1))
+```
+
+After creating the producer, the Pulsar broker validates that the existing topic schema is indeed of "Avro" type and that the format is compatible with the schema definition of the `Example` class.
+
+If there is a mismatch, an exception occurs in the producer creation.
+
+Once a producer is created with a certain schema definition, it will only accept objects that are instances of the declared schema class.
+
+Similarly, for a consumer or reader, the consumer returns an object (an instance of the schema record class) rather than the raw bytes:
+
+```python
+consumer = client.subscribe(
+                  topic='my-topic',
+                  subscription_name='my-subscription',
+                  schema=AvroSchema(Example) )
+
+while True:
+    msg = consumer.receive()
+    ex = msg.value()
+    try:
+        print("Received message a={} b={} c={}".format(ex.a, ex.b, ex.c))
+        # Acknowledge successful processing of the message
+        consumer.acknowledge(msg)
+    except:
+        # Message failed to be processed
+        consumer.negative_acknowledge(msg)
+```
+
+### Supported schema types
+
+You can use different builtin schema types in Pulsar. All the definitions are in the `pulsar.schema` package.
+
+| Schema | Notes |
+| ------ | ----- |
+| `BytesSchema` | Get the raw payload as a `bytes` object. No serialization/deserialization is performed. This is the default schema mode. |
+| `StringSchema` | Encode/decode the payload as a UTF-8 string. Uses `str` objects. |
+| `JsonSchema` | Requires a record definition. Serializes the record into a standard JSON payload. |
+| `AvroSchema` | Requires a record definition. Serializes in AVRO format. |
+
+### Schema definition reference
+
+The schema definition is done through a class that inherits from `pulsar.schema.Record`.
+
+This class has a number of fields which can be of either
+`pulsar.schema.Field` type or another nested `Record`. All the
+fields are specified in the `pulsar.schema` package. The fields
+match the AVRO field types.
+
+| Field Type | Python Type | Notes |
+| ---------- | ----------- | ----- |
+| `Boolean` | `bool` | |
+| `Integer` | `int` | |
+| `Long` | `int` | |
+| `Float` | `float` | |
+| `Double` | `float` | |
+| `Bytes` | `bytes` | |
+| `String` | `str` | |
+| `Array` | `list` | Need to specify the record type for items. |
+| `Map` | `dict` | Key is always `String`. Need to specify the value type. |
+
+Additionally, any Python `Enum` type can be used as a valid field type.
+
+#### Fields parameters
+
+When adding a field, you can use these parameters in the constructor.
+
+ +| Argument | Default | Notes | +| ---------- | --------| ----- | +| `default` | `None` | Set a default value for the field. Eg: `a = Integer(default=5)` | +| `required` | `False` | Mark the field as "required". It is set in the schema accordingly. | + +#### Schema definition examples + +##### Simple definition + +```python +class Example(Record): + a = String() + b = Integer() + c = Array(String()) + i = Map(String()) +``` + +##### Using enums + +```python +from enum import Enum + +class Color(Enum): + red = 1 + green = 2 + blue = 3 + +class Example(Record): + name = String() + color = Color +``` + +##### Complex types + +```python +class MySubRecord(Record): + x = Integer() + y = Long() + z = String() + +class Example(Record): + a = String() + sub = MySubRecord() +``` diff --git a/site2/website/versioned_docs/version-2.7.0/client-libraries-websocket.md b/site2/website/versioned_docs/version-2.7.0/client-libraries-websocket.md new file mode 100644 index 00000000000000..3164bbd035773d --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/client-libraries-websocket.md @@ -0,0 +1,448 @@ +--- +id: version-2.7.0-client-libraries-websocket +title: Pulsar's WebSocket API +sidebar_label: WebSocket +original_id: client-libraries-websocket +--- + +Pulsar's [WebSocket](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) API is meant to provide a simple way to interact with Pulsar using languages that do not have an official [client library](getting-started-clients.md). Through WebSockets you can publish and consume messages and use all the features available in the [Java](client-libraries-java.md), [Go](client-libraries-go.md), [Python](client-libraries-python.md) and [C++](client-libraries-cpp.md) client libraries. + + +> You can use Pulsar's WebSocket API with any WebSocket client library. See examples for Python and Node.js [below](#client-examples). + +## Running the WebSocket service + +The standalone variant of Pulsar that we recommend using for [local development](getting-started-standalone.md) already has the WebSocket service enabled. + +In non-standalone mode, there are two ways to deploy the WebSocket service: + +* [embedded](#embedded-with-a-pulsar-broker) with a Pulsar broker +* as a [separate component](#as-a-separate-component) + +### Embedded with a Pulsar broker + +In this mode, the WebSocket service will run within the same HTTP service that's already running in the broker. To enable this mode, set the [`webSocketServiceEnabled`](reference-configuration.md#broker-webSocketServiceEnabled) parameter in the [`conf/broker.conf`](reference-configuration.md#broker) configuration file in your installation. + +```properties +webSocketServiceEnabled=true +``` + +### As a separate component + +In this mode, the WebSocket service will be run from a Pulsar [broker](reference-terminology.md#broker) as a separate service. Configuration for this mode is handled in the [`conf/websocket.conf`](reference-configuration.md#websocket) configuration file. 
+You'll need to set *at least* the following parameters:
+
+* [`configurationStoreServers`](reference-configuration.md#websocket-configurationStoreServers)
+* [`webServicePort`](reference-configuration.md#websocket-webServicePort)
+* [`clusterName`](reference-configuration.md#websocket-clusterName)
+
+Here's an example:
+
+```properties
+configurationStoreServers=zk1:2181,zk2:2181,zk3:2181
+webServicePort=8080
+clusterName=my-cluster
+```
+
+### Starting the broker
+
+When the configuration is set, you can start the service using the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) tool:
+
+```shell
+$ bin/pulsar-daemon start websocket
+```
+
+## API Reference
+
+Pulsar's WebSocket API offers three endpoints for [producing](#producer-endpoint) messages, [consuming](#consumer-endpoint) messages and [reading](#reader-endpoint) messages.
+
+All exchanges via the WebSocket API use JSON.
+
+### Producer endpoint
+
+The producer endpoint requires you to specify a tenant, namespace, and topic in the URL:
+
+```http
+ws://broker-service-url:8080/ws/v2/producer/persistent/:tenant/:namespace/:topic
+```
+
+##### Query param
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`sendTimeoutMillis` | long | no | Send timeout (default: 30 secs)
+`batchingEnabled` | boolean | no | Enable batching of messages (default: false)
+`batchingMaxMessages` | int | no | Maximum number of messages permitted in a batch (default: 1000)
+`maxPendingMessages` | int | no | Set the max size of the internal queue holding the messages (default: 1000)
+`batchingMaxPublishDelay` | long | no | Time period within which the messages will be batched (default: 10ms)
+`messageRoutingMode` | string | no | Message [routing mode](https://pulsar.apache.org/api/client/index.html?org/apache/pulsar/client/api/ProducerConfiguration.MessageRoutingMode.html) for the partitioned producer: `SinglePartition`, `RoundRobinPartition`
+`compressionType` | string | no | Compression [type](https://pulsar.apache.org/api/client/index.html?org/apache/pulsar/client/api/CompressionType.html): `LZ4`, `ZLIB`
+`producerName` | string | no | Specify the name for the producer. Pulsar enforces that only one producer with the same name can publish on a topic
+`initialSequenceId` | long | no | Set the baseline for the sequence ids for messages published by the producer
+`hashingScheme` | string | no | [Hashing function](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ProducerConfiguration.HashingScheme.html) to use when publishing on a partitioned topic: `JavaStringHash`, `Murmur3_32Hash`
+
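+For example, a producer URL that combines several of the parameters above might look like the following (the tenant, namespace, topic, and values are illustrative):
+
+```http
+ws://broker-service-url:8080/ws/v2/producer/persistent/my-tenant/my-namespace/my-topic?sendTimeoutMillis=10000&batchingEnabled=true&batchingMaxMessages=100
+```
+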
+#### Publishing a message
+
+```json
+{
+  "payload": "SGVsbG8gV29ybGQ=",
+  "properties": {"key1": "value1", "key2": "value2"},
+  "context": "1"
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`payload` | string | yes | Base-64 encoded payload
+`properties` | key-value pairs | no | Application-defined properties
+`context` | string | no | Application-defined request identifier
+`key` | string | no | For partitioned topics, decides which partition to use
+`replicationClusters` | array | no | Restrict replication to this list of [clusters](reference-terminology.md#cluster), specified by name
+
+##### Example success response
+
+```json
+{
+  "result": "ok",
+  "messageId": "CAAQAw==",
+  "context": "1"
+}
+```
+
+##### Example failure response
+
+```json
+{
+  "result": "send-error:3",
+  "errorMsg": "Failed to de-serialize from JSON",
+  "context": "1"
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`result` | string | yes | `ok` if successful or an error message if unsuccessful
+`messageId` | string | yes | Message ID assigned to the published message
+`context` | string | no | Application-defined request identifier
+
+### Consumer endpoint
+
+The consumer endpoint requires you to specify a tenant, namespace, and topic, as well as a subscription, in the URL:
+
+```http
+ws://broker-service-url:8080/ws/v2/consumer/persistent/:tenant/:namespace/:topic/:subscription
+```
+
+##### Query param
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`ackTimeoutMillis` | long | no | Set the timeout for unacked messages (default: 0)
+`subscriptionType` | string | no | [Subscription type](https://pulsar.apache.org/api/client/index.html?org/apache/pulsar/client/api/SubscriptionType.html): `Exclusive`, `Failover`, `Shared`
+`receiverQueueSize` | int | no | Size of the consumer receive queue (default: 1000)
+`consumerName` | string | no | Consumer name
+`priorityLevel` | int | no | Define a [priority](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ConsumerConfiguration.html#setPriorityLevel-int-) for the consumer
+`maxRedeliverCount` | int | no | Define a [maxRedeliverCount](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ConsumerBuilder.html#deadLetterPolicy-org.apache.pulsar.client.api.DeadLetterPolicy-) for the consumer (default: 0). Activates the [Dead Letter Topic](https://github.com/apache/pulsar/wiki/PIP-22%3A-Pulsar-Dead-Letter-Topic) feature.
+`deadLetterTopic` | string | no | Define a [deadLetterTopic](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ConsumerBuilder.html#deadLetterPolicy-org.apache.pulsar.client.api.DeadLetterPolicy-) for the consumer (default: {topic}-{subscription}-DLQ). Activates the [Dead Letter Topic](https://github.com/apache/pulsar/wiki/PIP-22%3A-Pulsar-Dead-Letter-Topic) feature.
+`pullMode` | boolean | no | Enable pull mode (default: false). See "Flow Control" below.
+
+NB: these parameters (except `pullMode`) apply to the internal consumer of the WebSocket service, so messages are subject to the redelivery settings as soon as they get into the receive queue, even if the client doesn't consume them on the WebSocket.
+
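+Likewise, a consumer URL with query parameters might look like the following (values illustrative):
+
+```http
+ws://broker-service-url:8080/ws/v2/consumer/persistent/my-tenant/my-namespace/my-topic/my-subscription?subscriptionType=Shared&receiverQueueSize=500
+```
+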
+##### Receiving messages
+
+The server pushes messages on the WebSocket session:
+
+```json
+{
+  "messageId": "CAAQAw==",
+  "payload": "SGVsbG8gV29ybGQ=",
+  "properties": {"key1": "value1", "key2": "value2"},
+  "publishTime": "2016-08-30 16:45:57.785",
+  "redeliveryCount": 4
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`messageId` | string | yes | Message ID
+`payload` | string | yes | Base-64 encoded payload
+`publishTime` | string | yes | Publish timestamp
+`redeliveryCount` | number | yes | Number of times this message was already delivered
+`properties` | key-value pairs | no | Application-defined properties
+`key` | string | no | Original routing key set by producer
+
+#### Acknowledging the message
+
+The consumer needs to acknowledge successful processing of a message so that the Pulsar broker can delete it.
+
+```json
+{
+  "messageId": "CAAQAw=="
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`messageId`| string | yes | Message ID of the processed message
+
+#### Flow control
+
+##### Push Mode
+
+By default (`pullMode=false`), the consumer endpoint uses the `receiverQueueSize` parameter both to size its internal receive queue and to limit the number of unacknowledged messages that are passed to the WebSocket client. In this mode, if you don't send acknowledgements, the Pulsar WebSocket service stops sending messages after reaching `receiverQueueSize` unacked messages sent to the WebSocket client.
+
+##### Pull Mode
+
+If you set `pullMode` to `true`, the WebSocket client needs to send `permit` commands to permit the Pulsar WebSocket service to send more messages.
+
+```json
+{
+  "type": "permit",
+  "permitMessages": 100
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`type`| string | yes | Type of command. Must be `permit`
+`permitMessages`| int | yes | Number of messages to permit
+
+NB: in this mode it's possible to acknowledge messages in a different connection.
+
+### Reader endpoint
+
+The reader endpoint requires you to specify a tenant, namespace, and topic in the URL:
+
+```http
+ws://broker-service-url:8080/ws/v2/reader/persistent/:tenant/:namespace/:topic
+```
+
+##### Query param
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`readerName` | string | no | Reader name
+`receiverQueueSize` | int | no | Size of the consumer receive queue (default: 1000)
+`messageId` | int or enum | no | Message ID to start from, `earliest` or `latest` (default: `latest`)
+
+##### Receiving messages
+
+The server pushes messages on the WebSocket session:
+
+```json
+{
+  "messageId": "CAAQAw==",
+  "payload": "SGVsbG8gV29ybGQ=",
+  "properties": {"key1": "value1", "key2": "value2"},
+  "publishTime": "2016-08-30 16:45:57.785",
+  "redeliveryCount": 4
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`messageId` | string | yes | Message ID
+`payload` | string | yes | Base-64 encoded payload
+`publishTime` | string | yes | Publish timestamp
+`redeliveryCount` | number | yes | Number of times this message was already delivered
+`properties` | key-value pairs | no | Application-defined properties
+`key` | string | no | Original routing key set by producer
+
+#### Acknowledging the message
+
+**In WebSocket**, the reader needs to acknowledge successful processing of a message so that the Pulsar WebSocket service can update the number of pending messages. If you don't send acknowledgements, the Pulsar WebSocket service stops sending messages after reaching the pending-message limit.
+
+```json
+{
+  "messageId": "CAAQAw=="
+}
+```
+
+Key | Type | Required? | Explanation
+:---|:-----|:----------|:-----------
+`messageId`| string | yes | Message ID of the processed message
+
+### Error codes
+
+In case of error, the server closes the WebSocket session using the following error codes:
+
+Error Code | Error Message
+:----------|:-------------
+1 | Failed to create producer
+2 | Failed to subscribe
+3 | Failed to deserialize from JSON
+4 | Failed to serialize to JSON
+5 | Failed to authenticate client
+6 | Client is not authorized
+7 | Invalid payload encoding
+8 | Unknown error
+
+> The application is responsible for re-establishing a new WebSocket session after a backoff period.
+
+## Client examples
+
+Below you'll find code examples for the Pulsar WebSocket API in [Python](#python) and [Node.js](#nodejs).
+ +### Python + +This example uses the [`websocket-client`](https://pypi.python.org/pypi/websocket-client) package. You can install it using [pip](https://pypi.python.org/pypi/pip): + +```shell +$ pip install websocket-client +``` + +You can also download it from [PyPI](https://pypi.python.org/pypi/websocket-client). + +#### Python producer + +Here's an example Python producer that sends a simple message to a Pulsar [topic](reference-terminology.md#topic): + +```python +import websocket, base64, json + +TOPIC = 'ws://localhost:8080/ws/v2/producer/persistent/public/default/my-topic' + +ws = websocket.create_connection(TOPIC) + +# Send one message as JSON +ws.send(json.dumps({ + 'payload' : base64.b64encode('Hello World'), + 'properties': { + 'key1' : 'value1', + 'key2' : 'value2' + }, + 'context' : 5 +})) + +response = json.loads(ws.recv()) +if response['result'] == 'ok': + print 'Message published successfully' +else: + print 'Failed to publish message:', response +ws.close() +``` + +#### Python consumer + +Here's an example Python consumer that listens on a Pulsar topic and prints the message ID whenever a message arrives: + +```python +import websocket, base64, json + +TOPIC = 'ws://localhost:8080/ws/v2/consumer/persistent/public/default/my-topic/my-sub' + +ws = websocket.create_connection(TOPIC) + +while True: + msg = json.loads(ws.recv()) + if not msg: break + + print "Received: {} - payload: {}".format(msg, base64.b64decode(msg['payload'])) + + # Acknowledge successful processing + ws.send(json.dumps({'messageId' : msg['messageId']})) + +ws.close() +``` + +#### Python reader + +Here's an example Python reader that listens on a Pulsar topic and prints the message ID whenever a message arrives: + +```python +import websocket, base64, json + +TOPIC = 'ws://localhost:8080/ws/v2/reader/persistent/public/default/my-topic' + +ws = websocket.create_connection(TOPIC) + +while True: + msg = json.loads(ws.recv()) + if not msg: break + + print "Received: {} - payload: {}".format(msg, base64.b64decode(msg['payload'])) + + # Acknowledge successful processing + ws.send(json.dumps({'messageId' : msg['messageId']})) + +ws.close() +``` + +### Node.js + +This example uses the [`ws`](https://websockets.github.io/ws/) package. 
+
+```shell
+$ npm install ws
+```
+
+#### Node.js producer
+
+Here's an example Node.js producer that sends a simple message to a Pulsar topic:
+
+```javascript
+var WebSocket = require('ws'),
+    topic = "ws://localhost:8080/ws/v2/producer/persistent/public/default/my-topic",
+    ws = new WebSocket(topic);
+
+var message = {
+  "payload" : Buffer.from("Hello World").toString('base64'),
+  "properties": {
+    "key1" : "value1",
+    "key2" : "value2"
+  },
+  "context" : "1"
+};
+
+ws.on('open', function() {
+  // Send one message
+  ws.send(JSON.stringify(message));
+});
+
+ws.on('message', function(message) {
+  console.log('received ack: %s', message);
+});
+```
+
+#### Node.js consumer
+
+Here's an example Node.js consumer that listens on the same topic used by the producer above:
+
+```javascript
+var WebSocket = require('ws'),
+    topic = "ws://localhost:8080/ws/v2/consumer/persistent/public/default/my-topic/my-sub",
+    ws = new WebSocket(topic);
+
+ws.on('message', function(message) {
+    var receiveMsg = JSON.parse(message);
+    console.log('Received: %s - payload: %s', message, Buffer.from(receiveMsg.payload, 'base64').toString());
+    var ackMsg = {"messageId" : receiveMsg.messageId};
+    ws.send(JSON.stringify(ackMsg));
+});
+```
+
+#### Node.js reader
+
+Here's an example Node.js reader that listens on a Pulsar topic and prints each message it receives:
+
+```javascript
+var WebSocket = require('ws'),
+    topic = "ws://localhost:8080/ws/v2/reader/persistent/public/default/my-topic",
+    ws = new WebSocket(topic);
+
+ws.on('message', function(message) {
+    var receiveMsg = JSON.parse(message);
+    console.log('Received: %s - payload: %s', message, Buffer.from(receiveMsg.payload, 'base64').toString());
+    var ackMsg = {"messageId" : receiveMsg.messageId};
+    ws.send(JSON.stringify(ackMsg));
+});
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/concepts-architecture-overview.md b/site2/website/versioned_docs/version-2.7.0/concepts-architecture-overview.md
new file mode 100644
index 00000000000000..8373273cb6e2b7
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/concepts-architecture-overview.md
@@ -0,0 +1,156 @@
+---
+id: version-2.7.0-concepts-architecture-overview
+title: Architecture Overview
+sidebar_label: Architecture
+original_id: concepts-architecture-overview
+---
+
+At the highest level, a Pulsar instance is composed of one or more Pulsar clusters. Clusters within an instance can [replicate](concepts-replication.md) data amongst themselves.
+
+In a Pulsar cluster:
+
+* One or more brokers handle and load balance incoming messages from producers, dispatch messages to consumers, communicate with the Pulsar configuration store to handle various coordination tasks, store messages in BookKeeper instances (aka bookies), rely on a cluster-specific ZooKeeper cluster for certain tasks, and more.
+* A BookKeeper cluster consisting of one or more bookies handles [persistent storage](#persistent-storage) of messages.
+* A ZooKeeper cluster specific to that cluster handles coordination tasks within the cluster.
+
+The diagram below provides an illustration of a Pulsar cluster:
+
+![Pulsar architecture diagram](assets/pulsar-system-architecture.png)
+
+At the broader instance level, an instance-wide ZooKeeper cluster called the configuration store handles coordination tasks involving multiple clusters, for example [geo-replication](concepts-replication.md).
+
+## Brokers
+
+The Pulsar message broker is a stateless component that's primarily responsible for running two other components:
+
+* An HTTP server that exposes a {@inject: rest:REST:/} API for both administrative tasks and [topic lookup](concepts-clients.md#client-setup-phase) for producers and consumers
+* A dispatcher, which is an asynchronous TCP server over a custom [binary protocol](developing-binary-protocol.md) used for all data transfers
+
+Messages are typically dispatched out of a [managed ledger](#managed-ledgers) cache for the sake of performance, *unless* the backlog exceeds the cache size. If the backlog grows too large for the cache, the broker will start reading entries from BookKeeper.
+
+Finally, to support geo-replication on global topics, the broker manages replicators that tail the entries published in the local region and republish them to the remote region using the Pulsar [Java client library](client-libraries-java.md).
+
+> For a guide to managing Pulsar brokers, see the [brokers](admin-api-brokers.md) guide.
+
+## Clusters
+
+A Pulsar instance consists of one or more Pulsar *clusters*. Clusters, in turn, consist of:
+
+* One or more Pulsar [brokers](#brokers)
+* A ZooKeeper quorum used for cluster-level configuration and coordination
+* An ensemble of bookies used for [persistent storage](#persistent-storage) of messages
+
+Clusters can replicate amongst themselves using [geo-replication](concepts-replication.md).
+
+> For a guide to managing Pulsar clusters, see the [clusters](admin-api-clusters.md) guide.
+
+## Metadata store
+
+Pulsar uses [Apache ZooKeeper](https://zookeeper.apache.org/) for metadata storage, cluster configuration, and coordination. In a Pulsar instance:
+
+* A configuration store quorum stores configuration for tenants, namespaces, and other entities that need to be globally consistent.
+* Each cluster has its own local ZooKeeper ensemble that stores cluster-specific configuration and coordination data, such as which brokers are responsible for which topics, ownership metadata, broker load reports, BookKeeper ledger metadata, and more.
+
+## Persistent storage
+
+Pulsar provides guaranteed message delivery for applications. If a message successfully reaches a Pulsar broker, it will be delivered to its intended target.
+
+This guarantee requires that non-acknowledged messages are stored in a durable manner until they can be delivered to and acknowledged by consumers. This mode of messaging is commonly called *persistent messaging*. In Pulsar, N copies of all messages are stored and synced on disk, for example 4 copies across two servers with mirrored [RAID](https://en.wikipedia.org/wiki/RAID) volumes on each server.
+
+### Apache BookKeeper
+
+Pulsar uses a system called [Apache BookKeeper](http://bookkeeper.apache.org/) for persistent message storage. BookKeeper is a distributed [write-ahead log](https://en.wikipedia.org/wiki/Write-ahead_logging) (WAL) system that provides a number of crucial advantages for Pulsar:
+
+* It enables Pulsar to utilize many independent logs, called [ledgers](#ledgers). Multiple ledgers can be created for topics over time.
+* It offers very efficient storage for sequential data that handles entry replication.
+* It guarantees read consistency of ledgers in the presence of various system failures.
+* It offers even distribution of I/O across bookies.
+* It's horizontally scalable in both capacity and throughput. Capacity can be immediately increased by adding more bookies to a cluster.
+* Bookies are designed to handle thousands of ledgers with concurrent reads and writes. By using multiple disk devices---one for journal and another for general storage---bookies are able to isolate the effects of read operations from the latency of ongoing write operations.
+
+In addition to message data, *cursors* are also persistently stored in BookKeeper. Cursors are [subscription](reference-terminology.md#subscription) positions for [consumers](reference-terminology.md#consumer). BookKeeper enables Pulsar to store consumer positions in a scalable fashion.
+
+At the moment, Pulsar supports persistent message storage. This accounts for the `persistent` in all topic names. Here's an example:
+
+```http
+persistent://my-tenant/my-namespace/my-topic
+```
+
+> Pulsar also supports ephemeral ([non-persistent](concepts-messaging.md#non-persistent-topics)) message storage.
+
+
+You can see an illustration of how brokers and bookies interact in the diagram below:
+
+![Brokers and bookies](assets/broker-bookie.png)
+
+
+### Ledgers
+
+A ledger is an append-only data structure with a single writer that is assigned to multiple BookKeeper storage nodes, or bookies. Ledger entries are replicated to multiple bookies. Ledgers themselves have very simple semantics:
+
+* A Pulsar broker can create a ledger, append entries to the ledger, and close the ledger.
+* After the ledger has been closed---either explicitly or because the writer process crashed---it can then be opened only in read-only mode.
+* Finally, when entries in the ledger are no longer needed, the whole ledger can be deleted from the system (across all bookies).
+
+#### Ledger read consistency
+
+The main strength of BookKeeper is that it guarantees read consistency in ledgers in the presence of failures. Since the ledger can only be written to by a single process, that process is free to append entries very efficiently, without needing to obtain consensus. After a failure, the ledger will go through a recovery process that will finalize the state of the ledger and establish which entry was last committed to the log. After that point, all readers of the ledger are guaranteed to see the exact same content.
+
+#### Managed ledgers
+
+Given that BookKeeper ledgers provide a single log abstraction, a library was developed on top of the ledger called the *managed ledger* that represents the storage layer for a single topic. A managed ledger represents the abstraction of a stream of messages with a single writer that keeps appending at the end of the stream and multiple cursors that are consuming the stream, each with its own associated position.
+
+Internally, a single managed ledger uses multiple BookKeeper ledgers to store the data. There are two reasons to have multiple ledgers:
+
+1. After a failure, a ledger is no longer writable and a new one needs to be created.
+2. A ledger can be deleted when all cursors have consumed the messages it contains. This allows for periodic rollover of ledgers.
+
+### Journal storage
+
+In BookKeeper, *journal* files contain BookKeeper transaction logs. Before making an update to a [ledger](#ledgers), a bookie needs to ensure that a transaction describing the update is written to persistent (non-volatile) storage. A new journal file is created when the bookie starts or when the older journal file reaches the journal file size threshold (configured using the [`journalMaxSizeMB`](reference-configuration.md#bookkeeper-journalMaxSizeMB) parameter).
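+
+To make the ledger lifecycle described above concrete, here is a minimal, hypothetical sketch against the BookKeeper client API. Pulsar applications never call this API directly (brokers drive it through the managed ledger layer), and the connection string and ledger settings here are placeholder assumptions.
+
+```java
+import java.util.Enumeration;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.LedgerEntry;
+import org.apache.bookkeeper.client.LedgerHandle;
+
+public class LedgerLifecycle {
+    public static void main(String[] args) throws Exception {
+        // Placeholder ZooKeeper connection string for a local bookie ensemble
+        BookKeeper bk = new BookKeeper("localhost:2181");
+
+        // A single writer creates a ledger and appends entries to it
+        LedgerHandle writer = bk.createLedger(BookKeeper.DigestType.CRC32, "passwd".getBytes());
+        long ledgerId = writer.getId();
+        writer.addEntry("entry-0".getBytes());
+        writer.addEntry("entry-1".getBytes());
+        writer.close(); // once closed, the ledger is sealed
+
+        // The ledger can then be reopened only for reading
+        LedgerHandle reader = bk.openLedger(ledgerId, BookKeeper.DigestType.CRC32, "passwd".getBytes());
+        Enumeration<LedgerEntry> entries = reader.readEntries(0, reader.getLastAddConfirmed());
+        while (entries.hasMoreElements()) {
+            System.out.println(new String(entries.nextElement().getEntry()));
+        }
+        reader.close();
+
+        // When the entries are no longer needed, delete the ledger from all bookies
+        bk.deleteLedger(ledgerId);
+        bk.close();
+    }
+}
+```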
+
+## Pulsar proxy
+
+One way for Pulsar clients to interact with a Pulsar [cluster](#clusters) is by connecting to Pulsar message [brokers](#brokers) directly. In some cases, however, this kind of direct connection is either infeasible or undesirable because the client doesn't have direct access to broker addresses. If you're running Pulsar in a cloud environment or on [Kubernetes](https://kubernetes.io) or an analogous platform, for example, then direct client connections to brokers are likely not possible.
+
+The **Pulsar proxy** provides a solution to this problem by acting as a single gateway for all of the brokers in a cluster. If you run the Pulsar proxy (which, again, is optional), all client connections with the Pulsar cluster will flow through the proxy rather than communicating with brokers.
+
+> For the sake of performance and fault tolerance, you can run as many instances of the Pulsar proxy as you'd like.
+
+Architecturally, the Pulsar proxy gets all the information it requires from ZooKeeper. When starting the proxy on a machine, you only need to provide ZooKeeper connection strings for the cluster-specific and instance-wide configuration store clusters. Here's an example:
+
+```bash
+$ bin/pulsar proxy \
+  --zookeeper-servers zk-0,zk-1,zk-2 \
+  --configuration-store-servers zk-0,zk-1,zk-2
+```
+
+> #### Pulsar proxy docs
+> For documentation on using the Pulsar proxy, see the [Pulsar proxy admin documentation](administration-proxy.md).
+
+
+Some important things to know about the Pulsar proxy:
+
+* Connecting clients don't need to provide *any* specific configuration to use the Pulsar proxy. You won't need to update the client configuration for existing applications beyond updating the IP used for the service URL (for example, if you're running a load balancer over the Pulsar proxy).
+* [TLS encryption](security-tls-transport.md) and [authentication](security-tls-authentication.md) are supported by the Pulsar proxy.
+
+## Service discovery
+
+[Clients](getting-started-clients.md) connecting to Pulsar brokers need to be able to communicate with an entire Pulsar instance using a single URL. Pulsar provides a built-in service discovery mechanism that you can set up using the instructions in the [Deploying a Pulsar instance](deploy-bare-metal.md#service-discovery-setup) guide.
+
+You can use your own service discovery system if you'd like. If you use your own system, there is just one requirement: when a client performs an HTTP request to an endpoint, such as `http://pulsar.us-west.example.com:8080`, the client needs to be redirected to *some* active broker in the desired cluster, whether via DNS, an HTTP or IP redirect, or some other means.
+
+The diagram below illustrates Pulsar service discovery:
+
+![alt-text](assets/pulsar-service-discovery.png)
+
+In this diagram, the Pulsar cluster is addressable via a single DNS name: `pulsar-cluster.acme.com`. A [Python client](client-libraries-python.md), for example, could access this Pulsar cluster like this:
+
+```python
+from pulsar import Client
+
+client = Client('pulsar://pulsar-cluster.acme.com:6650')
+```
+
+> **Note**
+> In Pulsar, each topic is handled by only one broker. Initial requests from a client to read, update or delete a topic are sent to a broker that may not be the topic owner. If the broker cannot handle the request for this topic, it redirects the request to the appropriate broker.
diff --git a/site2/website/versioned_docs/version-2.7.0/concepts-authentication.md b/site2/website/versioned_docs/version-2.7.0/concepts-authentication.md
new file mode 100644
index 00000000000000..49bd30e1765c41
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/concepts-authentication.md
@@ -0,0 +1,9 @@
+---
+id: version-2.7.0-concepts-authentication
+title: Authentication and Authorization
+sidebar_label: Authentication and Authorization
+original_id: concepts-authentication
+---
+
+Pulsar supports a pluggable [authentication](security-overview.md) mechanism that can be configured at the proxy and/or the broker. Pulsar also supports a pluggable [authorization](security-authorization.md) mechanism. These mechanisms work together to identify the client and its access rights on topics, namespaces, and tenants.
+
diff --git a/site2/website/versioned_docs/version-2.7.0/concepts-messaging.md b/site2/website/versioned_docs/version-2.7.0/concepts-messaging.md
new file mode 100644
index 00000000000000..21557a4384435e
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/concepts-messaging.md
@@ -0,0 +1,518 @@
+---
+id: version-2.7.0-concepts-messaging
+title: Messaging
+sidebar_label: Messaging
+original_id: concepts-messaging
+---
+
+Pulsar is built on the [publish-subscribe](https://en.wikipedia.org/wiki/Publish%E2%80%93subscribe_pattern) pattern (often abbreviated to pub-sub). In this pattern, [producers](#producers) publish messages to [topics](#topics). [Consumers](#consumers) [subscribe](#subscription-modes) to those topics, process incoming messages, and send an acknowledgement when processing is complete.
+
+When a subscription is created, Pulsar [retains](concepts-architecture-overview.md#persistent-storage) all messages, even if the consumer is disconnected. Retained messages are discarded only when a consumer acknowledges that those messages are processed successfully.
+
+## Messages
+
+Messages are the basic "unit" of Pulsar. The following table lists the components of messages.
+
+Component | Description
+:---------|:-------
+Value / data payload | The data carried by the message. All Pulsar messages contain raw bytes, although message data can also conform to data [schemas](schema-get-started.md).
+Key | Messages are optionally tagged with keys, which is useful for things like [topic compaction](concepts-topic-compaction.md).
+Properties | An optional key/value map of user-defined properties.
+Producer name | The name of the producer that produces the message. If you do not specify a producer name, the default name is used.
+Sequence ID | Each Pulsar message belongs to an ordered sequence on its topic. The sequence ID of the message is its order in that sequence.
+Publish time | The timestamp of when the message is published. The timestamp is automatically applied by the producer.
+Event time | An optional timestamp attached to a message by applications. For example, applications attach a timestamp on when the message is processed. If no event time is set, the value is `0`.
+TypedMessageBuilder | It is used to construct a message. You can set message properties such as the message key and message value with `TypedMessageBuilder`. When you set `TypedMessageBuilder`, set the key as a string. If you set the key to another type (for example, an AVRO object), the key is sent as bytes, and it is difficult to get the AVRO object back on the consumer.
+
+> For more information on Pulsar message contents, see Pulsar [binary protocol](developing-binary-protocol.md).
+
+## Producers
+
+A producer is a process that attaches to a topic and publishes messages to a Pulsar [broker](reference-terminology.md#broker). The Pulsar broker processes the messages.
+
+### Send modes
+
+Producers send messages to brokers synchronously (sync) or asynchronously (async).
+
+| Mode       | Description |
+|:-----------|-----------|
+| Sync send  | The producer waits for an acknowledgement from the broker after sending every message. If the acknowledgment is not received, the producer treats the sending operation as a failure. |
+| Async send | The producer puts a message in a blocking queue and returns immediately. The client library sends the message to the broker in the background. If the queue is full (you can [configure](reference-configuration.md#broker) the maximum size), the producer is blocked or fails immediately when calling the API, depending on arguments passed to the producer. |
+
+### Compression
+
+You can compress messages published by producers during transportation. Pulsar currently supports the following types of compression:
+
+* [LZ4](https://github.com/lz4/lz4)
+* [ZLIB](https://zlib.net/)
+* [ZSTD](https://facebook.github.io/zstd/)
+* [SNAPPY](https://google.github.io/snappy/)
+
+### Batching
+
+When batching is enabled, the producer accumulates and sends a batch of messages in a single request. The batch size is defined by the maximum number of messages and the maximum publish latency. Therefore, the backlog size represents the total number of batches instead of the total number of messages.
+
+In Pulsar, batches are tracked and stored as single units rather than as individual messages. The consumer unbundles a batch into individual messages. However, scheduled messages (configured through the `deliverAt` or the `deliverAfter` parameter) are always sent as individual messages, even if batching is enabled.
+
+In general, a batch is acknowledged when all of its messages are acknowledged by a consumer. This means that unexpected failures, negative acknowledgements, or acknowledgement timeouts can result in redelivery of all messages in a batch, even if some of the messages are acknowledged.
+
+To avoid redelivering acknowledged messages in a batch to the consumer, Pulsar has introduced batch index acknowledgement since Pulsar 2.6.0. When batch index acknowledgement is enabled, the consumer filters out the batch index that has been acknowledged and sends the batch index acknowledgement request to the broker. The broker maintains the batch index acknowledgement status and tracks the acknowledgement status of each batch index to avoid dispatching acknowledged messages to the consumer. When all indexes of the batch message are acknowledged, the batch message is deleted.
+
+By default, batch index acknowledgement is disabled (`batchIndexAcknowledgeEnable=false`). You can enable batch index acknowledgement by setting the `batchIndexAcknowledgeEnable` parameter to `true` at the broker side. Enabling batch index acknowledgement results in higher memory overhead.
+
+### Chunking
+Before enabling chunking, read the following instructions.
+- Batching and chunking cannot be enabled simultaneously. To enable chunking, you must disable batching in advance.
+- Chunking is only supported for persistent topics.
+- Chunking is only supported for the exclusive and failover subscription modes.
+
+When chunking is enabled (`chunkingEnabled=true`), if the message size is greater than the allowed maximum publish-payload size, the producer splits the original message into chunked messages and publishes them with chunked metadata to the broker separately and in order. At the broker side, the chunked messages are stored in the managed-ledger in the same way as ordinary messages. The only difference is that the consumer needs to buffer the chunked messages and combine them into the real message when all chunked messages have been collected. The chunked messages in the managed-ledger can be interwoven with ordinary messages. If the producer fails to publish all the chunks of a message, the consumer can expire the incomplete chunks when it fails to receive all chunks within the expiry time. By default, the expiry time is set to one hour.
+
+The consumer consumes the chunked messages and buffers them until the consumer receives all the chunks of a message. The consumer then stitches the chunked messages together and places them into the receiver-queue. Clients consume messages from the receiver-queue. Once the consumer consumes the entire large message and acknowledges it, the consumer internally sends acknowledgement of all the chunk messages associated to that large message. You can set the `maxPendingChuckedMessage` parameter on the consumer. When the threshold is reached, the consumer drops the pending chunked messages by silently acknowledging them, or asks the broker to redeliver them later by marking them unacknowledged.
+
+The broker does not require any changes to support chunking for non-shared subscriptions. The broker only uses `chunkedMessageRate` to record the chunked message rate on the topic.
+
+#### Handle chunked messages with one producer and one ordered consumer
+
+As shown in the following figure, a topic can have one producer that publishes a large message payload in chunked messages along with regular non-chunked messages. The producer publishes message M1 in three chunks: M1-C1, M1-C2 and M1-C3. The broker stores all three chunked messages in the managed-ledger and dispatches them to the ordered (exclusive/failover) consumer in the same order. The consumer buffers all the chunked messages in memory until it receives all of them, combines them into one message, and then hands the original message M1 over to the client.
+
+![](assets/chunking-01.png)
+
+#### Handle chunked messages with multiple producers and one ordered consumer
+
+When multiple publishers publish chunked messages into a single topic, the broker stores all the chunked messages coming from different publishers in the same managed-ledger. As shown below, Producer 1 publishes message M1 in three chunks M1-C1, M1-C2 and M1-C3. Producer 2 publishes message M2 in three chunks M2-C1, M2-C2 and M2-C3. All chunked messages of a specific message are still in order but might not be consecutive in the managed-ledger. This brings some memory pressure to the consumer because the consumer keeps a separate buffer for each large message to aggregate all of its chunks and combine them into one message.
+
+![](assets/chunking-02.png)
+
+## Consumers
+
+A consumer is a process that attaches to a topic via a subscription and then receives messages.
+
+A consumer sends a [flow permit request](developing-binary-protocol.md#flow-control) to a broker to get messages. There is a queue at the consumer side to receive messages pushed from the broker. You can configure the queue size with the [`receiverQueueSize`](client-libraries-java.md#configure-consumer) parameter. The default size is `1000`. Each time `consumer.receive()` is called, a message is dequeued from the buffer.
+
+### Receive modes
+
+Messages are received from [brokers](reference-terminology.md#broker) either synchronously (sync) or asynchronously (async).
+
+| Mode          | Description |
+|:--------------|:------------|
+| Sync receive  | A sync receive is blocked until a message is available. |
+| Async receive | An async receive returns immediately with a future value—for example, a [`CompletableFuture`](http://www.baeldung.com/java-completablefuture) in Java—that completes once a new message is available. |
+
+### Listeners
+
+Client libraries provide listener implementations for consumers. For example, the [Java client](client-libraries-java.md) provides a {@inject: javadoc:MessageListener:/client/org/apache/pulsar/client/api/MessageListener} interface. In this interface, the `received` method is called whenever a new message is received.
+
+### Acknowledgement
+
+When a consumer consumes a message successfully, the consumer sends an acknowledgement request to the broker. The message is permanently stored, and deleted only after all the subscriptions have acknowledged it. If you want to store a message that has been acknowledged by a consumer, you need to configure the [message retention policy](concepts-messaging.md#message-retention-and-expiry).
+
+For a batch message, if batch index acknowledgement is enabled, the broker maintains the batch index acknowledgement status and tracks the acknowledgement status of each batch index to avoid dispatching acknowledged messages to the consumer. When all indexes of the batch message are acknowledged, the batch message is deleted. For details about batch index acknowledgement, see [batching](#batching).
+
+Messages can be acknowledged in the following two ways:
+
+- Messages are acknowledged individually. With individual acknowledgement, the consumer needs to acknowledge each message and sends an acknowledgement request to the broker.
+- Messages are acknowledged cumulatively. With cumulative acknowledgement, the consumer only needs to acknowledge the last message it received. All messages in the stream up to (and including) the provided message are not re-delivered to that consumer.
+
+> Note
+>
+> Cumulative acknowledgement cannot be used in the [shared subscription mode](#subscription-modes), because the shared subscription mode involves multiple consumers who have access to the same subscription. In the shared subscription mode, messages are acknowledged individually.
+
+### Negative acknowledgement
+
+When a consumer fails to consume a message and wants to consume it again, the consumer sends a negative acknowledgement to the broker, and the broker then redelivers the message.
+
+Messages are negatively acknowledged either individually or cumulatively, depending on the subscription mode.
+
+In the exclusive and failover subscription modes, consumers only negatively acknowledge the last message they receive.
+
+In the shared and Key_Shared subscription modes, you can negatively acknowledge messages individually.
+
+> Note
+> If batching is enabled, all other messages in the same batch as the negatively acknowledged messages are also redelivered to the consumer.
+
+### Acknowledgement timeout
+
+If a message is not consumed successfully, and you want the broker to redeliver the message automatically, you can adopt the unacknowledged message automatic re-delivery mechanism. When the acknowledgement timeout is specified, the client tracks the unacknowledged messages within the entire `acktimeout` time range, and automatically sends a redeliver-unacknowledged-messages request to the broker when the timeout elapses.
+
+> Note
+> If batching is enabled, all other messages in the same batch as the unacknowledged messages are also redelivered to the consumer.
+
+> Note
+> Prefer negative acknowledgements over acknowledgement timeout. Negative acknowledgement controls the re-delivery of individual messages with more precision, and avoids invalid redeliveries when the message processing time exceeds the acknowledgement timeout.
+
+### Dead letter topic
+
+Dead letter topic enables you to consume new messages when some messages cannot be consumed successfully by a consumer. In this mechanism, messages that fail to be consumed are stored in a separate topic, which is called the dead letter topic. You can decide how to handle messages in the dead letter topic.
+
+The following example shows how to enable dead letter topic in a Java client using the default dead letter topic:
+
+```java
+Consumer consumer = pulsarClient.newConsumer(Schema.BYTES)
+                .topic(topic)
+                .subscriptionName("my-subscription")
+                .subscriptionType(SubscriptionType.Shared)
+                .deadLetterPolicy(DeadLetterPolicy.builder()
+                      .maxRedeliverCount(maxRedeliveryCount)
+                      .build())
+                .subscribe();
+
+```
+The default dead letter topic uses this format:
+```
+<topicname>-<subscriptionname>-DLQ
+```
+
+If you want to specify the name of the dead letter topic, use this Java client example:
+
+```java
+Consumer consumer = pulsarClient.newConsumer(Schema.BYTES)
+                .topic(topic)
+                .subscriptionName("my-subscription")
+                .subscriptionType(SubscriptionType.Shared)
+                .deadLetterPolicy(DeadLetterPolicy.builder()
+                      .maxRedeliverCount(maxRedeliveryCount)
+                      .deadLetterTopic("your-topic-name")
+                      .build())
+                .subscribe();
+
+```
+
+Dead letter topic depends on message re-delivery. Messages are redelivered either due to [acknowledgement timeout](#acknowledgement-timeout) or [negative acknowledgement](#negative-acknowledgement). If you are going to use negative acknowledgement on a message, make sure it is negatively acknowledged before the acknowledgement timeout.
+
+> Note
+> Currently, dead letter topic is enabled only in the shared subscription mode.
+
+### Retry letter topic
+
+In many online business systems, a message needs to be re-consumed when an exception occurs in the business logic processing. To configure the delay time for re-consuming the failed messages, you can configure the producer to send messages to both the business topic and the retry letter topic, and enable automatic retry on the consumer. When automatic retry is enabled on the consumer, a message that is not consumed successfully is stored in the retry letter topic, and the consumer automatically consumes the failed messages from the retry letter topic after a specified delay time.
+
+By default, automatic retry is disabled. You can set `enableRetry` to `true` to enable automatic retry on the consumer.
+
+This example shows how to consume messages from a retry letter topic.
+
+```java
+Consumer consumer = pulsarClient.newConsumer(Schema.BYTES)
+                .topic(topic)
+                .subscriptionName("my-subscription")
+                .subscriptionType(SubscriptionType.Shared)
+                .enableRetry(true)
+                .receiverQueueSize(100)
+                .deadLetterPolicy(DeadLetterPolicy.builder()
+                        .maxRedeliverCount(maxRedeliveryCount)
+                        .retryLetterTopic("persistent://my-property/my-ns/my-subscription-custom-Retry")
+                        .build())
+                .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
+                .subscribe();
+```
+
+## Topics
+
+As in other pub-sub systems, topics in Pulsar are named channels for transmitting messages from producers to consumers. Topic names are URLs that have a well-defined structure:
+
+```http
+{persistent|non-persistent}://tenant/namespace/topic
+```
+
+Topic name component | Description
+:--------------------|:-----------
+`persistent` / `non-persistent` | This identifies the type of topic. Pulsar supports two kinds of topics: [persistent](concepts-architecture-overview.md#persistent-storage) and [non-persistent](#non-persistent-topics). The default is persistent, so if you do not specify a type, the topic is persistent. With persistent topics, all messages are durably persisted on disks (if the broker is not standalone, messages are durably persisted on multiple disks), whereas data for non-persistent topics is not persisted to storage disks.
+`tenant` | The topic tenant within the instance. Tenants are essential to multi-tenancy in Pulsar, and can be spread across clusters.
+`namespace` | The administrative unit of the topic, which acts as a grouping mechanism for related topics. Most topic configuration is performed at the [namespace](#namespaces) level. Each tenant has one or multiple namespaces.
+`topic` | The final part of the name. Topic names have no special meaning in a Pulsar instance.
+
+> #### No need to explicitly create new topics
+> You do not need to explicitly create topics in Pulsar. If a client attempts to write or receive messages to/from a topic that does not yet exist, Pulsar creates that topic under the namespace provided in the [topic name](#topics) automatically.
+> If no tenant or namespace is specified when a client creates a topic, the topic is created in the default tenant and namespace. You can also create a topic in a specified tenant and namespace, such as `persistent://my-tenant/my-namespace/my-topic`. `persistent://my-tenant/my-namespace/my-topic` means the `my-topic` topic is created in the `my-namespace` namespace of the `my-tenant` tenant.
+
+## Namespaces
+
+A namespace is a logical nomenclature within a tenant. A tenant creates multiple namespaces via the [admin API](admin-api-namespaces.md#create); a short sketch of this is shown below. For instance, a tenant with different applications can create a separate namespace for each application. `my-tenant/app1` is a namespace for the application `app1` of the tenant `my-tenant`. You can create any number of [topics](#topics) under the namespace.
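+
+As an illustration of the admin API mentioned above, the following minimal Java sketch creates a namespace and a partitioned topic under it with the `PulsarAdmin` client. The service URL and names are placeholder assumptions, and the tenant is assumed to already exist.
+
+```java
+import org.apache.pulsar.client.admin.PulsarAdmin;
+
+public class CreateNamespaceExample {
+    public static void main(String[] args) throws Exception {
+        // Placeholder admin service URL
+        PulsarAdmin admin = PulsarAdmin.builder()
+                .serviceHttpUrl("http://localhost:8080")
+                .build();
+
+        // Create a namespace for the application "app1" under the (pre-existing) tenant "my-tenant"
+        admin.namespaces().createNamespace("my-tenant/app1");
+
+        // Create a topic in that namespace, here with 4 partitions
+        admin.topics().createPartitionedTopic("persistent://my-tenant/app1/topic-1", 4);
+
+        admin.close();
+    }
+}
+```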
+
+## Subscriptions
+
+A subscription is a named configuration rule that determines how messages are delivered to consumers. Four subscription modes are available in Pulsar: [exclusive](#exclusive), [shared](#shared), [failover](#failover), and [key_shared](#key_shared). These modes are illustrated in the figure below.
+
+![Subscription modes](assets/pulsar-subscription-modes.png)
+
+> ### Pub-Sub or Queuing
+> In Pulsar, you can use different subscriptions flexibly.
+> * If you want to achieve traditional "fan-out pub-sub messaging" among consumers, specify a unique subscription name for each consumer. This is the exclusive subscription mode.
+> * If you want to achieve "message queuing" among consumers, share the same subscription name among multiple consumers (shared, failover, key_shared).
+> * If you want to achieve both effects simultaneously, combine exclusive subscription mode with other subscription modes for consumers.
+
+### Exclusive
+
+In *exclusive* mode, only a single consumer is allowed to attach to the subscription. If multiple consumers subscribe to a topic using the same subscription, an error occurs.
+
+In the diagram below, only **Consumer A-0** is allowed to consume messages.
+
+> Exclusive mode is the default subscription mode.
+
+![Exclusive subscriptions](assets/pulsar-exclusive-subscriptions.png)
+
+### Failover
+
+In *failover* mode, multiple consumers can attach to the same subscription. A master consumer is picked for a non-partitioned topic, or for each partition of a partitioned topic, and receives messages. When the master consumer disconnects, all (non-acknowledged and subsequent) messages are delivered to the next consumer in line.
+
+For partitioned topics, the broker sorts consumers by priority level and by the lexicographical order of consumer names, and then tries to evenly assign partitions to the consumers with the highest priority level.
+
+For a non-partitioned topic, the broker picks consumers in the order in which they subscribe to the topic.
+
+In the diagram below, **Consumer-B-0** is the master consumer while **Consumer-B-1** would be the next consumer in line to receive messages if **Consumer-B-0** is disconnected.
+
+![Failover subscriptions](assets/pulsar-failover-subscriptions.png)
+
+### Shared
+
+In *shared* or *round robin* mode, multiple consumers can attach to the same subscription. Messages are delivered in a round robin distribution across consumers, and any given message is delivered to only one consumer. When a consumer disconnects, all the messages that were sent to it and not acknowledged will be rescheduled for sending to the remaining consumers.
+
+In the diagram below, **Consumer-C-1** and **Consumer-C-2** are able to subscribe to the topic, but **Consumer-C-3** and others could as well.
+
+> #### Limitations of shared mode
+> When using shared mode, be aware that:
+> * Message ordering is not guaranteed.
+> * You cannot use cumulative acknowledgment with shared mode.
+
+![Shared subscriptions](assets/pulsar-shared-subscriptions.png)
+
+### Key_Shared
+
+In *Key_Shared* mode, multiple consumers can attach to the same subscription. Messages are delivered across consumers, and messages with the same key or same ordering key are delivered to only one consumer. No matter how many times a message is re-delivered, it is delivered to the same consumer. When a consumer connects or disconnects, the consumer serving some keys of messages changes.
+
+> #### Limitations of Key_Shared mode
+> When you use Key_Shared mode, be aware that:
+> * You need to specify a key or orderingKey for messages.
+> * You cannot use cumulative acknowledgment with Key_Shared mode.
+
+![Key_Shared subscriptions](assets/pulsar-key-shared-subscriptions.png)
+
+**You can disable Key_Shared subscription in the `broker.conf` file.**
+
+## Multi-topic subscriptions
+
+When a consumer subscribes to a Pulsar topic, by default it subscribes to one specific topic, such as `persistent://public/default/my-topic`. As of Pulsar version 1.23.0-incubating, however, Pulsar consumers can simultaneously subscribe to multiple topics. You can define a list of topics in two ways:
+
+* On the basis of a [**reg**ular **ex**pression](https://en.wikipedia.org/wiki/Regular_expression) (regex), for example `persistent://public/default/finance-.*`
+* By explicitly defining a list of topics
+
+> When subscribing to multiple topics by regex, all topics must be in the same [namespace](#namespaces).
+
+When subscribing to multiple topics, the Pulsar client automatically makes a call to the Pulsar API to discover the topics that match the regex pattern/list, and then subscribes to all of them. If any of the topics do not exist, the consumer auto-subscribes to them once the topics are created.
+
+> #### No ordering guarantees across multiple topics
+> When a producer sends messages to a single topic, all messages are guaranteed to be read from that topic in the same order. However, these guarantees do not hold across multiple topics. So when a producer sends messages to multiple topics, the order in which messages are read from those topics is not guaranteed to be the same.
+
+The following are multi-topic subscription examples for Java.
+
+```java
+import java.util.regex.Pattern;
+
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.PulsarClient;
+
+PulsarClient pulsarClient = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+
+// Subscribe to all topics in a namespace
+Pattern allTopicsInNamespace = Pattern.compile("persistent://public/default/.*");
+Consumer allTopicsConsumer = pulsarClient.newConsumer()
+                .topicsPattern(allTopicsInNamespace)
+                .subscriptionName("subscription-1")
+                .subscribe();
+
+// Subscribe to a subset of topics in a namespace, based on regex
+Pattern someTopicsInNamespace = Pattern.compile("persistent://public/default/foo.*");
+Consumer someTopicsConsumer = pulsarClient.newConsumer()
+                .topicsPattern(someTopicsInNamespace)
+                .subscriptionName("subscription-1")
+                .subscribe();
+```
+
+For code examples, see [Java](client-libraries-java.md#multi-topic-subscriptions).
+
+## Partitioned topics
+
+Normal topics are served only by a single broker, which limits the maximum throughput of the topic. *Partitioned topics* are a special type of topic that are handled by multiple brokers, thus allowing for higher throughput.
+
+A partitioned topic is actually implemented as N internal topics, where N is the number of partitions. When publishing messages to a partitioned topic, each message is routed to one of several brokers. The distribution of partitions across brokers is handled automatically by Pulsar.
+
+The diagram below illustrates this:
+
+![](assets/partitioning.png)
+
+The **Topic1** topic has five partitions (**P0** through **P4**) split across three brokers. Because there are more partitions than brokers, two brokers handle two partitions apiece, while the third handles only one (again, Pulsar handles this distribution of partitions automatically).
+
+Messages for this topic are broadcast to two consumers. The [routing mode](#routing-modes) determines the partition to which each message is published, while the [subscription mode](#subscription-modes) determines which messages go to which consumers.
+
+Decisions about routing and subscription modes can be made separately in most cases. In general, throughput concerns should guide partitioning/routing decisions while subscription decisions should be guided by application semantics.
+
+There is no difference between partitioned topics and normal topics in terms of how subscription modes work, as partitioning only determines what happens between when a message is published by a producer and processed and acknowledged by a consumer.
+
+Partitioned topics need to be explicitly created via the [admin API](admin-api-overview.md). The number of partitions can be specified when creating the topic.
+
+### Routing modes
+
+When publishing to partitioned topics, you must specify a *routing mode*. The routing mode determines which partition---that is, which internal topic---each message should be published to.
+
+There are three {@inject: javadoc:MessageRoutingMode:/client/org/apache/pulsar/client/api/MessageRoutingMode} options available:
+
+Mode     | Description
+:--------|:------------
+`RoundRobinPartition` | If no key is provided, the producer publishes messages across all partitions in round-robin fashion to achieve maximum throughput. Note that round-robin is not done per individual message; rather, it is set to the same boundary as the batching delay, to ensure that batching is effective. If a key is specified on the message, the partitioned producer hashes the key and assigns the message to a particular partition. This is the default mode.
+`SinglePartition` | If no key is provided, the producer randomly picks one single partition and publishes all the messages into that partition. If a key is specified on the message, the partitioned producer hashes the key and assigns the message to a particular partition.
+`CustomPartition` | Use a custom message router implementation that is called to determine the partition for a particular message. Users can create a custom routing mode by using the [Java client](client-libraries-java.md) and implementing the {@inject: javadoc:MessageRouter:/client/org/apache/pulsar/client/api/MessageRouter} interface.
+
+### Ordering guarantee
+
+The ordering of messages is related to the MessageRoutingMode and the message key. Usually, users want a per-key-partition ordering guarantee.
+
+If a key is attached to a message, the message is routed to the corresponding partition based on the hashing scheme specified by {@inject: javadoc:HashingScheme:/client/org/apache/pulsar/client/api/HashingScheme} in {@inject: javadoc:ProducerBuilder:/client/org/apache/pulsar/client/api/ProducerBuilder}, when using either the `SinglePartition` or the `RoundRobinPartition` mode.
+
+Ordering guarantee | Description | Routing Mode and Key
+:------------------|:------------|:------------
+Per-key-partition  | All the messages with the same key will be in order and be placed in the same partition. | Use either `SinglePartition` or `RoundRobinPartition` mode, and a key is provided for each message.
+Per-producer       | All the messages from the same producer will be in order. | Use `SinglePartition` mode, and no key is provided for each message.
+
+### Hashing scheme
+
+{@inject: javadoc:HashingScheme:/client/org/apache/pulsar/client/api/HashingScheme} is an enum that represents the set of standard hashing functions available when choosing the partition to use for a particular message.
+
+There are two types of standard hashing functions available: `JavaStringHash` and `Murmur3_32Hash`.
+The default hashing function for the producer is `JavaStringHash`.
+Note that `JavaStringHash` is not useful when producers can be from multiple language clients; in this case, it is recommended to use `Murmur3_32Hash`.
+
+
+
+## Non-persistent topics
+
+
+By default, Pulsar persistently stores *all* unacknowledged messages on multiple [BookKeeper](concepts-architecture-overview.md#persistent-storage) bookies (storage nodes). Data for messages on persistent topics can thus survive broker restarts and subscriber failover.
+
+Pulsar also, however, supports **non-persistent topics**, which are topics on which messages are *never* persisted to disk and live only in memory. When using non-persistent delivery, killing a Pulsar broker or disconnecting a subscriber to a topic means that all in-transit messages are lost on that (non-persistent) topic, meaning that clients may see message loss.
+
+Non-persistent topics have names of this form (note the `non-persistent` in the name):
+
+```http
+non-persistent://tenant/namespace/topic
+```
+
+> For more info on using non-persistent topics, see the [Non-persistent messaging cookbook](cookbooks-non-persistent.md).
+
+In non-persistent topics, brokers immediately deliver messages to all connected subscribers *without persisting them* in [BookKeeper](concepts-architecture-overview.md#persistent-storage). If a subscriber is disconnected, the broker will not be able to deliver those in-transit messages, and subscribers will never be able to receive those messages again. Eliminating the persistent storage step makes messaging on non-persistent topics slightly faster than on persistent topics in some cases, but with the caveat that some of the core benefits of Pulsar are lost.
+
+> With non-persistent topics, message data lives only in memory. If a message broker fails or message data can otherwise not be retrieved from memory, your message data may be lost. Use non-persistent topics only if you're *certain* that your use case requires it and can sustain it.
+
+By default, non-persistent topics are enabled on Pulsar brokers. You can disable them in the broker's [configuration](reference-configuration.md#broker-enableNonPersistentTopics). You can manage non-persistent topics using the [`pulsar-admin topics`](reference-pulsar-admin.md#topics) interface.
+
+### Performance
+
+Non-persistent messaging is usually faster than persistent messaging because brokers don't persist messages and immediately send acks back to the producer as soon as that message is delivered to connected brokers. Producers thus see comparatively low publish latency with non-persistent topics.
+
+### Client API
+
+Producers and consumers can connect to non-persistent topics in the same way as persistent topics, with the crucial difference that the topic name must start with `non-persistent`. All three subscription modes---[exclusive](#exclusive), [shared](#shared), and [failover](#failover)---are supported for non-persistent topics.
+
+Here's an example [Java consumer](client-libraries-java.md#consumers) for a non-persistent topic:
+
+```java
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+String npTopic = "non-persistent://public/default/my-topic";
+String subscriptionName = "my-subscription-name";
+
+Consumer consumer = client.newConsumer()
+        .topic(npTopic)
+        .subscriptionName(subscriptionName)
+        .subscribe();
+```
+
+Here's an example [Java producer](client-libraries-java.md#producer) for the same non-persistent topic:
+
+```java
+Producer producer = client.newProducer()
+        .topic(npTopic)
+        .create();
+```
+
+## Message retention and expiry
+
+By default, Pulsar message brokers:
+
+* immediately delete *all* messages that have been acknowledged by a consumer, and
+* [persistently store](concepts-architecture-overview.md#persistent-storage) all unacknowledged messages in a message backlog.
+
+Pulsar has two features, however, that enable you to override this default behavior:
+
+* Message **retention** enables you to store messages that have been acknowledged by a consumer
+* Message **expiry** enables you to set a time to live (TTL) for messages that have not yet been acknowledged
+
+> All message retention and expiry is managed at the [namespace](#namespaces) level. For a how-to, see the [Message retention and expiry](cookbooks-retention-expiry.md) cookbook.
+
+The diagram below illustrates both concepts:
+
+![Message retention and expiry](assets/retention-expiry.png)
+
+With message retention, shown at the top, a retention policy applied to all topics in a namespace dictates that some messages are durably stored in Pulsar even though they've already been acknowledged. Acknowledged messages that are not covered by the retention policy are deleted. Without a retention policy, *all* of the acknowledged messages would be deleted.
+
+With message expiry, shown at the bottom, some messages are deleted, even though they haven't been acknowledged, because they've expired according to the TTL applied to the namespace (for example because a TTL of 5 minutes has been applied and the messages haven't been acknowledged but are 10 minutes old).
+
+## Message deduplication
+
+Message duplication occurs when a message is [persisted](concepts-architecture-overview.md#persistent-storage) by Pulsar more than once. Message deduplication is an optional Pulsar feature that prevents unnecessary message duplication by processing each message only once, even if the message is received more than once.
+
+The following diagram illustrates what happens when message deduplication is disabled vs. enabled:
+
+![Pulsar message deduplication](assets/message-deduplication.png)
+
+
+Message deduplication is disabled in the scenario shown at the top. Here, a producer publishes message 1 on a topic; the message reaches a Pulsar broker and is [persisted](concepts-architecture-overview.md#persistent-storage) to BookKeeper. The producer then sends message 1 again (in this case due to some retry logic), and the message is received by the broker and stored in BookKeeper again, which means that duplication has occurred.
+
+In the second scenario at the bottom, the producer publishes message 1, which is received by the broker and persisted, as in the first scenario. When the producer attempts to publish the message again, however, the broker knows that it has already seen message 1 and thus does not persist the message.
+
+> Message deduplication is handled at the namespace level or the topic level. For more instructions, see the [message deduplication cookbook](cookbooks-deduplication.md).
+
+
+### Producer idempotency
+
+The other available approach to message deduplication is to ensure that each message is *only produced once*. This approach is typically called **producer idempotency**. The drawback of this approach is that it defers the work of message deduplication to the application. In Pulsar, this is handled at the [broker](reference-terminology.md#broker) level, so you do not need to modify your Pulsar client code. Instead, you only need to make administrative changes. For details, see [Managing message deduplication](cookbooks-deduplication.md).
+
+### Deduplication and effectively-once semantics
+
+Message deduplication makes Pulsar an ideal messaging system to be used in conjunction with stream processing engines (SPEs) and other systems seeking to provide effectively-once processing semantics. Messaging systems that do not offer automatic message deduplication require the SPE or other system to guarantee deduplication, which means that strict message ordering comes at the cost of burdening the application with the responsibility of deduplication. With Pulsar, strict ordering guarantees come at no application-level cost.
+
+> You can find more in-depth information in [this post](https://www.splunk.com/en_us/blog/it/exactly-once-is-not-exactly-the-same.html).
+
+## Delayed message delivery
+Delayed message delivery enables you to consume a message later rather than immediately. In this mechanism, a message is stored in BookKeeper after it is published to a broker, the `DelayedDeliveryTracker` maintains the time index (time -> messageId) in memory, and the message is delivered to a consumer once the specified delay has passed.
+
+Delayed message delivery only works in Shared subscription mode. In Exclusive and Failover subscription modes, the delayed message is dispatched immediately.
+
+The diagram below illustrates the concept of delayed message delivery:
+
+![Delayed Message Delivery](assets/message_delay.png)
+
+A broker saves a message without any check. When a consumer consumes a message, if the message is set to be delayed, the message is added to the `DelayedDeliveryTracker`. A subscription checks the `DelayedDeliveryTracker` and gets the messages whose delay has expired.
+
+### Broker
+Delayed message delivery is enabled by default. You can change it in the broker configuration file as below:
+
+```
+# Whether to enable the delayed delivery for messages.
+# If disabled, messages are immediately delivered and there is no tracking overhead.
+delayedDeliveryEnabled=true
+
+# Control the ticking time for the retry of delayed message delivery,
+# affecting the accuracy of the delivery time compared to the scheduled time.
+# Default is 1 second.
+delayedDeliveryTickTimeMillis=1000
+```
+
+### Producer
+The following is an example of delayed message delivery for a producer in Java:
+```java
+// message to be delivered after the configured delay
+producer.newMessage().deliverAfter(3L, TimeUnit.MINUTES).value("Hello Pulsar!").send();
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/concepts-multi-tenancy.md b/site2/website/versioned_docs/version-2.7.0/concepts-multi-tenancy.md
new file mode 100644
index 00000000000000..8ba5e84533746b
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/concepts-multi-tenancy.md
@@ -0,0 +1,55 @@
+---
+id: version-2.7.0-concepts-multi-tenancy
+title: Multi Tenancy
+sidebar_label: Multi Tenancy
+original_id: concepts-multi-tenancy
+---
+
+Pulsar was created from the ground up as a multi-tenant system. To support multi-tenancy, Pulsar has a concept of tenants. Tenants can be spread across clusters and can each have their own [authentication and authorization](security-overview.md) scheme applied to them. They are also the administrative unit at which storage quotas, [message TTL](cookbooks-retention-expiry.md#time-to-live-ttl), and isolation policies can be managed.
+
+The multi-tenant nature of Pulsar is reflected most visibly in topic URLs, which have this structure:
+
+```http
+persistent://tenant/namespace/topic
+```
+
+As you can see, the tenant is the most basic unit of categorization for topics (more fundamental than the namespace and topic name).
+
+## Tenants
+
+To each tenant in a Pulsar instance you can assign:
+
+* An [authorization](security-authorization.md) scheme
+* The set of [clusters](reference-terminology.md#cluster) to which the tenant's configuration applies
+
+## Namespaces
+
+Tenants and namespaces are two key concepts of Pulsar to support multi-tenancy.
+
+* Pulsar is provisioned for specified tenants with appropriate capacity allocated to the tenant.
+* A namespace is the administrative unit nomenclature within a tenant. The configuration policies set on a namespace apply to all the topics created in that namespace. A tenant may create multiple namespaces via self-administration using the REST API and the [`pulsar-admin`](reference-pulsar-admin.md) CLI tool. For instance, a tenant with different applications can create a separate namespace for each application.
+
+Names for topics in the same namespace will look like this:
+
+```http
+persistent://tenant/app1/topic-1
+
+persistent://tenant/app1/topic-2
+
+persistent://tenant/app1/topic-3
+```
+
+### Namespace change events and topic-level policies
+
+Pulsar is a multi-tenant event streaming system. Administrators can manage the tenants and namespaces by setting policies at different levels. However, the policies, such as the retention policy and the storage quota policy, are only available at the namespace level. In many use cases, users need to set a policy at the topic level. The namespace change events approach is proposed for supporting topic-level policies in an efficient way. In this approach, Pulsar is used as an event log to store namespace change events (such as topic policy changes). This approach has a few benefits:
+
+- It avoids introducing additional load to ZooKeeper.
+- It uses Pulsar as an event log for propagating the policy cache, which scales efficiently.
+- It allows using Pulsar SQL to query the namespace changes and audit the system.
+
+Each namespace has a system topic `__change_events`. This system topic is used for storing change events for a given namespace. The following figure illustrates how to use namespace change events to implement a topic-level policy.
+
+### Namespace change events and topic-level policies
+
+Pulsar is a multi-tenant event streaming system. Administrators can manage tenants and namespaces by setting policies at different levels. However, policies such as the retention policy and the storage quota policy are only available at the namespace level. In many use cases, users need to set a policy at the topic level. The namespace change events approach is proposed to support topic-level policies in an efficient way. In this approach, Pulsar is used as an event log to store namespace change events (such as topic policy changes). This approach has a few benefits:
+
+- It avoids introducing additional load on ZooKeeper.
+- It uses Pulsar as an event log for propagating the policy cache, which scales efficiently.
+- It allows Pulsar SQL to query the namespace changes and audit the system.
+
+Each namespace has a system topic `__change_events`. This system topic stores the change events for the namespace. The following figure illustrates how namespace change events are used to implement a topic-level policy.
+
+1. Pulsar admin clients communicate with the admin RESTful API to update topic-level policies.
+2. Any broker that receives the admin HTTP request publishes a topic policy change event to the corresponding `__change_events` topic of the namespace.
+3. Each broker that owns one or more namespace bundles subscribes to the `__change_events` topic to receive change events of the namespace. It then applies the change events to the policy cache.
+4. Once the policy cache is updated, the broker sends the response back to the Pulsar admin clients.
diff --git a/site2/website/versioned_docs/version-2.7.0/concepts-transactions.md b/site2/website/versioned_docs/version-2.7.0/concepts-transactions.md
new file mode 100644
index 00000000000000..29313daeb3ef3c
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/concepts-transactions.md
@@ -0,0 +1,30 @@
+---
+id: version-2.7.0-transactions
+title: Transactions
+sidebar_label: Overview
+original_id: transactions
+---
+
+Transactional semantics enable event streaming applications to consume, process, and produce messages in one atomic operation. In Pulsar, a producer or consumer can work with messages across multiple topics and partitions and ensure those messages are processed as a single unit.
+
+The following concepts help you understand Pulsar transactions.
+
+## Transaction coordinator and transaction log
+The transaction coordinator maintains the topics and subscriptions that interact in a transaction. When a transaction is committed, the transaction coordinator interacts with the topic owner broker to complete the transaction.
+
+The transaction coordinator maintains the entire life cycle of transactions, and prevents a transaction from entering an incorrect status.
+
+The transaction coordinator handles transaction timeouts, and ensures that a transaction is aborted after it times out.
+
+All the transaction metadata is persisted in the transaction log. The transaction log is backed by a Pulsar topic. If the transaction coordinator crashes, it can restore the transaction metadata from the transaction log.
+
+## Transaction ID
+The transaction ID (TxnID) identifies a unique transaction in Pulsar. The transaction ID is 128 bits long. The highest 16 bits are reserved for the ID of the transaction coordinator, and the remaining bits are used for monotonically increasing numbers in each transaction coordinator. The TxnID makes it easy to locate a transaction after a crash.
+
+## Transaction buffer
+Messages produced within a transaction are stored in the transaction buffer. The messages in the transaction buffer are not materialized (visible) to consumers until the transactions are committed. The messages in the transaction buffer are discarded when the transactions are aborted.
+
+## Pending acknowledge state
+Message acknowledgments within a transaction are maintained in the pending acknowledge state before the transaction completes. If a message is in the pending acknowledge state, the message cannot be acknowledged by other transactions until the message is removed from the pending acknowledge state.
+
+The pending acknowledge state is persisted to the pending acknowledge log. The pending acknowledge log is backed by a Pulsar topic. A new broker can restore the state from the pending acknowledge log to ensure the acknowledgment is not lost.
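+
+The following is a minimal sketch of how these pieces fit together from a client's point of view. It assumes a Pulsar 2.7 client with transaction support enabled on both the broker and the client; the topic and subscription names are hypothetical:
+
+```java
+import java.util.concurrent.TimeUnit;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.Producer;
+import org.apache.pulsar.client.api.PulsarClient;
+import org.apache.pulsar.client.api.transaction.Transaction;
+
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .enableTransaction(true) // assumes transactionCoordinatorEnabled=true on the broker
+        .build();
+
+Consumer<byte[]> consumer = client.newConsumer()
+        .topic("input-topic")             // hypothetical topic name
+        .subscriptionName("txn-sub")      // hypothetical subscription name
+        .subscribe();
+Producer<byte[]> producer = client.newProducer()
+        .topic("output-topic")            // hypothetical topic name
+        .sendTimeout(0, TimeUnit.SECONDS) // transactional producers require no send timeout
+        .create();
+
+Transaction txn = client.newTransaction()
+        .withTransactionTimeout(5, TimeUnit.MINUTES)
+        .build().get();
+
+// Consume, process, and produce as one atomic unit.
+Message<byte[]> msg = consumer.receive();
+producer.newMessage(txn).value(msg.getData()).sendAsync();
+consumer.acknowledgeAsync(msg.getMessageId(), txn);
+txn.commit().get();
+```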
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/cookbooks-compaction.md b/site2/website/versioned_docs/version-2.7.0/cookbooks-compaction.md
new file mode 100644
index 00000000000000..6f1ce2888fa5b6
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/cookbooks-compaction.md
@@ -0,0 +1,127 @@
+---
+id: version-2.7.0-cookbooks-compaction
+title: Topic compaction
+sidebar_label: Topic compaction
+original_id: cookbooks-compaction
+---
+
+Pulsar's [topic compaction](concepts-topic-compaction.md#compaction) feature enables you to create **compacted** topics in which older, "obscured" entries are pruned from the topic, allowing for faster reads through the topic's history (which messages are deemed obscured/outdated/irrelevant will depend on your use case).
+
+To use compaction:
+
+* You need to give messages keys, as topic compaction in Pulsar takes place on a *per-key basis* (i.e. messages are compacted based on their key). For a stock ticker use case, the stock symbol---e.g. `AAPL` or `GOOG`---could serve as the key (more on this [below](#when-should-i-use-compacted-topics)). Messages without keys will be left alone by the compaction process.
+* Compaction can be configured to run [automatically](#configuring-compaction-to-run-automatically), or you can manually [trigger](#triggering-compaction-manually) compaction using the Pulsar administrative API.
+* Your consumers must be [configured](#consumer-configuration) to read from compacted topics ([Java consumers](#java), for example, have a `readCompacted` setting that must be set to `true`). If this configuration is not set, consumers will still be able to read from the non-compacted topic.
+
+
+> Compaction only works on messages that have keys (as in the stock ticker example the stock symbol serves as the key for each message). Keys can thus be thought of as the axis along which compaction is applied. Messages that don't have keys are simply ignored by compaction.
+
+## When should I use compacted topics?
+
+The classic example of a topic that could benefit from compaction would be a stock ticker topic through which consumers can access up-to-date values for specific stocks. Imagine a scenario in which messages carrying stock value data use the stock symbol as the key (`GOOG`, `AAPL`, `TWTR`, etc.). Compacting this topic would give consumers on the topic two options:
+
+* They can read from the "original," non-compacted topic in case they need access to "historical" values, i.e. the entirety of the topic's messages.
+* They can read from the compacted topic if they only want to see the most up-to-date messages.
+
+Thus, if you're using a Pulsar topic called `stock-values`, some consumers could have access to all messages in the topic (perhaps because they're performing some kind of number crunching of all values in the last hour) while the consumers used to power the real-time stock ticker only see the compacted topic (and thus aren't forced to process outdated messages). Which variant of the topic any given consumer pulls messages from is determined by the consumer's [configuration](#consumer-configuration).
+
+> One of the benefits of compaction in Pulsar is that you aren't forced to choose between compacted and non-compacted topics, as the compaction process leaves the original topic as-is and essentially adds an alternate topic. In other words, you can run compaction on a topic and consumers that need access to the non-compacted version of the topic will not be adversely affected.
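+
+To make the stock-ticker pattern concrete, the following is a minimal sketch that publishes keyed updates to a hypothetical `stock-values` topic on a local broker; after compaction, a `readCompacted` consumer sees only the latest value per key:
+
+```java
+import org.apache.pulsar.client.api.Producer;
+import org.apache.pulsar.client.api.PulsarClient;
+
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+
+Producer<byte[]> producer = client.newProducer()
+        .topic("persistent://public/default/stock-values") // hypothetical topic
+        .create();
+
+// Each message carries the stock symbol as its key; compaction retains
+// only the most recent message per key.
+producer.newMessage().key("AAPL").value("116.32".getBytes()).send();
+producer.newMessage().key("GOOG").value("1827.36".getBytes()).send();
+producer.newMessage().key("AAPL").value("116.59".getBytes()).send(); // supersedes the first AAPL value
+```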
+
+
+## Configuring compaction to run automatically
+
+Tenant administrators can configure a policy for compaction at the namespace level. The policy specifies how large the topic backlog can grow before compaction is triggered.
+
+For example, to trigger compaction when the backlog reaches 100MB:
+
+```bash
+$ bin/pulsar-admin namespaces set-compaction-threshold \
+  --threshold 100M my-tenant/my-namespace
+```
+
+Configuring the compaction threshold on a namespace will apply to all topics within that namespace.
+
+## Triggering compaction manually
+
+In order to run compaction on a topic, you need to use the [`topics compact`](reference-pulsar-admin.md#topics-compact) command for the [`pulsar-admin`](reference-pulsar-admin.md) CLI tool. Here's an example:
+
+```bash
+$ bin/pulsar-admin topics compact \
+  persistent://my-tenant/my-namespace/my-topic
+```
+
+The `pulsar-admin` tool runs compaction via the Pulsar {@inject: rest:REST:/} API. To run compaction in its own dedicated process, i.e. *not* through the REST API, you can use the [`pulsar compact-topic`](reference-cli-tools.md#pulsar-compact-topic) command. Here's an example:
+
+```bash
+$ bin/pulsar compact-topic \
+  --topic persistent://my-tenant/my-namespace/my-topic
+```
+
+> Running compaction in its own process is recommended when you want to avoid interfering with the broker's performance. Broker performance should only be affected, however, when running compaction on topics with a large keyspace (i.e. when there are many keys on the topic). The first phase of the compaction process keeps a copy of each key in the topic, which can create memory pressure as the number of keys grows. Using the `pulsar-admin topics compact` command to run compaction through the REST API should present no issues in the overwhelming majority of cases; using `pulsar compact-topic` should correspondingly be considered an edge case.
+
+The `pulsar compact-topic` command communicates with [ZooKeeper](https://zookeeper.apache.org) directly. In order to establish communication with ZooKeeper, though, the `pulsar` CLI tool will need to have a valid [broker configuration](reference-configuration.md#broker). You can either supply a proper configuration in `conf/broker.conf` or specify a non-default location for the configuration:
+
+```bash
+$ bin/pulsar compact-topic \
+  --broker-conf /path/to/broker.conf \
+  --topic persistent://my-tenant/my-namespace/my-topic
+
+# If the configuration is in conf/broker.conf
+$ bin/pulsar compact-topic \
+  --topic persistent://my-tenant/my-namespace/my-topic
+```
+
+#### When should I trigger compaction?
+
+How often you [trigger compaction](#triggering-compaction-manually) will vary widely based on the use case. If you want a compacted topic to be extremely speedy on read, then you should run compaction fairly frequently.
+
+## Consumer configuration
+
+Pulsar consumers and readers need to be configured to read from compacted topics. The sections below show you how to enable compacted topic reads for Pulsar's language clients. If the `readCompacted` setting is not enabled, a consumer simply reads from the full, non-compacted topic.
+
+### Java
+
+In order to read from a compacted topic using a Java consumer, the `readCompacted` parameter must be set to `true`. Here's an example consumer for a compacted topic:
+
+```java
+Consumer<byte[]> compactedTopicConsumer = client.newConsumer()
+        .topic("some-compacted-topic")
+        .readCompacted(true)
+        .subscribe();
+```
+
+As mentioned above, topic compaction in Pulsar works on a *per-key basis*. That means that messages that you produce on compacted topics need to have keys (the content of the key will depend on your use case). Messages that don't have keys will be ignored by the compaction process. Here's an example of a keyed message built with the producer's message builder:
+
+```java
+producer.newMessage()
+        .key("some-key")
+        .value(someByteArray);
+```
+
+The example below shows a keyed message being produced on a compacted Pulsar topic:
+
+```java
+import org.apache.pulsar.client.api.Producer;
+import org.apache.pulsar.client.api.PulsarClient;
+
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+
+Producer<byte[]> compactedTopicProducer = client.newProducer()
+        .topic("some-compacted-topic")
+        .create();
+
+compactedTopicProducer.newMessage()
+        .key("some-key")
+        .value(someByteArray)
+        .send();
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/cookbooks-deduplication.md b/site2/website/versioned_docs/version-2.7.0/cookbooks-deduplication.md
new file mode 100644
index 00000000000000..5be280a03dc1a0
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/cookbooks-deduplication.md
@@ -0,0 +1,124 @@
+---
+id: version-2.7.0-cookbooks-deduplication
+title: Message deduplication
+sidebar_label: Message deduplication
+original_id: cookbooks-deduplication
+---
+
+When **message deduplication** is enabled, each message produced on a Pulsar topic is persisted to disk *only once*, even if the message is produced more than once. Message deduplication is handled automatically on the server side.
+
+To use message deduplication in Pulsar, you need to configure your Pulsar brokers and clients.
+
+## How it works
+
+You can enable or disable message deduplication at the namespace level or the topic level. By default, it is disabled on all namespaces and topics. You can enable it in the following ways:
+
+* Enable deduplication for all namespaces/topics at the broker level.
+* Enable deduplication for a specific namespace with the `pulsar-admin namespaces` interface.
+* Enable deduplication for a specific topic with the `pulsar-admin topics` interface.
+
+## Configure message deduplication
+
+You can configure message deduplication in Pulsar using the [`broker.conf`](reference-configuration.md#broker) configuration file. The following deduplication-related parameters are available.
+
+Parameter | Description | Default
+:---------|:------------|:-------
+`brokerDeduplicationEnabled` | Sets the default behavior for message deduplication in the Pulsar broker. If it is set to `true`, message deduplication is enabled on all namespaces/topics. If it is set to `false`, you have to enable or disable deduplication at the namespace level or the topic level. | `false`
+`brokerDeduplicationMaxNumberOfProducers` | The maximum number of producers for which information is stored for deduplication purposes. | `10000`
+`brokerDeduplicationEntriesInterval` | The number of entries after which a deduplication informational snapshot is taken. A larger interval leads to fewer snapshots being taken, though this lengthens the topic recovery time (the time required for entries published after the snapshot to be replayed). | `1000`
+`brokerDeduplicationProducerInactivityTimeoutMinutes` | The time of inactivity (in minutes) after which the broker discards deduplication information related to a disconnected producer. | `360` (6 hours)
+
+### Set default value at the broker-level
+
+By default, message deduplication is *disabled* on all Pulsar namespaces/topics. To enable it on all namespaces/topics, set the `brokerDeduplicationEnabled` parameter to `true` and re-start the broker.
+
+Regardless of the value of `brokerDeduplicationEnabled`, enabling or disabling deduplication via the Pulsar admin CLI overrides the default setting at the broker level.
+
+### Enable message deduplication
+
+Though message deduplication is disabled by default at the broker level, you can enable message deduplication for a specific namespace or topic using the [`pulsar-admin namespaces set-deduplication`](reference-pulsar-admin.md#namespace-set-deduplication) or the [`pulsar-admin topics set-deduplication`](reference-pulsar-admin.md#topic-set-deduplication) command. You can use the `--enable`/`-e` flag and specify the namespace/topic.
+
+The following example shows how to enable message deduplication at the namespace level.
+
+```bash
+$ bin/pulsar-admin namespaces set-deduplication \
+  public/default \
+  --enable # or just -e
+```
+
+### Disable message deduplication
+
+Even if you enable message deduplication at the broker level, you can disable message deduplication for a specific namespace or topic using the [`pulsar-admin namespace set-deduplication`](reference-pulsar-admin.md#namespace-set-deduplication) or the [`pulsar-admin topics set-deduplication`](reference-pulsar-admin.md#topic-set-deduplication) command. Use the `--disable`/`-d` flag and specify the namespace/topic.
+
+The following example shows how to disable message deduplication at the namespace level.
+
+```bash
+$ bin/pulsar-admin namespaces set-deduplication \
+  public/default \
+  --disable # or just -d
+```
+
+## Pulsar clients
+
+If you enable message deduplication in Pulsar brokers, you need to complete the following tasks for your client producers:
+
+1. Specify a name for the producer.
+1. Set the message timeout to `0` (namely, no timeout).
+
+The instructions for Java, Python, and C++ clients are different.
+
+
+
+To enable message deduplication on a [Java producer](client-libraries-java.md#producers), set the producer name using the `producerName` setter, and set the timeout to `0` using the `sendTimeout` setter.
+
+```java
+import org.apache.pulsar.client.api.Producer;
+import org.apache.pulsar.client.api.PulsarClient;
+import java.util.concurrent.TimeUnit;
+
+PulsarClient pulsarClient = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+Producer<byte[]> producer = pulsarClient.newProducer()
+        .producerName("producer-1")
+        .topic("persistent://public/default/topic-1")
+        .sendTimeout(0, TimeUnit.SECONDS)
+        .create();
+```
+
+To enable message deduplication on a [Python producer](client-libraries-python.md#producers), set the producer name using `producer_name`, and set the timeout to `0` using `send_timeout_millis`.
+
+```python
+import pulsar
+
+client = pulsar.Client("pulsar://localhost:6650")
+producer = client.create_producer(
+    "persistent://public/default/topic-1",
+    producer_name="producer-1",
+    send_timeout_millis=0)
+```
+
+To enable message deduplication on a [C++ producer](client-libraries-cpp.md#producer), set the producer name using `producer_name`, and set the timeout to `0` using `send_timeout_millis`.
+
+```cpp
+#include <pulsar/Client.h>
+
+std::string serviceUrl = "pulsar://localhost:6650";
+std::string topic = "persistent://some-tenant/ns1/topic-1";
+std::string producerName = "producer-1";
+
+Client client(serviceUrl);
+
+ProducerConfiguration producerConfig;
+producerConfig.setSendTimeout(0);
+producerConfig.setProducerName(producerName);
+
+Producer producer;
+
+Result result = client.createProducer(topic, producerConfig, producer);
+```
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/cookbooks-non-persistent.md b/site2/website/versioned_docs/version-2.7.0/cookbooks-non-persistent.md
new file mode 100644
index 00000000000000..caddc612faec50
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/cookbooks-non-persistent.md
@@ -0,0 +1,59 @@
+---
+id: version-2.7.0-cookbooks-non-persistent
+title: Non-persistent messaging
+sidebar_label: Non-persistent messaging
+original_id: cookbooks-non-persistent
+---
+
+**Non-persistent topics** are Pulsar topics in which message data is *never* [persistently stored](concepts-architecture-overview.md#persistent-storage) and kept only in memory. This cookbook provides:
+
+* A basic [conceptual overview](#overview) of non-persistent topics
+* Information about [configurable parameters](#configuration) related to non-persistent topics
+* A guide to the [CLI interface](#cli) for managing non-persistent topics
+
+## Overview
+
+By default, Pulsar persistently stores *all* unacknowledged messages on multiple [BookKeeper](#persistent-storage) bookies (storage nodes). Data for messages on persistent topics can thus survive broker restarts and subscriber failover.
+
+Pulsar also, however, supports **non-persistent topics**, which are topics on which messages are *never* persisted to disk and live only in memory. When using non-persistent delivery, killing a Pulsar [broker](reference-terminology.md#broker) or disconnecting a subscriber to a topic means that all in-transit messages are lost on that (non-persistent) topic, so clients may see message loss.
+
+Non-persistent topics have names of this form (note the `non-persistent` in the name):
+
+```http
+non-persistent://tenant/namespace/topic
+```
+
+> For more high-level information about non-persistent topics, see the [Concepts and Architecture](concepts-messaging.md#non-persistent-topics) documentation.
+
+## Using
+
+> In order to use non-persistent topics, they must be [enabled](#enabling) in your Pulsar broker configuration.
+
+In order to use non-persistent topics, you only need to differentiate them by name when interacting with them. This [`pulsar-client produce`](reference-cli-tools.md#pulsar-client-produce) command, for example, would produce one message on a non-persistent topic in a standalone cluster:
+
+```bash
+$ bin/pulsar-client produce non-persistent://public/default/example-np-topic \
+  --num-produce 1 \
+  --messages "This message will be stored only in memory"
+```
+
+> For a more thorough guide to non-persistent topics from an administrative perspective, see the [Non-persistent topics](admin-api-topics.md) guide.
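+
+Beyond the CLI, any Pulsar client can target a non-persistent topic the same way. The following is a minimal Java sketch, assuming a broker at `pulsar://localhost:6650` and a hypothetical subscription name; remember that messages only reach subscribers that are connected when the message is published:
+
+```java
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Producer;
+import org.apache.pulsar.client.api.PulsarClient;
+
+PulsarClient client = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .build();
+
+// Subscribe first: non-persistent messages are not stored, so only
+// currently connected subscribers receive them.
+Consumer<byte[]> consumer = client.newConsumer()
+        .topic("non-persistent://public/default/example-np-topic")
+        .subscriptionName("np-sub") // hypothetical subscription name
+        .subscribe();
+
+Producer<byte[]> producer = client.newProducer()
+        .topic("non-persistent://public/default/example-np-topic")
+        .create();
+
+producer.send("This message will be stored only in memory".getBytes());
+```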
+
+## Enabling
+
+In order to enable non-persistent topics in a Pulsar broker, the [`enableNonPersistentTopics`](reference-configuration.md#broker-enableNonPersistentTopics) parameter must be set to `true`. This is the default, and so you won't need to take any action to enable non-persistent messaging.
+
+
+> #### Configuration for standalone mode
+> If you're running Pulsar in standalone mode, the same configurable parameters are available but in the [`standalone.conf`](reference-configuration.md#standalone) configuration file.
+
+If you'd like to enable *only* non-persistent topics in a broker, you can set the [`enablePersistentTopics`](reference-configuration.md#broker-enablePersistentTopics) parameter to `false` and the `enableNonPersistentTopics` parameter to `true`.
+
+## Managing with cli
+
+Non-persistent topics can be managed using the [`pulsar-admin non-persistent`](reference-pulsar-admin.md#non-persistent) command-line interface. With that interface you can perform actions like [creating a partitioned non-persistent topic](reference-pulsar-admin.md#non-persistent-create-partitioned-topic), getting [stats](reference-pulsar-admin.md#non-persistent-stats) for a non-persistent topic, [listing](reference-pulsar-admin.md) non-persistent topics under a namespace, and more.
+
+## Using with Pulsar clients
+
+You shouldn't need to make any changes to your Pulsar clients to use non-persistent messaging beyond making sure that you use proper [topic names](#using) with `non-persistent` as the topic type.
+
diff --git a/site2/website/versioned_docs/version-2.7.0/cookbooks-partitioned.md b/site2/website/versioned_docs/version-2.7.0/cookbooks-partitioned.md
new file mode 100644
index 00000000000000..f3cd57103eb2bb
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/cookbooks-partitioned.md
@@ -0,0 +1,7 @@
+---
+id: version-2.7.0-cookbooks-partitioned
+title: Partitioned topics
+sidebar_label: Partitioned Topics
+original_id: cookbooks-partitioned
+---
+For details of the content, refer to [manage topics](admin-api-topics.md).
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/cookbooks-retention-expiry.md b/site2/website/versioned_docs/version-2.7.0/cookbooks-retention-expiry.md
new file mode 100644
index 00000000000000..9a5cd2fb311e95
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/cookbooks-retention-expiry.md
@@ -0,0 +1,318 @@
+---
+id: version-2.7.0-cookbooks-retention-expiry
+title: Message retention and expiry
+sidebar_label: Message retention and expiry
+original_id: cookbooks-retention-expiry
+---
+
+Pulsar brokers are responsible for handling messages that pass through Pulsar, including [persistent storage](concepts-architecture-overview.md#persistent-storage) of messages. By default, for each topic, brokers only retain messages that are in at least one backlog. A backlog is the set of unacknowledged messages for a particular subscription. As a topic can have multiple subscriptions, a topic can have multiple backlogs.
+
+As a consequence, no messages are retained (by default) on a topic that has not had any subscriptions created for it.
+
+(Note that messages that are no longer being stored are not necessarily immediately deleted, and may in fact still be accessible until the next ledger rollover. Because clients cannot predict when rollovers may happen, it is not wise to rely on a rollover not happening at an inconvenient point in time.)
+
+In Pulsar, you can modify this behavior, with namespace granularity, in two ways:
+
+* You can persistently store messages that are not within a backlog (because they've been acknowledged on every existing subscription, or because there are no subscriptions) by setting [retention policies](#retention-policies).
+* Messages that are not acknowledged within a specified timeframe can be automatically acknowledged by specifying the [time to live](#time-to-live-ttl) (TTL).
+
+Pulsar's [admin interface](admin-api-overview.md) enables you to manage both retention policies and TTL with namespace granularity (and thus within a specific tenant and either on a specific cluster or in the [`global`](concepts-architecture-overview.md#global-cluster) cluster).
+
+
+> #### Retention and TTL solve two different problems
+> * Message retention: Keep the data for at least X hours (even if acknowledged)
+> * Time-to-live: Discard data after some time (by automatically acknowledging)
+>
+> Most applications will want to use at most one of these.
+
+
+## Retention policies
+
+By default, when a Pulsar message arrives at a broker, the message is stored until it has been acknowledged on all subscriptions, at which point it is marked for deletion. You can override this behavior and retain messages that have already been acknowledged on all subscriptions by setting a *retention policy* for all topics in a given namespace. Retention is based on both a *size limit* and a *time limit*.
+
+Retention policies are useful when you use the Reader interface. The Reader interface does not use acknowledgments, and messages do not exist within backlogs. You must configure retention for Reader-only use cases.
+
+When you set a retention policy on topics in a namespace, you must set **both** a *size limit* and a *time limit*. You can refer to the following table to set retention policies in `pulsar-admin` and Java.
+
+|Time limit|Size limit| Message retention |
+|----------|----------|------------------------|
+| -1 | -1 | Infinite retention |
+| -1 | >0 | Based on the size limit |
+| >0 | -1 | Based on the time limit |
+| 0 | 0 | Disable message retention (by default) |
+| 0 | >0 | Invalid |
+| >0 | 0 | Invalid |
+| >0 | >0 | Acknowledged messages or messages with no active subscription will not be retained when either time or size reaches the limit. |
+
+The retention settings apply to all messages on topics that do not have any subscriptions, or to messages that have been acknowledged by all subscriptions. The retention policy settings do not affect unacknowledged messages on topics with subscriptions. The unacknowledged messages are controlled by the backlog quota.
+
+When a retention limit on a topic is exceeded, the oldest message is marked for deletion until the set of retained messages falls within the specified limits again.
+
+### Defaults
+
+You can set message retention at the instance level with two parameters: `defaultRetentionTimeInMinutes` and `defaultRetentionSizeInMB`. Both parameters are set to `0` by default.
+
+For more information about the two parameters, refer to the [`broker.conf`](reference-configuration.md#broker) configuration file.
+
+### Set retention policy
+
+You can set a retention policy for a namespace by specifying the namespace, a size limit, and a time limit in `pulsar-admin`, the REST API, or Java.
+
+
+
+You can use the [`set-retention`](reference-pulsar-admin.md#namespaces-set-retention) subcommand and specify a namespace, a size limit using the `-s`/`--size` flag, and a time limit using the `-t`/`--time` flag.
+
+In the following example, the size limit is set to 10 GB and the time limit is set to 3 hours for each topic within the `my-tenant/my-ns` namespace.
+- When the size of messages reaches 10 GB on a topic within 3 hours, the acknowledged messages will not be retained.
+- After 3 hours, even if the message size is less than 10 GB, the acknowledged messages will not be retained.
+
+```shell
+$ pulsar-admin namespaces set-retention my-tenant/my-ns \
+  --size 10G \
+  --time 3h
+```
+
+In the following example, the time is not limited and the size limit is set to 1 TB. The size limit determines the retention.
+
+```shell
+$ pulsar-admin namespaces set-retention my-tenant/my-ns \
+  --size 1T \
+  --time -1
+```
+
+In the following example, the size is not limited and the time limit is set to 3 hours. The time limit determines the retention.
+
+```shell
+$ pulsar-admin namespaces set-retention my-tenant/my-ns \
+  --size -1 \
+  --time 3h
+```
+
+To achieve infinite retention, set both values to `-1`.
+
+```shell
+$ pulsar-admin namespaces set-retention my-tenant/my-ns \
+  --size -1 \
+  --time -1
+```
+
+To disable the retention policy, set both values to `0`.
+
+```shell
+$ pulsar-admin namespaces set-retention my-tenant/my-ns \
+  --size 0 \
+  --time 0
+```
+
+
+{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/retention|operation/setRetention}
+
+> **Note**
+> To disable the retention policy, you need to set both the size and time limits to `0`. Setting either the size limit or the time limit to `0` alone is invalid.
+
+
+```java
+int retentionTime = 10; // 10 minutes
+int retentionSize = 500; // 500 megabytes
+RetentionPolicies policies = new RetentionPolicies(retentionTime, retentionSize);
+admin.namespaces().setRetention(namespace, policies);
+```
+
+
+
+### Get retention policy
+
+You can fetch the retention policy for a namespace by specifying the namespace. The output will be a JSON object with two keys: `retentionTimeInMinutes` and `retentionSizeInMB`.
+
+#### pulsar-admin
+
+Use the [`get-retention`](reference-pulsar-admin.md#namespaces) subcommand and specify the namespace.
+
+##### Example
+
+```shell
+$ pulsar-admin namespaces get-retention my-tenant/my-ns
+{
+  "retentionTimeInMinutes": 10,
+  "retentionSizeInMB": 500
+}
+```
+
+#### REST API
+
+{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace/retention|operation/getRetention}
+
+#### Java
+
+```java
+admin.namespaces().getRetention(namespace);
+```
+
+## Backlog quotas
+
+*Backlogs* are sets of unacknowledged messages for a topic that have been stored by bookies. Pulsar stores all unacknowledged messages in backlogs until they are processed and acknowledged.
+
+You can control the allowable size of backlogs, at the namespace level, using *backlog quotas*. Setting a backlog quota involves setting:
+
+* an allowable *size threshold* for each topic in the namespace
+* a *retention policy* that determines which action the [broker](reference-terminology.md#broker) takes if the threshold is exceeded.
+
+The following retention policies are available:
+
+Policy | Action
+:------|:------
+`producer_request_hold` | The broker will hold and not persist produce request payloads
+`producer_exception` | The broker will disconnect from the client by throwing an exception
+`consumer_backlog_eviction` | The broker will begin discarding backlog messages
+
+
+> #### Beware the distinction between retention policy types
+> As you may have noticed, there are two definitions of the term "retention policy" in Pulsar, one that applies to persistent storage of messages not in backlogs, and one that applies to messages within backlogs.
+
+
+Backlog quotas are handled at the namespace level and can be managed using the commands shown below.
+
+### Set size thresholds and backlog retention policies
+
+You can set a size threshold and backlog retention policy for all of the topics in a [namespace](reference-terminology.md#namespace) by specifying the namespace, a size limit, and a policy by name.
+
+#### pulsar-admin
+
+Use the [`set-backlog-quota`](reference-pulsar-admin.md#namespaces) subcommand and specify a namespace, a size limit using the `-l`/`--limit` flag, and a retention policy using the `-p`/`--policy` flag.
+
+##### Example
+
+```shell
+$ pulsar-admin namespaces set-backlog-quota my-tenant/my-ns \
+  --limit 2G \
+  --policy producer_request_hold
+```
+
+#### REST API
+
+{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/backlogQuota|operation/setBacklogQuota}
+
+#### Java
+
+```java
+long sizeLimit = 2147483648L;
+BacklogQuota.RetentionPolicy policy = BacklogQuota.RetentionPolicy.producer_request_hold;
+BacklogQuota quota = new BacklogQuota(sizeLimit, policy);
+admin.namespaces().setBacklogQuota(namespace, quota);
+```
+
+### Get backlog threshold and backlog retention policy
+
+You can see which size threshold and backlog retention policy has been applied to a namespace.
+
+#### pulsar-admin
+
+Use the [`get-backlog-quotas`](reference-pulsar-admin.md#pulsar-admin-namespaces-get-backlog-quotas) subcommand and specify a namespace. Here's an example:
+
+```shell
+$ pulsar-admin namespaces get-backlog-quotas my-tenant/my-ns
+{
+  "destination_storage": {
+    "limit" : 2147483648,
+    "policy" : "producer_request_hold"
+  }
+}
+```
+
+#### REST API
+
+{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace/backlogQuotaMap|operation/getBacklogQuotaMap}
+
+#### Java
+
+```java
+Map<BacklogQuota.BacklogQuotaType, BacklogQuota> quotas =
+  admin.namespaces().getBacklogQuotas(namespace);
+```
+
+### Remove backlog quotas
+
+#### pulsar-admin
+
+Use the [`remove-backlog-quota`](reference-pulsar-admin.md#pulsar-admin-namespaces-remove-backlog-quota) subcommand and specify a namespace. Here's an example:
+
+```shell
+$ pulsar-admin namespaces remove-backlog-quota my-tenant/my-ns
+```
+
+#### REST API
+
+{@inject: endpoint|DELETE|/admin/v2/namespaces/:tenant/:namespace/backlogQuota|operation/removeBacklogQuota}
+
+#### Java
+
+```java
+admin.namespaces().removeBacklogQuota(namespace);
+```
+
+### Clear backlog
+
+#### pulsar-admin
+
+Use the [`clear-backlog`](reference-pulsar-admin.md#pulsar-admin-namespaces-clear-backlog) subcommand.
+
+##### Example
+
+```shell
+$ pulsar-admin namespaces clear-backlog my-tenant/my-ns
+```
+
+By default, you will be prompted to ensure that you really want to clear the backlog for the namespace. You can override the prompt using the `-f`/`--force` flag.
+
+## Time to live (TTL)
+
+By default, Pulsar stores all unacknowledged messages forever. This can lead to heavy disk space usage in cases where a lot of messages are going unacknowledged. If disk space is a concern, you can set a time to live (TTL) that determines how long unacknowledged messages will be retained.
+
+### Set the TTL for a namespace
+
+#### pulsar-admin
+
+Use the [`set-message-ttl`](reference-pulsar-admin.md#pulsar-admin-namespaces-set-message-ttl) subcommand and specify a namespace and a TTL (in seconds) using the `-ttl`/`--messageTTL` flag.
+
+##### Example
+
+```shell
+$ pulsar-admin namespaces set-message-ttl my-tenant/my-ns \
+  --messageTTL 120 # TTL of 2 minutes
+```
+
+#### REST API
+
+{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/messageTTL|operation/setNamespaceMessageTTL}
+
+#### Java
+
+```java
+admin.namespaces().setNamespaceMessageTTL(namespace, ttlInSeconds);
+```
+
+### Get the TTL configuration for a namespace
+
+#### pulsar-admin
+
+Use the [`get-message-ttl`](reference-pulsar-admin.md#pulsar-admin-namespaces-get-message-ttl) subcommand and specify a namespace.
+
+##### Example
+
+```shell
+$ pulsar-admin namespaces get-message-ttl my-tenant/my-ns
+60
+```
+
+#### REST API
+
+{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace/messageTTL|operation/getNamespaceMessageTTL}
+
+#### Java
+
+```java
+admin.namespaces().getNamespaceMessageTTL(namespace);
+```
+
diff --git a/site2/website/versioned_docs/version-2.7.0/cookbooks-tiered-storage.md b/site2/website/versioned_docs/version-2.7.0/cookbooks-tiered-storage.md
new file mode 100644
index 00000000000000..3d63f20c7820c4
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/cookbooks-tiered-storage.md
@@ -0,0 +1,301 @@
+---
+id: version-2.7.0-cookbooks-tiered-storage
+title: Tiered Storage
+sidebar_label: Tiered Storage
+original_id: cookbooks-tiered-storage
+---
+
+Pulsar's **Tiered Storage** feature allows older backlog data to be offloaded to long-term storage, thereby freeing up space in BookKeeper and reducing storage costs. This cookbook walks you through using tiered storage in your Pulsar cluster.
+
+* Tiered storage uses [Apache jclouds](https://jclouds.apache.org) to support
+[Amazon S3](https://aws.amazon.com/s3/) and [Google Cloud Storage](https://cloud.google.com/storage/) (GCS for short)
+for long-term storage. With jclouds, it is easy to add support for more
+[cloud storage providers](https://jclouds.apache.org/reference/providers/#blobstore-providers) in the future.
+
+* Tiered storage uses [Apache Hadoop](http://hadoop.apache.org/) to support filesystems for long-term storage.
+With Hadoop, it is easy to add support for more filesystems in the future.
+
+## When should I use Tiered Storage?
+
+Tiered storage should be used when you have a topic for which you want to keep a very long backlog for a long time. For example, if you have a topic containing user actions which you use to train your recommendation systems, you may want to keep that data for a long time, so that if you change your recommendation algorithm you can rerun it against your full user history.
+
+## The offloading mechanism
+
+A topic in Pulsar is backed by a log, known as a managed ledger. This log is composed of an ordered list of segments. Pulsar only ever writes to the final segment of the log. All previous segments are sealed. The data within a segment is immutable. This is known as a segment-oriented architecture.
+
+![Tiered storage](assets/pulsar-tiered-storage.png "Tiered Storage")
+
+The Tiered Storage offloading mechanism takes advantage of this segment-oriented architecture. When offloading is requested, the segments of the log are copied, one-by-one, to tiered storage. All segments of the log, apart from the segment currently being written to, can be offloaded.
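+
+Because offloading happens segment by segment, you can see exactly which segments of a topic have been moved by inspecting the topic's internal stats. The following Java sketch assumes a hypothetical topic and an admin endpoint at `http://localhost:8080`:
+
+```java
+import org.apache.pulsar.client.admin.PulsarAdmin;
+import org.apache.pulsar.common.policies.data.PersistentTopicInternalStats;
+
+PulsarAdmin admin = PulsarAdmin.builder()
+        .serviceHttpUrl("http://localhost:8080") // assumed admin endpoint
+        .build();
+
+PersistentTopicInternalStats stats =
+        admin.topics().getInternalStats("persistent://my-tenant/my-namespace/my-topic");
+
+// Each entry is one sealed segment (ledger); offloaded segments are flagged.
+for (PersistentTopicInternalStats.LedgerInfo ledger : stats.ledgers) {
+    System.out.printf("ledger %d: %d entries, %d bytes, offloaded=%b%n",
+            ledger.ledgerId, ledger.entries, ledger.size, ledger.offloaded);
+}
+
+admin.close();
+```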
+
+On the broker, the administrator must configure the bucket and credentials for the cloud storage service.
+The configured bucket must exist before attempting to offload. If it does not exist, the offload operation will fail.
+
+Pulsar uses multi-part objects to upload the segment data. It is possible that a broker could crash while uploading the data.
+We recommend you add a lifecycle rule to your bucket to expire incomplete multi-part uploads after a day or two to avoid
+getting charged for incomplete uploads.
+
+When ledgers are offloaded to long-term storage, you can still query data in the offloaded ledgers with Pulsar SQL.
+
+## Configuring the offload driver
+
+Offloading is configured in ```broker.conf```.
+
+At a minimum, the administrator must configure the driver, the bucket, and the authenticating credentials.
+There are also some other knobs to configure, like the bucket region, the max block size in the backing storage, etc.
+
+Currently we support the following driver types:
+
+- `aws-s3`: [Simple Cloud Storage Service](https://aws.amazon.com/s3/)
+- `google-cloud-storage`: [Google Cloud Storage](https://cloud.google.com/storage/)
+- `filesystem`: [Filesystem Storage](http://hadoop.apache.org/)
+
+> Driver names are case-insensitive. There is a fourth driver type, `s3`, which is identical to `aws-s3`,
+> though it requires that you specify an endpoint URL using `s3ManagedLedgerOffloadServiceEndpoint`. This is useful if
+> you are using an S3-compatible data store other than AWS.
+
+```conf
+managedLedgerOffloadDriver=aws-s3
+```
+
+### "aws-s3" Driver configuration
+
+#### Bucket and Region
+
+Buckets are the basic containers that hold your data.
+Everything that you store in Cloud Storage must be contained in a bucket.
+You can use buckets to organize your data and control access to your data,
+but unlike directories and folders, you cannot nest buckets.
+
+```conf
+s3ManagedLedgerOffloadBucket=pulsar-topic-offload
+```
+
+The bucket region is the region where the bucket is located. The bucket region is not a required
+but a recommended configuration. If it is not configured, the default region is used.
+
+With AWS S3, the default region is `US East (N. Virginia)`. The
+[AWS Regions and Endpoints](https://docs.aws.amazon.com/general/latest/gr/rande.html) page contains more information.
+
+```conf
+s3ManagedLedgerOffloadRegion=eu-west-3
+```
+
+#### Authentication with AWS
+
+To be able to access AWS S3, you need to authenticate with AWS S3.
+Pulsar does not provide any direct means of configuring authentication for AWS S3,
+but relies on the mechanisms supported by the
+[DefaultAWSCredentialsProviderChain](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html).
+
+Once you have created a set of credentials in the AWS IAM console, they can be configured in a number of ways.
+
+1. Using EC2 instance metadata credentials
+
+If you are on an AWS instance with an instance profile that provides credentials, Pulsar will use these credentials
+if no other mechanism is provided.
+
+2. Set the environment variables **AWS_ACCESS_KEY_ID** and **AWS_SECRET_ACCESS_KEY** in ```conf/pulsar_env.sh```.
+
+```bash
+export AWS_ACCESS_KEY_ID=ABC123456789
+export AWS_SECRET_ACCESS_KEY=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c
+```
+
+> "export" is important so that the variables are made available in the environment of spawned processes.
+
+
+3. Add the Java system properties *aws.accessKeyId* and *aws.secretKey* to **PULSAR_EXTRA_OPTS** in `conf/pulsar_env.sh`.
+
+```bash
+PULSAR_EXTRA_OPTS="${PULSAR_EXTRA_OPTS} ${PULSAR_MEM} ${PULSAR_GC} -Daws.accessKeyId=ABC123456789 -Daws.secretKey=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c -Dio.netty.leakDetectionLevel=disabled -Dio.netty.recycler.maxCapacity.default=1000 -Dio.netty.recycler.linkCapacity=1024"
+```
+
+4. Set the access credentials in ```~/.aws/credentials```.
+
+```conf
+[default]
+aws_access_key_id=ABC123456789
+aws_secret_access_key=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c
+```
+
+5. Assuming an IAM role
+
+If you want to assume an IAM role, you can do so by specifying the following:
+
+```conf
+s3ManagedLedgerOffloadRole=<aws role arn>
+s3ManagedLedgerOffloadRoleSessionName=pulsar-s3-offload
+```
+
+This will use the `DefaultAWSCredentialsProviderChain` for assuming this role.
+
+> The broker must be rebooted for credentials specified in `pulsar_env.sh` to take effect.
+
+#### Configuring the size of block read/write
+
+Pulsar also provides some knobs to configure the size of requests sent to AWS S3.
+
+- ```s3ManagedLedgerOffloadMaxBlockSizeInBytes``` configures the maximum size of
+  a "part" sent during a multipart upload. This cannot be smaller than 5MB. Default is 64MB.
+- ```s3ManagedLedgerOffloadReadBufferSizeInBytes``` configures the block size for
+  each individual read when reading back data from AWS S3. Default is 1MB.
+
+In both cases, these should not be touched unless you know what you are doing.
+
+### "google-cloud-storage" Driver configuration
+
+Buckets are the basic containers that hold your data. Everything that you store in
+Cloud Storage must be contained in a bucket. You can use buckets to organize your data and
+control access to your data, but unlike directories and folders, you cannot nest buckets.
+
+```conf
+gcsManagedLedgerOffloadBucket=pulsar-topic-offload
+```
+
+The bucket region is the region where the bucket is located. The bucket region is not a required
+but a recommended configuration. If it is not configured, the default region is used.
+
+With GCS, buckets are created in the `us multi-regional location` by default.
+The [Bucket Locations](https://cloud.google.com/storage/docs/bucket-locations) page contains more information.
+
+```conf
+gcsManagedLedgerOffloadRegion=europe-west3
+```
+
+#### Authentication with GCS
+
+The administrator needs to configure `gcsManagedLedgerOffloadServiceAccountKeyFile` in `broker.conf`
+for the broker to be able to access the GCS service. `gcsManagedLedgerOffloadServiceAccountKeyFile` is
+a JSON file containing the GCS credentials of a service account.
+The [Service Accounts section of this page](https://support.google.com/googleapi/answer/6158849) contains
+more information on how to create this key file for authentication. More information about Google Cloud IAM
+is available [here](https://cloud.google.com/storage/docs/access-control/iam).
+
+To generate service account credentials or view the public credentials that you've already generated, follow these steps:
+
+1. Open the [Service accounts page](https://console.developers.google.com/iam-admin/serviceaccounts).
+2. Select a project or create a new one.
+3. Click **Create service account**.
+4. In the **Create service account** window, type a name for the service account, and select **Furnish a new private key**. If you want to [grant G Suite domain-wide authority](https://developers.google.com/identity/protocols/OAuth2ServiceAccount#delegatingauthority) to the service account, also select **Enable G Suite Domain-wide Delegation**.
+5. Click **Create**.
+
+> Note: Make sure that the service account you create has permission to operate GCS. You need to assign the **Storage Admin** permission to your service account, as described [here](https://cloud.google.com/storage/docs/access-control/iam).
+
+```conf
+gcsManagedLedgerOffloadServiceAccountKeyFile="/Users/hello/Downloads/project-804d5e6a6f33.json"
+```
+
+#### Configuring the size of block read/write
+
+Pulsar also provides some knobs to configure the size of requests sent to GCS.
+
+- ```gcsManagedLedgerOffloadMaxBlockSizeInBytes``` configures the maximum size of a "part" sent
+  during a multipart upload. This cannot be smaller than 5MB. Default is 64MB.
+- ```gcsManagedLedgerOffloadReadBufferSizeInBytes``` configures the block size for each individual
+  read when reading back data from GCS. Default is 1MB.
+
+In both cases, these should not be touched unless you know what you are doing.
+
+### "filesystem" Driver configuration
+
+
+#### Configure connection address
+
+You can configure the connection address in the `broker.conf` file.
+
+```conf
+fileSystemURI="hdfs://127.0.0.1:9000"
+```
+#### Configure Hadoop profile path
+
+The configuration file is stored in the Hadoop profile path. It contains various settings, such as the base path, authentication, and so on.
+
+```conf
+fileSystemProfilePath="../conf/filesystem_offload_core_site.xml"
+```
+
+The model for storing topic data uses `org.apache.hadoop.io.MapFile`. You can use all of the configurations in `org.apache.hadoop.io.MapFile` for Hadoop.
+
+**Example**
+
+```conf
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+        <value></value>
+    </property>
+
+    <property>
+        <name>hadoop.tmp.dir</name>
+        <value>pulsar</value>
+    </property>
+
+    <property>
+        <name>io.file.buffer.size</name>
+        <value>4096</value>
+    </property>
+
+    <property>
+        <name>io.seqfile.compress.blocksize</name>
+        <value>1000000</value>
+    </property>
+
+    <property>
+        <name>io.seqfile.compression.type</name>
+        <value>BLOCK</value>
+    </property>
+
+    <property>
+        <name>io.map.index.interval</name>
+        <value>128</value>
+    </property>
+</configuration>
+```
+
+For more information about the configurations in `org.apache.hadoop.io.MapFile`, see [Filesystem Storage](http://hadoop.apache.org/).
+## Configuring offload to run automatically
+
+Namespace policies can be configured to offload data automatically once a threshold is reached. The threshold is based on the size of data that the topic has stored on the Pulsar cluster. Once the topic reaches the threshold, an offload operation will be triggered. Setting a negative value for the threshold will disable automatic offloading. Setting the threshold to 0 will cause the broker to offload data as soon as it possibly can.
+
+```bash
+$ bin/pulsar-admin namespaces set-offload-threshold --size 10M my-tenant/my-namespace
+```
+
+> Automatic offload runs when a new segment is added to a topic log. If you set the threshold on a namespace, but few messages are being produced to the topic, offload will not be triggered until the current segment is full.
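+
+The same threshold can also be set programmatically with the Java admin client. A minimal sketch, assuming an admin endpoint at `http://localhost:8080` and the namespace used above:
+
+```java
+import org.apache.pulsar.client.admin.PulsarAdmin;
+
+PulsarAdmin admin = PulsarAdmin.builder()
+        .serviceHttpUrl("http://localhost:8080") // assumed admin endpoint
+        .build();
+
+// Offload automatically once the namespace's topics store more than 10 MiB locally.
+admin.namespaces().setOffloadThreshold("my-tenant/my-namespace", 10 * 1024 * 1024);
+
+admin.close();
+```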
+
+## Triggering offload manually
+
+Offloading can be manually triggered through a REST endpoint on the Pulsar broker. We provide a CLI command that calls this REST endpoint for you.
+
+When triggering offload, you must specify the maximum size, in bytes, of backlog which will be retained locally in BookKeeper. The offload mechanism will offload segments from the start of the topic backlog until this condition is met.
+
+```bash
+$ bin/pulsar-admin topics offload --size-threshold 10M my-tenant/my-namespace/topic1
+Offload triggered for persistent://my-tenant/my-namespace/topic1 for messages before 2:0:-1
+```
+
+The command that triggers an offload will not wait until the offload operation has completed. To check the status of the offload, use `offload-status`.
+
+```bash
+$ bin/pulsar-admin topics offload-status my-tenant/my-namespace/topic1
+Offload is currently running
+```
+
+To wait for offload to complete, add the `-w` flag.
+
+```bash
+$ bin/pulsar-admin topics offload-status -w my-tenant/my-namespace/topic1
+Offload was a success
+```
+
+If there is an error offloading, the error will be propagated to the `offload-status` command.
+
+```bash
+$ bin/pulsar-admin topics offload-status persistent://public/default/topic1
+Error in offload
+null
+
+Reason: Error offloading: org.apache.bookkeeper.mledger.ManagedLedgerException: java.util.concurrent.CompletionException: com.amazonaws.services.s3.model.AmazonS3Exception: Anonymous users cannot initiate multipart uploads. Please authenticate. (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 798758DE3F1776DF; S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g=), S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g=
+```
+
diff --git a/site2/website/versioned_docs/version-2.7.0/deploy-aws.md b/site2/website/versioned_docs/version-2.7.0/deploy-aws.md
new file mode 100644
index 00000000000000..66b5836ba3a338
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/deploy-aws.md
@@ -0,0 +1,227 @@
+---
+id: version-2.7.0-deploy-aws
+title: Deploying a Pulsar cluster on AWS using Terraform and Ansible
+sidebar_label: Amazon Web Services
+original_id: deploy-aws
+---
+
+> For instructions on deploying a single Pulsar cluster manually rather than using Terraform and Ansible, see [Deploying a Pulsar cluster on bare metal](deploy-bare-metal.md). For instructions on manually deploying a multi-cluster Pulsar instance, see [Deploying a Pulsar instance on bare metal](deploy-bare-metal-multi-cluster.md).
+
+One of the easiest ways to get a Pulsar [cluster](reference-terminology.md#cluster) running on [Amazon Web Services](https://aws.amazon.com/) (AWS) is to use the [Terraform](https://terraform.io) infrastructure provisioning tool and the [Ansible](https://www.ansible.com) server automation tool. Terraform can create the resources necessary for running the Pulsar cluster---[EC2](https://aws.amazon.com/ec2/) instances, networking and security infrastructure, etc.---while Ansible can install and run Pulsar on the provisioned resources.
+
+## Requirements and setup
+
+In order to install a Pulsar cluster on AWS using Terraform and Ansible, you need to prepare the following:
+
+* An [AWS account](https://aws.amazon.com/account/) and the [`aws`](https://aws.amazon.com/cli/) command-line tool
+* Python and [pip](https://pip.pypa.io/en/stable/)
+* The [`terraform-inventory`](https://github.com/adammck/terraform-inventory) tool, which enables Ansible to use Terraform artifacts
+
+You also need to make sure that you are currently logged into your AWS account via the `aws` tool:
+
+```bash
+$ aws configure
+```
+
+## Installation
+
+You can install Ansible on Linux or macOS using pip.
+
+```bash
+$ pip install ansible
+```
+
+You can install Terraform using the instructions [here](https://www.terraform.io/intro/getting-started/install.html).
+
+You also need to have the Terraform and Ansible configuration for Pulsar locally on your machine. You can find it in the [GitHub repository](https://github.com/apache/pulsar) of Pulsar, which you can fetch using Git commands:
+
+```bash
+$ git clone https://github.com/apache/pulsar
+$ cd pulsar/deployment/terraform-ansible/aws
+```
+
+## SSH setup
+
+> If you already have an SSH key and want to use it, you can skip the step of generating an SSH key and update the `private_key_file` setting
+> in the `ansible.cfg` file and the `public_key_path` setting in the `terraform.tfvars` file.
+>
+> For example, if you already have a private SSH key in `~/.ssh/pulsar_aws` and a public key in `~/.ssh/pulsar_aws.pub`,
+> follow the steps below:
+>
+> 1. Update `ansible.cfg` with the following values:
+>
+> ```shell
+> private_key_file=~/.ssh/pulsar_aws
+> ```
+>
+> 2. Update `terraform.tfvars` with the following values:
+>
+> ```shell
+> public_key_path=~/.ssh/pulsar_aws.pub
+> ```
+
+In order to create the necessary AWS resources using Terraform, you need to create an SSH key. Enter the following commands to create a private SSH key in `~/.ssh/id_rsa` and a public key in `~/.ssh/id_rsa.pub`:
+
+```bash
+$ ssh-keygen -t rsa
+```
+
+Do *not* enter a passphrase (hit **Enter** instead when prompted). Enter the following command to verify that a key has been created:
+
+```bash
+$ ls ~/.ssh
+id_rsa id_rsa.pub
+```
+
+## Create AWS resources using Terraform
+
+To start building AWS resources with Terraform, you need to install all Terraform dependencies. Enter the following command:
+
+```bash
+$ terraform init
+# This will create a .terraform folder
+```
+
+After that, you can apply the default Terraform configuration by entering this command:
+
+```bash
+$ terraform apply
+```
+
+Then you see the prompt below:
+
+```bash
+Do you want to perform these actions?
+  Terraform will perform the actions described above.
+  Only 'yes' will be accepted to approve.
+
+  Enter a value:
+```
+
+Type `yes` and hit **Enter**. Applying the configuration could take several minutes. When Terraform has finished applying the configuration, you can see `Apply complete!` along with some other information, including the number of resources created.
+
+### Apply a non-default configuration
+
+You can apply a non-default Terraform configuration by changing the values in the `terraform.tfvars` file. The following variables are available:
+
+Variable name | Description | Default
+:-------------|:------------|:-------
+`public_key_path` | The path of the public key that you have generated. | `~/.ssh/id_rsa.pub`
+`region` | The AWS region in which the Pulsar cluster runs | `us-west-2`
+`availability_zone` | The AWS availability zone in which the Pulsar cluster runs | `us-west-2a`
+`aws_ami` | The [Amazon Machine Image](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) (AMI) that the cluster uses | `ami-9fa343e7`
+`num_zookeeper_nodes` | The number of [ZooKeeper](https://zookeeper.apache.org) nodes in the ZooKeeper cluster | 3
+`num_bookie_nodes` | The number of bookies that run in the cluster | 3
+`num_broker_nodes` | The number of Pulsar brokers that run in the cluster | 2
+`num_proxy_nodes` | The number of Pulsar proxies that run in the cluster | 1
+`base_cidr_block` | The root [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that network assets use for the cluster | `10.0.0.0/16`
+`instance_types` | The EC2 instance types to be used. This variable is a map with four keys: `zookeeper` for the ZooKeeper instances, `bookie` for the BookKeeper bookies, and `broker` and `proxy` for the Pulsar brokers and proxies | `t2.small` (ZooKeeper), `i3.xlarge` (BookKeeper) and `c5.2xlarge` (Brokers/Proxies)
+
+### What is installed
+
+When you run the Ansible playbook, the following AWS resources are used:
+
+* 9 total [Elastic Compute Cloud](https://aws.amazon.com/ec2) (EC2) instances running the [ami-9fa343e7](https://access.redhat.com/articles/3135091) Amazon Machine Image (AMI), which runs [Red Hat Enterprise Linux (RHEL) 7.4](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/7.4_release_notes/index). By default, that includes:
+  * 3 small VMs for ZooKeeper ([t2.small](https://www.ec2instances.info/?selected=t2.small) instances)
+  * 3 larger VMs for BookKeeper [bookies](reference-terminology.md#bookie) ([i3.xlarge](https://www.ec2instances.info/?selected=i3.xlarge) instances)
+  * 2 larger VMs for Pulsar [brokers](reference-terminology.md#broker) ([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instances)
+  * 1 larger VM for the Pulsar [proxy](reference-terminology.md#proxy) ([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instance)
+* An EC2 [security group](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-network-security.html)
+* A [virtual private cloud](https://aws.amazon.com/vpc/) (VPC) for security
+* An [API Gateway](https://aws.amazon.com/api-gateway/) for connections from the outside world
+* A [route table](http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_Route_Tables.html) for the Pulsar cluster's VPC
+* A [subnet](http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_Subnets.html) for the VPC
+
+All EC2 instances for the cluster run in the [us-west-2](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html) region.
+
+### Fetch your Pulsar connection URL
+
+When you apply the Terraform configuration by entering the command `terraform apply`, Terraform outputs a value for the `pulsar_service_url`. The value should look something like this:
+
+```
+pulsar://pulsar-elb-1800761694.us-west-2.elb.amazonaws.com:6650
+```
+
+You can fetch that value at any time by entering the command `terraform output pulsar_service_url` or by parsing the `terraform.tfstate` file (which is JSON, even though the filename does not reflect that):
+
+```bash
+$ cat terraform.tfstate | jq .modules[0].outputs.pulsar_service_url.value
+```
+
+### Destroy your cluster
+
+At any point, you can destroy all AWS resources associated with your cluster using Terraform's `destroy` command:
+
+```bash
+$ terraform destroy
+```
+
+## Setup Disks
+
+Before you run the Pulsar playbook, you need to mount the disks to the correct directories on the bookie nodes. Since different types of machines have different disk layouts, you need to update the task defined in the `setup-disk.yaml` file after changing the `instance_types` in your Terraform config.
+
+To set up disks on bookie nodes, enter this command:
+
+```bash
+$ ansible-playbook \
+  --user='ec2-user' \
+  --inventory=`which terraform-inventory` \
+  setup-disk.yaml
+```
+
+After that, the disks are mounted under `/mnt/journal` as the journal disk, and `/mnt/storage` as the ledger disk.
+Remember to enter this command only once. If you attempt to enter this command again after you have run the Pulsar playbook, your disks might potentially be erased, causing the bookies to fail to start up.
+
+## Run the Pulsar playbook
+
+Once you have created the necessary AWS resources using Terraform, you can install and run Pulsar on the Terraform-created EC2 instances using Ansible.
+
+(Optional) If you want to use any [built-in IO connectors](io-connectors.md), edit the `Download Pulsar IO packages` task in the `deploy-pulsar.yaml` file and uncomment the connectors you want to use.
+
+To run the playbook, enter this command:
+
+```bash
+$ ansible-playbook \
+  --user='ec2-user' \
+  --inventory=`which terraform-inventory` \
+  ../deploy-pulsar.yaml
+```
+
+If you have created a private SSH key at a location different from `~/.ssh/id_rsa`, you can specify the different location using the `--private-key` flag in the following command:
+
+```bash
+$ ansible-playbook \
+  --user='ec2-user' \
+  --inventory=`which terraform-inventory` \
+  --private-key="~/.ssh/some-non-default-key" \
+  ../deploy-pulsar.yaml
+```
+
+## Access the cluster
+
+You can now access your running Pulsar cluster using the unique Pulsar connection URL for your cluster, which you can obtain following the instructions [above](#fetch-your-pulsar-connection-url).
+
+For a quick demonstration of accessing the cluster, we can use the Python client for Pulsar and the Python shell. First, install the Pulsar Python module using pip:
+
+```bash
+$ pip install pulsar-client
+```
+
+Now, open up the Python shell using the `python` command:
+
+```bash
+$ python
+```
+
+Once you are in the shell, enter the following commands:
+
+```python
+>>> import pulsar
+>>> client = pulsar.Client('pulsar://pulsar-elb-1800761694.us-west-2.elb.amazonaws.com:6650')
+# Make sure to use your connection URL
+>>> producer = client.create_producer('persistent://public/default/test-topic')
+>>> producer.send('Hello world'.encode('utf-8'))
+>>> client.close()
+```
+
+If all of these commands are successful, Pulsar clients can now use your cluster!
diff --git a/site2/website/versioned_docs/version-2.7.0/deploy-bare-metal.md b/site2/website/versioned_docs/version-2.7.0/deploy-bare-metal.md
new file mode 100644
index 00000000000000..94de093d767564
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/deploy-bare-metal.md
@@ -0,0 +1,461 @@
+---
+id: version-2.7.0-deploy-bare-metal
+title: Deploy a cluster on bare metal
+sidebar_label: Bare metal
+original_id: deploy-bare-metal
+---
+
+
+> ### Tips
+>
+> 1. Single-cluster Pulsar installations should be sufficient for all but the most ambitious use cases. If you are interested in experimenting with
+> Pulsar or using Pulsar in a startup or on a single team, it is simplest to opt for a single cluster. If you do need to run a multi-cluster Pulsar instance,
+> see the guide [here](deploy-bare-metal-multi-cluster.md).
+>
+> 2. If you want to use all builtin [Pulsar IO](io-overview.md) connectors in your Pulsar deployment, you need to download the `apache-pulsar-io-connectors`
+> package and install it under the `connectors` directory in the pulsar directory on every broker node, or on every function-worker node if you
+> run a separate cluster of function workers for [Pulsar Functions](functions-overview.md).
+>
+> 3. If you want to use the [Tiered Storage](concepts-tiered-storage.md) feature in your Pulsar deployment, you need to download the `apache-pulsar-offloaders`
+> package and install it under the `offloaders` directory in the pulsar directory on every broker node.
+> For more details about how to configure this feature, refer to the [Tiered storage cookbook](cookbooks-tiered-storage.md).
+
+Deploying a Pulsar cluster involves doing the following (in order):
+
+* Deploy a [ZooKeeper](#deploy-a-zookeeper-cluster) cluster (optional)
+* Initialize [cluster metadata](#initialize-cluster-metadata)
+* Deploy a [BookKeeper](#deploy-a-bookkeeper-cluster) cluster
+* Deploy one or more Pulsar [brokers](#deploy-pulsar-brokers)
+
+## Preparation
+
+### Requirements
+
+> If you already have an existing ZooKeeper cluster and want to reuse it, you do not need to prepare the machines
+> for running ZooKeeper.
+
+To run Pulsar on bare metal, the following configuration is recommended:
+
+* At least 6 Linux machines or VMs
+  * 3 for running [ZooKeeper](https://zookeeper.apache.org)
+  * 3 for running a Pulsar broker and a [BookKeeper](https://bookkeeper.apache.org) bookie
+* A single [DNS](https://en.wikipedia.org/wiki/Domain_Name_System) name covering all of the Pulsar broker hosts
+
+> If you do not have enough machines, or want to try out Pulsar in cluster mode (and expand the cluster later),
+> you can deploy a full Pulsar configuration on one node, where ZooKeeper, the bookie, and the broker run on the same machine.
+
+> If you do not have a DNS server, you can use the multi-host format in the service URL instead.
+
+Each machine in your cluster needs to have [Java 8](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or a more recent version of Java installed.
+
+The following is a diagram showing the basic setup:
+
+![alt-text](assets/pulsar-basic-setup.png)
+
+In this diagram, connecting clients need to be able to communicate with the Pulsar cluster using a single URL. In this case, `pulsar-cluster.acme.com` abstracts over all of the message-handling brokers. Pulsar message brokers run on machines alongside BookKeeper bookies; brokers and bookies, in turn, rely on ZooKeeper.
+
+### Hardware considerations
+
+When you deploy a Pulsar cluster, keep in mind the following recommendations when you do capacity planning.
+
+#### ZooKeeper
+
+For machines running ZooKeeper, it is recommended to use less powerful machines or VMs. Pulsar uses ZooKeeper only for periodic coordination-related and configuration-related tasks, *not* for basic operations. If you run Pulsar on [Amazon Web Services](https://aws.amazon.com/) (AWS), for example, a [t2.small](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/t2-instances.html) instance will likely suffice.
+
+#### Bookies and Brokers
+
+For machines running a bookie and a Pulsar broker, more powerful machines are required. For an AWS deployment, for example, [i3.4xlarge](https://aws.amazon.com/blogs/aws/now-available-i3-instances-for-demanding-io-intensive-applications/) instances may be appropriate. On those machines, you can use the following:
+
+* Fast CPUs and 10Gbps [NIC](https://en.wikipedia.org/wiki/Network_interface_controller) (for Pulsar brokers)
+* Small and fast [solid-state drives](https://en.wikipedia.org/wiki/Solid-state_drive) (SSDs) or [hard disk drives](https://en.wikipedia.org/wiki/Hard_disk_drive) (HDDs) with a [RAID](https://en.wikipedia.org/wiki/RAID) controller and a battery-backed write cache (for BookKeeper bookies)
+
+## Install the Pulsar binary package
+
+> You need to install the Pulsar binary package on *each machine in the cluster*, including machines running [ZooKeeper](#deploy-a-zookeeper-cluster) and [BookKeeper](#deploy-a-bookkeeper-cluster).
+
+To get started deploying a Pulsar cluster on bare metal, you need to download a binary tarball release in one of the following ways:
+
+* By clicking on the link below directly, which automatically triggers a download:
+  * Pulsar {{pulsar:version}} binary release
+* From the Pulsar [downloads page](pulsar:download_page_url)
+* From the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest) on [GitHub](https://github.com)
+* Using [wget](https://www.gnu.org/software/wget):
+
+```bash
+$ wget pulsar:binary_release_url
+```
+
+Once you download the tarball, untar it and `cd` into the resulting directory:
+
+```bash
+$ tar xvzf apache-pulsar-{{pulsar:version}}-bin.tar.gz
+$ cd apache-pulsar-{{pulsar:version}}
+```
+
+The extracted directory contains the following subdirectories:
+
+Directory | Contains
+:---------|:--------
+`bin` | [command-line tools](reference-cli-tools.md) of Pulsar, such as [`pulsar`](reference-cli-tools.md#pulsar) and [`pulsar-admin`](reference-pulsar-admin.md)
+`conf` | Configuration files for Pulsar, including for [broker configuration](reference-configuration.md#broker), [ZooKeeper configuration](reference-configuration.md#zookeeper), and more
+`data` | The data storage directory that ZooKeeper and BookKeeper use
+`lib` | The [JAR](https://en.wikipedia.org/wiki/JAR_(file_format)) files that Pulsar uses
+`logs` | Logs that the installation creates
+
+## [Install Builtin Connectors (optional)](https://pulsar.apache.org/docs/en/next/standalone/#install-builtin-connectors-optional)
+
+> Since Pulsar release `2.1.0-incubating`, Pulsar provides a separate binary distribution, containing all the `builtin` connectors.
+> If you want to enable those `builtin` connectors, follow the instructions below; otherwise, you can
+> skip this section for now.
+
+To get started using builtin connectors, you need to download the connectors tarball release on every broker node in one of the following ways:
+
+* by clicking the link below and downloading the release from an Apache mirror:
+
+  * Pulsar IO Connectors {{pulsar:version}} release
+
+* from the Pulsar [downloads page](pulsar:download_page_url)
+* from the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest)
+* using [wget](https://www.gnu.org/software/wget):
+
+  ```shell
+  $ wget pulsar:connector_release_url/{connector}-{{pulsar:version}}.nar
+  ```
+
+Once you download the .nar file, copy the file to the `connectors` directory in the pulsar directory.
+For example, if you download the connector file `pulsar-io-aerospike-{{pulsar:version}}.nar`:
+
+```bash
+$ mkdir connectors
+$ mv pulsar-io-aerospike-{{pulsar:version}}.nar connectors
+
+$ ls connectors
+pulsar-io-aerospike-{{pulsar:version}}.nar
+...
+```
+
+## [Install Tiered Storage Offloaders (optional)](https://pulsar.apache.org/docs/en/next/standalone/#install-tiered-storage-offloaders-optional)
+
+> Since Pulsar release `2.2.0`, Pulsar releases a separate binary distribution, containing the tiered storage offloaders.
+> If you want to enable the tiered storage feature, follow the instructions below; otherwise, you can
+> skip this section for now.
+
+To get started using tiered storage offloaders, you need to download the offloaders tarball release on every broker node in one of the following ways:
+
+* by clicking the link below and downloading the release from an Apache mirror:
+
+  * Pulsar Tiered Storage Offloaders {{pulsar:version}} release
+
+* from the Pulsar [downloads page](pulsar:download_page_url)
+* from the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest)
+* using [wget](https://www.gnu.org/software/wget):
+
+  ```shell
+  $ wget pulsar:offloader_release_url
+  ```
+
+Once you download the tarball, untar the offloaders package in the pulsar directory and copy the extracted `offloaders` directory into the pulsar directory:
+
+```bash
+$ tar xvfz apache-pulsar-offloaders-{{pulsar:version}}-bin.tar.gz
+
+# You can find a directory named `apache-pulsar-offloaders-{{pulsar:version}}` in the pulsar directory.
+# Then copy the offloaders:
+
+$ mv apache-pulsar-offloaders-{{pulsar:version}}/offloaders offloaders
+
+$ ls offloaders
+tiered-storage-jcloud-{{pulsar:version}}.nar
+```
+
+For more details about how to configure the tiered storage feature, refer to the [Tiered storage cookbook](cookbooks-tiered-storage.md).
+
+
+## Deploy a ZooKeeper cluster
+
+> If you already have an existing ZooKeeper cluster and want to use it, you can skip this section.
+
+[ZooKeeper](https://zookeeper.apache.org) manages a variety of essential coordination- and configuration-related tasks for Pulsar. To deploy a Pulsar cluster, you need to deploy ZooKeeper first (before all other components). A 3-node ZooKeeper cluster is the recommended configuration. Pulsar does not make heavy use of ZooKeeper, so more lightweight machines or VMs should suffice for running ZooKeeper.
+
+To begin, add all ZooKeeper servers to the configuration specified in [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) (in the Pulsar directory that you created [above](#install-the-pulsar-binary-package)). The following is an example:
+
+```properties
+server.1=zk1.us-west.example.com:2888:3888
+server.2=zk2.us-west.example.com:2888:3888
+server.3=zk3.us-west.example.com:2888:3888
+```
+
+> If you only have one machine on which to deploy Pulsar, you only need to add one server entry in the configuration file.
+
+On each host, you need to specify the ID of the node in the `myid` file, which is in the `data/zookeeper` folder of each server by default (you can change the file location via the [`dataDir`](reference-configuration.md#zookeeper-dataDir) parameter).
+
+> See the [Multi-server setup guide](https://zookeeper.apache.org/doc/r3.4.10/zookeeperAdmin.html#sc_zkMulitServerSetup) in the ZooKeeper documentation for detailed information on `myid` and more.
+
+For example, on a ZooKeeper server like `zk1.us-west.example.com`, you can set the `myid` value as follows:
+
+```bash
+$ mkdir -p data/zookeeper
+$ echo 1 > data/zookeeper/myid
+```
+
+On `zk2.us-west.example.com`, the command is `echo 2 > data/zookeeper/myid` and so on.
+
+Once you add each server to the `zookeeper.conf` configuration and have the appropriate `myid` entry, you can start ZooKeeper on all hosts (in the background, using nohup) with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool:
+
+```bash
+$ bin/pulsar-daemon start zookeeper
+```
+
+> If you plan to deploy ZooKeeper and the bookie on the same node, you
+> need to start ZooKeeper with a different stats port.
+
+Start ZooKeeper with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool as follows:
+
+```bash
+$ PULSAR_EXTRA_OPTS="-Dstats_server_port=8001" bin/pulsar-daemon start zookeeper
+```
+
+## Initialize cluster metadata
+
+Once you deploy ZooKeeper for your cluster, you need to write some metadata to ZooKeeper for each cluster in your instance. You only need to write this data **once**.
+
+You can initialize this metadata using the [`initialize-cluster-metadata`](reference-cli-tools.md#pulsar-initialize-cluster-metadata) command of the [`pulsar`](reference-cli-tools.md#pulsar) CLI tool. This command can be run on any machine in your ZooKeeper cluster. The following is an example:
+
+```shell
+$ bin/pulsar initialize-cluster-metadata \
+  --cluster pulsar-cluster-1 \
+  --zookeeper zk1.us-west.example.com:2181 \
+  --configuration-store zk1.us-west.example.com:2181 \
+  --web-service-url http://pulsar.us-west.example.com:8080 \
+  --web-service-url-tls https://pulsar.us-west.example.com:8443 \
+  --broker-service-url pulsar://pulsar.us-west.example.com:6650 \
+  --broker-service-url-tls pulsar+ssl://pulsar.us-west.example.com:6651
+```
+
+As the example above shows, you need to specify the following:
+
+Flag | Description
+:----|:-----------
+`--cluster` | A name for the cluster
+`--zookeeper` | A "local" ZooKeeper connection string for the cluster. This connection string only needs to include *one* machine in the ZooKeeper cluster.
+`--configuration-store` | The configuration store connection string for the entire instance. As with the `--zookeeper` flag, this connection string only needs to include *one* machine in the ZooKeeper cluster.
+`--web-service-url` | The web service URL for the cluster, plus a port. This URL should be a standard DNS name. The default port is 8080 (using a different port is not recommended).
+`--web-service-url-tls` | If you use [TLS](security-tls-transport.md), you also need to specify a TLS web service URL for the cluster. The default port is 8443 (using a different port is not recommended).
+`--broker-service-url` | A broker service URL enabling interaction with the brokers in the cluster. This URL should use the same DNS name as the web service URL, but with the `pulsar` scheme instead. The default port is 6650 (using a different port is not recommended).
+`--broker-service-url-tls` | If you use [TLS](security-tls-transport.md), you also need to specify a TLS broker service URL for the brokers in the cluster. The default port is 6651 (using a different port is not recommended).
+
+
+> If you do not have a DNS server, you can use the multi-host format in the service URL with the following settings:
+>
+> ```properties
+> --web-service-url http://host1:8080,host2:8080,host3:8080 \
+> --web-service-url-tls https://host1:8443,host2:8443,host3:8443 \
+> --broker-service-url pulsar://host1:6650,host2:6650,host3:6650 \
+> --broker-service-url-tls pulsar+ssl://host1:6651,host2:6651,host3:6651
+> ```
+
+> If you want to use an existing BookKeeper cluster, you can add the `--existing-bk-metadata-service-uri` flag as follows:
+>
+> ```properties
+> --existing-bk-metadata-service-uri "zk+null://zk1:2181;zk2:2181/ledgers" \
+> --web-service-url http://host1:8080,host2:8080,host3:8080 \
+> --web-service-url-tls https://host1:8443,host2:8443,host3:8443 \
+> --broker-service-url pulsar://host1:6650,host2:6650,host3:6650 \
+> --broker-service-url-tls pulsar+ssl://host1:6651,host2:6651,host3:6651
+> ```
+> You can obtain the metadata service URI of the existing BookKeeper cluster by using the `bin/bookkeeper shell whatisinstanceid` command. You must enclose the value in double quotes since the multiple metadata service URIs are separated with semicolons.
+
+## Deploy a BookKeeper cluster
+
+[BookKeeper](https://bookkeeper.apache.org) handles all persistent data storage in Pulsar. You need to deploy a cluster of BookKeeper bookies to use Pulsar. A **3-bookie BookKeeper cluster** is a good starting point.
+
+You can configure BookKeeper bookies using the [`conf/bookkeeper.conf`](reference-configuration.md#bookkeeper) configuration file. The most important step in configuring bookies for our purposes here is ensuring that [`zkServers`](reference-configuration.md#bookkeeper-zkServers) is set to the connection string for the ZooKeeper cluster. The following is an example:
+
+```properties
+zkServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181
+```
+
+Once you appropriately modify the `zkServers` parameter, you can make any other configuration changes that you require. You can find a full listing of the available BookKeeper configuration parameters [here](reference-configuration.md#bookkeeper). However, consulting the [BookKeeper documentation](http://bookkeeper.apache.org/docs/latest/reference/config/) for a more in-depth guide might be a better choice.
+
+Once you apply the desired configuration in `conf/bookkeeper.conf`, you can start up a bookie on each of your BookKeeper hosts. You can start up each bookie either in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup), or in the foreground.
+
+To start the bookie in the background, use the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool:
+
+```bash
+$ bin/pulsar-daemon start bookie
+```
+
+To start the bookie in the foreground:
+
+```bash
+$ bin/pulsar bookie
+```
+
+You can verify that a bookie works properly by running the `bookiesanity` command on the [BookKeeper shell](reference-cli-tools.md#shell):
+
+```bash
+$ bin/bookkeeper shell bookiesanity
+```
+
+This command creates an ephemeral BookKeeper ledger on the local bookie, writes a few entries, reads them back, and finally deletes the ledger.
+
+After you start all the bookies, you can run the `simpletest` command of the [BookKeeper shell](reference-cli-tools.md#shell) on any bookie node to verify that all the bookies in the cluster are up and running.
+
+```bash
+$ bin/bookkeeper shell simpletest --ensemble <num-bookies> --writeQuorum <num-bookies> --ackQuorum <num-bookies> --numEntries <num-entries>
+```
+
+This command creates a ledger with `<num-bookies>` bookies on the cluster, writes a few entries, and finally deletes the ledger.
+
+
+## Deploy Pulsar brokers
+
+Pulsar brokers are the last thing you need to deploy in your Pulsar cluster. Brokers handle Pulsar messages and provide the administrative interface of Pulsar. A good choice is to run **3 brokers**, one for each machine that already runs a BookKeeper bookie.
+
+### Configure Brokers
+
+The most important element of broker configuration is ensuring that each broker is aware of the ZooKeeper cluster that you have deployed. Ensure that the [`zookeeperServers`](reference-configuration.md#broker-zookeeperServers) and [`configurationStoreServers`](reference-configuration.md#broker-configurationStoreServers) parameters are correct. In this case, since you only have 1 cluster and no separate configuration store, `configurationStoreServers` points to the same servers as `zookeeperServers`.
+
+```properties
+zookeeperServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181
+configurationStoreServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181
+```
+
+You also need to specify the cluster name (matching the name that you provided when you [initialized the metadata of the cluster](#initialize-cluster-metadata)):
+
+```properties
+clusterName=pulsar-cluster-1
+```
+
+In addition, you need to match the broker and web service ports provided when you initialized the metadata of the cluster (especially if you use ports other than the defaults):
+
+```properties
+brokerServicePort=6650
+brokerServicePortTls=6651
+webServicePort=8080
+webServicePortTls=8443
+```
+
+> If you deploy Pulsar in a one-node cluster, you should update the replication settings in `conf/broker.conf` to `1`.
+>
+> ```properties
+> # Number of bookies to use when creating a ledger
+> managedLedgerDefaultEnsembleSize=1
+>
+> # Number of copies to store for each message
+> managedLedgerDefaultWriteQuorum=1
+>
+> # Number of guaranteed copies (acks to wait before write is complete)
+> managedLedgerDefaultAckQuorum=1
+> ```
+
+### Enable Pulsar Functions (optional)
+
+If you want to enable [Pulsar Functions](functions-overview.md), follow the instructions below:
+
+1. Edit `conf/broker.conf` to enable the functions worker by setting `functionsWorkerEnabled` to `true`.
+
+   ```conf
+   functionsWorkerEnabled=true
+   ```
+
+2. Edit `conf/functions_worker.yml` and set `pulsarFunctionsCluster` to the cluster name that you provided when you [initialized the metadata of the cluster](#initialize-cluster-metadata).
+
+   ```conf
+   pulsarFunctionsCluster: pulsar-cluster-1
+   ```
+
+To learn more about deploying the functions worker, see [Deploy and manage functions worker](functions-worker.md).
+
+### Start Brokers
+
+You can then provide any other configuration changes that you want in the [`conf/broker.conf`](reference-configuration.md#broker) file. Once you decide on a configuration, you can start up the brokers for your Pulsar cluster. Like ZooKeeper and BookKeeper, you can start brokers either in the foreground or in the background, using nohup.
+
+You can start a broker in the foreground using the [`pulsar broker`](reference-cli-tools.md#pulsar-broker) command:
+
+```bash
+$ bin/pulsar broker
+```
+
+You can start a broker in the background using the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool:
+
+```bash
+$ bin/pulsar-daemon start broker
+```
+
+Once you successfully start up all the brokers that you intend to use, your Pulsar cluster should be ready to go!
+
+## Connect to the running cluster
+
+Once your Pulsar cluster is up and running, you should be able to connect to it using Pulsar clients. One such client is the [`pulsar-client`](reference-cli-tools.md#pulsar-client) tool, which is included with the Pulsar binary package. The `pulsar-client` tool can publish messages to and consume messages from Pulsar topics and thus provides a simple way to make sure that your cluster runs properly.
+
+To use the `pulsar-client` tool, first modify the client configuration file in [`conf/client.conf`](reference-configuration.md#client) in your binary package. You need to change the values for `webServiceUrl` and `brokerServiceUrl`, substituting `localhost` (which is the default) with the DNS name that you assign to your broker/bookie hosts. The following is an example:
+
+```properties
+webServiceUrl=http://us-west.example.com:8080
+brokerServiceUrl=pulsar://us-west.example.com:6650
+```
+
+> If you do not have a DNS server, you can specify a multi-host service URL as follows:
+>
+> ```properties
+> webServiceUrl=http://host1:8080,host2:8080,host3:8080
+> brokerServiceUrl=pulsar://host1:6650,host2:6650,host3:6650
+> ```
+
+Once that is complete, you can publish a message to the Pulsar topic:
+
+```bash
+$ bin/pulsar-client produce \
+  persistent://public/default/test \
+  -n 1 \
+  -m "Hello Pulsar"
+```
+
+> You may need to use a different cluster name in the topic if you specify a cluster name other than `pulsar-cluster-1`.
+
+This command publishes a single message to the Pulsar topic. In addition, you can subscribe to the topic in a different terminal before publishing messages, as below:
+
+```bash
+$ bin/pulsar-client consume \
+  persistent://public/default/test \
+  -n 100 \
+  -s "consumer-test" \
+  -t "Exclusive"
+```
+
+Once you successfully publish the above message to the topic, you should see it in the standard output:
+
+```bash
+----- got message -----
+Hello Pulsar
+```
+
+## Run Functions
+
+> If you have [enabled](#enable-pulsar-functions-optional) Pulsar Functions, you can try them out now.
+
+Create an `ExclamationFunction` named `exclamation`:
+
+```bash
+bin/pulsar-admin functions create \
+  --jar examples/api-examples.jar \
+  --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \
+  --inputs persistent://public/default/exclamation-input \
+  --output persistent://public/default/exclamation-output \
+  --tenant public \
+  --namespace default \
+  --name exclamation
+```
+
+Check whether the function runs as expected by [triggering](functions-deploying.md#triggering-pulsar-functions) the function.
+
+```bash
+bin/pulsar-admin functions trigger --name exclamation --trigger-value "hello world"
+```
+
+You should see the following output:
+
+```shell
+hello world!
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/deploy-docker.md b/site2/website/versioned_docs/version-2.7.0/deploy-docker.md
new file mode 100644
index 00000000000000..439c7e77f5088e
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/deploy-docker.md
@@ -0,0 +1,52 @@
+---
+id: version-2.7.0-deploy-docker
+title: Deploy a cluster on Docker
+sidebar_label: Docker
+original_id: deploy-docker
+---
+
+To deploy a Pulsar cluster on Docker, complete the following steps:
+
+1. Deploy a ZooKeeper cluster (optional)
+2. Initialize cluster metadata
+3. Deploy a BookKeeper cluster
+4. Deploy one or more Pulsar brokers
+
+## Prepare
+
+To run Pulsar on Docker, you need to create a container for each Pulsar component: ZooKeeper, BookKeeper, and broker. You can pull the images of ZooKeeper and BookKeeper separately on [Docker Hub](https://hub.docker.com/), and pull a [Pulsar image](https://hub.docker.com/r/apachepulsar/pulsar-all/tags) for the broker. You can also pull only one [Pulsar image](https://hub.docker.com/r/apachepulsar/pulsar-all/tags) and create three containers with this image. This tutorial takes the second option as an example.
+
+### Pull a Pulsar image
+You can pull a Pulsar image from [Docker Hub](https://hub.docker.com/r/apachepulsar/pulsar-all/tags) with the following command:
+
+```
+docker pull apachepulsar/pulsar-all:latest
+```
+
+### Create three containers
+Create containers for ZooKeeper, BookKeeper, and broker. In this example, they are named `zookeeper`, `bookkeeper`, and `broker` respectively. You can name them as you want with the `--name` flag; if you do not specify names, the container names are created randomly.
+
+```
+docker run -it --name bookkeeper apachepulsar/pulsar-all:latest /bin/bash
+docker run -it --name zookeeper apachepulsar/pulsar-all:latest /bin/bash
+docker run -it --name broker apachepulsar/pulsar-all:latest /bin/bash
+```
+
+### Create a network
+To deploy a Pulsar cluster on Docker, you need to create a `network` and connect the containers of ZooKeeper, BookKeeper, and broker to this network. The following command creates the network `pulsar`:
+
+```
+docker network create pulsar
+```
+
+### Connect containers to network
+Connect the containers of ZooKeeper, BookKeeper, and broker to the `pulsar` network with the following commands:
+
+```
+docker network connect pulsar zookeeper
+docker network connect pulsar bookkeeper
+docker network connect pulsar broker
+```
+
+To check whether the containers are successfully connected to the network, enter the `docker network inspect pulsar` command.
+
+For detailed information about how to deploy the ZooKeeper cluster, the BookKeeper cluster, and the brokers, see [deploy a cluster on bare metal](deploy-bare-metal.md).
diff --git a/site2/website/versioned_docs/version-2.7.0/deploy-monitoring.md b/site2/website/versioned_docs/version-2.7.0/deploy-monitoring.md
new file mode 100644
index 00000000000000..7724157f0b6973
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/deploy-monitoring.md
@@ -0,0 +1,95 @@
+---
+id: version-2.7.0-deploy-monitoring
+title: Monitor
+sidebar_label: Monitor
+original_id: deploy-monitoring
+---
+
+You can monitor a Pulsar cluster in different ways, exposing both metrics related to the usage of topics and metrics about the overall health of the individual components of the cluster.
+
+## Collect metrics
+
+You can collect broker stats, ZooKeeper stats, and BookKeeper stats.
+
+### Broker stats
+
+You can collect Pulsar broker metrics from brokers and export the metrics in JSON format.
+Pulsar broker metrics are mainly of two types:
+
+* *Destination dumps*, which contain stats for each individual topic. You can fetch the destination dumps using the command below:
+
+  ```shell
+  bin/pulsar-admin broker-stats destinations
+  ```
+
+* *Broker metrics*, which contain the broker information and topic stats aggregated at the namespace level. You can fetch the broker metrics by using the following command:
+
+  ```shell
+  bin/pulsar-admin broker-stats monitoring-metrics
+  ```
+
+All the message rates are updated every minute.
+
+The aggregated broker metrics are also exposed in the [Prometheus](https://prometheus.io) format at:
+
+```shell
+http://$BROKER_ADDRESS:8080/metrics
+```
+
+### ZooKeeper stats
+
+The local ZooKeeper server, the configuration store server, and the clients that are shipped with Pulsar can expose detailed stats through Prometheus.
+
+```shell
+http://$LOCAL_ZK_SERVER:8000/metrics
+http://$GLOBAL_ZK_SERVER:8001/metrics
+```
+
+The default port of local ZooKeeper is `8000` and the default port of the configuration store is `8001`. You can change the default port of local ZooKeeper and the configuration store by specifying the `stats_server_port` system property.
+
+### BookKeeper stats
+
+You can configure the stats frameworks for BookKeeper by modifying the `statsProviderClass` in the `conf/bookkeeper.conf` file.
+
+The default BookKeeper configuration enables the Prometheus exporter. The configuration is included with the Pulsar distribution.
+
+```shell
+http://$BOOKIE_ADDRESS:8000/metrics
+```
+
+The default port for a bookie is `8000`. You can change the port by configuring `prometheusStatsHttpPort` in the `conf/bookkeeper.conf` file.
+
+## Configure Prometheus
+
+You can use Prometheus to collect all the metrics exposed for Pulsar components and set up [Grafana](https://grafana.com/) dashboards to display the metrics and monitor your Pulsar cluster. For details, refer to the [Prometheus guide](https://prometheus.io/docs/introduction/getting_started/).
+
+When you run Pulsar on bare metal, you can provide the list of nodes to be probed. When you deploy Pulsar in a Kubernetes cluster, the monitoring is set up automatically. For details, refer to [Kubernetes instructions](kubernetes-helm.md).
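+
+For the bare-metal case, a minimal Prometheus scrape configuration might look like the following sketch. The host names and the `prometheus.yml` layout here are illustrative assumptions, not part of the Pulsar distribution; the ports are the defaults noted above:
+
+```yaml
+scrape_configs:
+  - job_name: "pulsar-brokers"
+    metrics_path: "/metrics"
+    static_configs:
+      - targets: ["broker1:8080", "broker2:8080"]
+  - job_name: "pulsar-bookies"
+    metrics_path: "/metrics"
+    static_configs:
+      - targets: ["bookie1:8000", "bookie2:8000", "bookie3:8000"]
+```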
+
+## Dashboards
+
+When you collect time series statistics, the major problem is to make sure the number of dimensions attached to the data does not explode. For that reason, you only need to collect time series of metrics aggregated at the namespace level.
+
+### Pulsar per-topic dashboard
+
+The per-topic dashboard instructions are available at [Pulsar manager](administration-pulsar-manager.md).
+
+### Grafana
+
+You can use Grafana to create dashboards driven by the data that is stored in Prometheus.
+
+When you deploy Pulsar on Kubernetes, a `pulsar-grafana` Docker image is enabled by default. You can use the Docker image with the principal dashboards.
+
+Enter the command below to use the dashboard manually:
+
+```shell
+docker run -p3000:3000 \
+  -e PROMETHEUS_URL=http://$PROMETHEUS_HOST:9090/ \
+  apachepulsar/pulsar-grafana:latest
+```
+
+The following are some Grafana dashboard examples:
+
+- [pulsar-grafana](http://pulsar.apache.org/docs/en/deploy-monitoring/#grafana): a Grafana dashboard that displays metrics collected in Prometheus for Pulsar clusters running on Kubernetes.
+- [apache-pulsar-grafana-dashboard](https://github.com/streamnative/apache-pulsar-grafana-dashboard): a collection of Grafana dashboard templates for different Pulsar components running on both Kubernetes and on-premise machines.
+
+## Alerting rules
+
+You can set alerting rules according to your Pulsar environment. To configure alerting rules for Apache Pulsar, you can refer to [StreamNative platform](https://streamnative.io/docs/latest/configure/control-center/alertmanager) examples or [Alert Manager](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) alerting rules.
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/developing-binary-protocol.md b/site2/website/versioned_docs/version-2.7.0/developing-binary-protocol.md
new file mode 100644
index 00000000000000..58885640e6410c
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/developing-binary-protocol.md
@@ -0,0 +1,556 @@
+---
+id: version-2.7.0-develop-binary-protocol
+title: Pulsar binary protocol specification
+sidebar_label: Binary protocol
+original_id: develop-binary-protocol
+---
+
+Pulsar uses a custom binary protocol for communications between producers/consumers and brokers. This protocol is designed to support required features, such as acknowledgements and flow control, while ensuring maximum transport and implementation efficiency.
+
+Clients and brokers exchange *commands* with each other. Commands are formatted as binary [protocol buffer](https://developers.google.com/protocol-buffers/) (aka *protobuf*) messages. The format of protobuf commands is specified in the [`PulsarApi.proto`](https://github.com/apache/pulsar/blob/master/pulsar-common/src/main/proto/PulsarApi.proto) file and also documented in the [Protobuf interface](#protobuf-interface) section below.
+
+> ### Connection sharing
+> Commands for different producers and consumers can be interleaved and sent through the same connection without restriction.
+
+All commands associated with Pulsar's protocol are contained in a
+[`BaseCommand`](#pulsar.proto.BaseCommand) protobuf message that includes a [`Type`](#pulsar.proto.Type) [enum](https://developers.google.com/protocol-buffers/docs/proto#enum) with all possible subcommands as optional fields. `BaseCommand` messages can specify only one subcommand.
+
+## Framing
+
+Since protobuf doesn't provide any sort of message frame, all messages in the Pulsar protocol are prepended with a 4-byte field that specifies the size of the frame. The maximum allowable size of a single frame is 5 MB.
+
+The Pulsar protocol allows for two types of commands:
+
+1. **Simple commands** that do not carry a message payload.
+2. **Payload commands** that bear a payload that is used when publishing or delivering messages. In payload commands, the protobuf command data is followed by protobuf [metadata](#message-metadata) and then the payload, which is passed in raw format outside of protobuf. All sizes are passed as 4-byte unsigned big endian integers.
+
+> Message payloads are passed in raw format rather than protobuf format for efficiency reasons.
+
+### Simple commands
+
+Simple (payload-free) commands have this basic structure:
+
+| Component   | Description | Size (in bytes) |
+|:------------|:------------|:----------------|
+| totalSize   | The size of the frame, counting everything that comes after it (in bytes) | 4 |
+| commandSize | The size of the protobuf-serialized command | 4 |
+| message     | The command, serialized as a binary protobuf message | |
+
+### Payload commands
+
+Payload commands have this basic structure:
+
+| Component    | Description | Size (in bytes) |
+|:-------------|:------------|:----------------|
+| totalSize    | The size of the frame, counting everything that comes after it (in bytes) | 4 |
+| commandSize  | The size of the protobuf-serialized command | 4 |
+| message      | The command, serialized as a binary protobuf message | |
+| magicNumber  | A 2-byte byte array (`0x0e01`) identifying the current format | 2 |
+| checksum     | A [CRC32-C checksum](http://www.evanjones.ca/crc32c.html) of everything that comes after it | 4 |
+| metadataSize | The size of the message [metadata](#message-metadata) | 4 |
+| metadata     | The message [metadata](#message-metadata) stored as a binary protobuf message | |
+| payload      | Anything left in the frame is considered the payload and can include any sequence of bytes | |
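+
+To make the framing concrete, here is a small Python sketch (illustrative only, not taken from any Pulsar client) that frames an already protobuf-serialized command following the two layouts above. The `crc32c` package used for the CRC32-C checksum is an assumed third-party dependency (`pip install crc32c`):
+
+```python
+import struct
+
+import crc32c  # assumed third-party CRC32-C implementation
+
+
+def frame_simple_command(command: bytes) -> bytes:
+    # [totalSize:4][commandSize:4][command]; sizes are 4-byte unsigned big-endian.
+    # totalSize counts everything after itself: the commandSize field plus the command.
+    return struct.pack(">II", 4 + len(command), len(command)) + command
+
+
+def frame_payload_command(command: bytes, metadata: bytes, payload: bytes) -> bytes:
+    # [totalSize:4][commandSize:4][command][magic:2][checksum:4][metadataSize:4][metadata][payload]
+    checked = struct.pack(">I", len(metadata)) + metadata + payload
+    checksum = struct.pack(">I", crc32c.crc32c(checked))  # CRC32-C of everything after it
+    rest = struct.pack(">I", len(command)) + command + b"\x0e\x01" + checksum + checked
+    return struct.pack(">I", len(rest)) + rest
+```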
+
+## Message metadata
+
+Message metadata is stored alongside the application-specified payload as a serialized protobuf message. Metadata is created by the producer and passed on unchanged to the consumer.
+
+| Field | Description |
+|:------|:------------|
+| `producer_name` | The name of the producer that published the message |
+| `sequence_id` | The sequence ID of the message, assigned by the producer |
+| `publish_time` | The publish timestamp in Unix time (i.e. as the number of milliseconds since January 1st, 1970 in UTC) |
+| `properties` | A sequence of key/value pairs (using the [`KeyValue`](https://github.com/apache/pulsar/blob/master/pulsar-common/src/main/proto/PulsarApi.proto#L32) message). These are application-defined keys and values with no special meaning to Pulsar. |
+| `replicated_from` *(optional)* | Indicates that the message has been replicated and specifies the name of the [cluster](reference-terminology.md#cluster) where the message was originally published |
+| `partition_key` *(optional)* | When publishing on a partitioned topic, if the key is present, the hash of the key is used to determine which partition to choose |
+| `compression` *(optional)* | Signals that the payload has been compressed, and with which compression library |
+| `uncompressed_size` *(optional)* | If compression is used, the producer must fill the uncompressed size field with the original payload size |
+| `num_messages_in_batch` *(optional)* | If this message is really a [batch](#batch-messages) of multiple entries, this field must be set to the number of messages in the batch |
+
+### Batch messages
+
+When using batch messages, the payload contains a list of entries, each with its own metadata, defined by the `SingleMessageMetadata` object.
+
+For a single batch, the payload format looks like this:
+
+| Field | Description |
+|:--------------|:------------------------------------------------------------|
+| metadataSizeN | The size of the single message metadata serialized Protobuf |
+| metadataN | Single message metadata |
+| payloadN | Message payload passed by application |
+
+Each metadata field looks like this:
+
+| Field | Description |
+|:---------------------------|:--------------------------------------------------------|
+| properties | Application-defined properties |
+| partition key *(optional)* | Key to indicate the hashing to a particular partition |
+| payload_size | Size of the payload for the single message in the batch |
+
+When compression is enabled, the whole batch is compressed at once.
+
+## Interactions
+
+### Connection establishment
+
+After opening a TCP connection to a broker, typically on port 6650, the client is responsible for initiating the session.
+
+![Connect interaction](assets/binary-protocol-connect.png)
+
+After receiving a `Connected` response from the broker, the client can consider the connection ready to use. Alternatively, if the broker cannot validate the client's authentication, it replies with an `Error` command and closes the TCP connection.
+
+Example:
+
+```protobuf
+message CommandConnect {
+  "client_version" : "Pulsar-Client-Java-v1.15.2",
+  "auth_method_name" : "my-authentication-plugin",
+  "auth_data" : "my-auth-data",
+  "protocol_version" : 6
+}
+```
+
+Fields:
+ * `client_version` → String-based identifier. The format is not enforced
+ * `auth_method_name` → *(optional)* Name of the authentication plugin if authentication is enabled
+ * `auth_data` → *(optional)* Plugin-specific authentication data
+ * `protocol_version` → Indicates the protocol version supported by the client. The broker does not send commands introduced in newer revisions of the protocol. The broker might enforce a minimum version
+
+```protobuf
+message CommandConnected {
+  "server_version" : "Pulsar-Broker-v1.15.2",
+  "protocol_version" : 6
+}
+```
+
+Fields:
+ * `server_version` → String identifier of the broker version
+ * `protocol_version` → Protocol version supported by the broker. Clients must not attempt to send commands introduced in newer revisions of the protocol
+
+### Keep Alive
+
+To identify prolonged network partitions between clients and brokers, or cases in which a machine crashes without interrupting the TCP connection on the remote end (for example, a power outage, kernel panic, or hard reboot), Pulsar has a mechanism to probe the availability status of the remote peer.
+
+Both clients and brokers send `Ping` commands periodically, and they close the socket if a `Pong` response is not received within a timeout (the default used by the broker is 60 seconds).
+
+A valid implementation of a Pulsar client is not required to send the `Ping` probe, though it is required to promptly reply after receiving one from the broker in order to prevent the remote side from forcibly closing the TCP connection.
+
+
+### Producer
+
+In order to send messages, a client needs to establish a producer. When creating a producer, the broker first verifies that this particular client is authorized to publish on the topic.
+
+Once the client gets confirmation of the producer creation, it can publish messages to the broker, referring to the producer id negotiated before.
+
+![Producer interaction](assets/binary-protocol-producer.png)
+
+##### Command Producer
+
+```protobuf
+message CommandProducer {
+  "topic" : "persistent://my-property/my-cluster/my-namespace/my-topic",
+  "producer_id" : 1,
+  "request_id" : 1
+}
+```
+
+Parameters:
+ * `topic` → Complete name of the topic on which to create the producer
+ * `producer_id` → Client-generated producer identifier. Needs to be unique within the same connection
+ * `request_id` → Identifier for this request. Used to match the response with the originating request. Needs to be unique within the same connection
+ * `producer_name` → *(optional)* If a producer name is specified, the name is used; otherwise, the broker generates a unique name. The generated producer name is guaranteed to be globally unique. Implementations are expected to let the broker generate a new producer name when the producer is initially created, then reuse it when recreating the producer after reconnections.
+
+The broker replies with either a `ProducerSuccess` or an `Error` command.
+
+##### Command ProducerSuccess
+
+```protobuf
+message CommandProducerSuccess {
+  "request_id" : 1,
+  "producer_name" : "generated-unique-producer-name"
+}
+```
+
+Parameters:
+ * `request_id` → Original id of the `CreateProducer` request
+ * `producer_name` → Generated globally unique producer name or the name specified by the client, if any.
+
+##### Command Send
+
+Command `Send` is used to publish a new message within the context of an already existing producer. This command is used in a frame that includes the command as well as the message payload, for which the complete format is specified in the [payload commands](#payload-commands) section.
+
+```protobuf
+message CommandSend {
+  "producer_id" : 1,
+  "sequence_id" : 0,
+  "num_messages" : 1
+}
+```
+
+Parameters:
+ * `producer_id` → id of an existing producer
+ * `sequence_id` → each message has an associated sequence id which is expected to be implemented with a counter starting at 0. The `SendReceipt` that acknowledges the effective publishing of a message refers to it by its sequence id.
+ * `num_messages` → *(optional)* Used when publishing a batch of messages at once.
+
+##### Command SendReceipt
+
+After a message has been persisted on the configured number of replicas, the broker sends an acknowledgment receipt to the producer.
+
+```protobuf
+message CommandSendReceipt {
+  "producer_id" : 1,
+  "sequence_id" : 0,
+  "message_id" : {
+    "ledgerId" : 123,
+    "entryId" : 456
+  }
+}
+```
+
+Parameters:
+ * `producer_id` → id of the producer originating the send request
+ * `sequence_id` → sequence id of the published message
+ * `message_id` → message id assigned by the system to the published message. It is unique within a single cluster and is composed of 2 longs, `ledgerId` and `entryId`, which reflect that this unique id is assigned when appending to a BookKeeper ledger
+
+
+##### Command CloseProducer
+
+**Note**: *This command can be sent by either producer or broker*.
+
+When receiving a `CloseProducer` command, the broker stops accepting any more messages for the producer, waits until all pending messages are persisted, and then replies `Success` to the client.
+
+The broker can send a `CloseProducer` command to the client when it performs a graceful failover (for example, the broker is being restarted, or the topic is being unloaded by the load balancer to be transferred to a different broker).
+
+When receiving the `CloseProducer`, the client is expected to go through the service discovery lookup again and recreate the producer. The TCP connection is not affected.
+
+### Consumer
+
+A consumer is used to attach to a subscription and consume messages from it. After every reconnection, a client needs to subscribe to the topic. If the subscription does not already exist, a new one is created.
+
+![Consumer](assets/binary-protocol-consumer.png)
+
+#### Flow control
+
+After the consumer is ready, the client needs to *give permission* to the broker to push messages. This is done with the `Flow` command.
+
+A `Flow` command gives additional *permits* to send messages to the consumer. A typical consumer implementation uses a queue to accumulate these messages before the application is ready to consume them.
+
+After the application has dequeued half of the messages in the queue, the consumer sends permits to the broker to ask for more messages (equal to half of the queue size).
+
+For example, if the queue size is 1000 and the consumer has consumed 500 messages from the queue, the consumer sends permits to the broker to ask for 500 more messages (see the sketch after the `Subscribe` command below).
+
+##### Command Subscribe
+
+```protobuf
+message CommandSubscribe {
+  "topic" : "persistent://my-property/my-cluster/my-namespace/my-topic",
+  "subscription" : "my-subscription-name",
+  "subType" : "Exclusive",
+  "consumer_id" : 1,
+  "request_id" : 1
+}
+```
+
+Parameters:
+ * `topic` → Complete name of the topic on which to create the consumer
+ * `subscription` → Subscription name
+ * `subType` → Subscription type: Exclusive, Shared, Failover
+ * `consumer_id` → Client-generated consumer identifier. Needs to be unique within the same connection
+ * `request_id` → Identifier for this request. Used to match the response with the originating request. Needs to be unique within the same connection
+ * `consumer_name` → *(optional)* Clients can specify a consumer name. This name can be used to track a particular consumer in the stats. Also, in the Failover subscription type, the name is used to decide which consumer is elected as *master* (the one receiving messages): consumers are sorted by their consumer name and the first one is elected master.
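+
+As referenced above, the following Python sketch (illustrative only; it is not taken from any Pulsar client, and `send_flow` stands in for whatever sends the `Flow` command defined next) shows the half-queue permit accounting a consumer implementation might keep:
+
+```python
+class PermitAccounting:
+    """Track dequeued messages and grant permits back in half-queue batches."""
+
+    def __init__(self, queue_size: int = 1000):
+        self.queue_size = queue_size
+        self.dequeued = 0  # messages dequeued since the last Flow command
+
+    def on_dequeue(self, send_flow) -> None:
+        self.dequeued += 1
+        if self.dequeued >= self.queue_size // 2:
+            send_flow(self.dequeued)  # grant this many additional permits
+            self.dequeued = 0
+```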
+
+##### Command Flow
+
+```protobuf
+message CommandFlow {
+  "consumer_id" : 1,
+  "messagePermits" : 1000
+}
+```
+
+Parameters:
+* `consumer_id` → Id of an already established consumer
+* `messagePermits` → Number of additional permits to grant to the broker for pushing more messages
+
+##### Command Message
+
+Command `Message` is used by the broker to push messages to an existing consumer, within the limits of the given permits.
+
+This command is used in a frame that includes the message payload as well, for which the complete format is specified in the [payload commands](#payload-commands) section.
+
+```protobuf
+message CommandMessage {
+  "consumer_id" : 1,
+  "message_id" : {
+    "ledgerId" : 123,
+    "entryId" : 456
+  }
+}
+```
+
+##### Command Ack
+
+An `Ack` is used to signal to the broker that a given message has been successfully processed by the application and can be discarded by the broker.
+
+In addition, the broker also maintains the consumer position based on the acknowledged messages.
+
+```protobuf
+message CommandAck {
+  "consumer_id" : 1,
+  "ack_type" : "Individual",
+  "message_id" : {
+    "ledgerId" : 123,
+    "entryId" : 456
+  }
+}
+```
+
+Parameters:
+ * `consumer_id` → Id of an already established consumer
+ * `ack_type` → Type of acknowledgment: `Individual` or `Cumulative`
+ * `message_id` → Id of the message to acknowledge
+ * `validation_error` → *(optional)* Indicates that the consumer has discarded the messages due to: `UncompressedSizeCorruption`, `DecompressionError`, `ChecksumMismatch`, `BatchDeSerializeError`
+
+##### Command CloseConsumer
+
+***Note***: *This command can be sent by either consumer or broker*.
+
+This command behaves the same as [`CloseProducer`](#command-closeproducer).
+
+##### Command RedeliverUnacknowledgedMessages
+
+A consumer can ask the broker to redeliver some or all of the pending messages that were pushed to that particular consumer and not yet acknowledged.
+
+The protobuf object accepts a list of message ids that the consumer wants to be redelivered. If the list is empty, the broker redelivers all the pending messages.
+
+On redelivery, messages can be sent to the same consumer or, in the case of a shared subscription, spread across all available consumers.
+
+##### Command ReachedEndOfTopic
+
+This is sent by a broker to a particular consumer, whenever the topic has been "terminated" and all the messages on the subscription have been acknowledged.
+
+The client should use this command to notify the application that no more messages are coming from the consumer.
+
+##### Command ConsumerStats
+
+This command is sent by the client to retrieve subscriber-level and consumer-level stats from the broker.
+
+Parameters:
+ * `request_id` → Id of the request, used to correlate the request and the response.
+ * `consumer_id` → Id of an already established consumer.
+
+##### Command ConsumerStatsResponse
+
+This is the broker's response to a `ConsumerStats` request from the client. It contains the subscriber-level and consumer-level stats of the `consumer_id` sent in the request. If the `error_code` or `error_message` field is set, the request has failed.
+
+##### Command Unsubscribe
+
+This command is sent by the client to unsubscribe the `consumer_id` from the associated topic.
+
+Parameters:
+ * `request_id` → Id of the request.
+ * `consumer_id` → Id of an already established consumer which needs to unsubscribe.
+
+
+## Service discovery
+
+### Topic lookup
+
+Topic lookup needs to be performed each time a client needs to create or reconnect a producer or a consumer. Lookup is used to discover which particular broker is serving the topic we are about to use.
+
+Lookup can be done with a REST call as described in the [admin API](admin-api-topics.md#lookup-of-topic) docs.
+
+Since Pulsar-1.16 it is also possible to perform the lookup within the binary protocol.
+
+For the sake of example, let's assume we have a service discovery component running at `pulsar://broker.example.com:6650`.
+
+Individual brokers will be running at `pulsar://broker-1.example.com:6650`, `pulsar://broker-2.example.com:6650`, ...
+
+A client can use a connection to the discovery service host to issue a `LookupTopic` command. The response can either be a broker hostname to connect to, or a broker hostname against which to retry the lookup.
+
+The `LookupTopic` command has to be used in a connection that has already gone through the `Connect` / `Connected` initial handshake.
+
+![Topic lookup](assets/binary-protocol-topic-lookup.png)
+
+```protobuf
+message CommandLookupTopic {
+  "topic" : "persistent://my-property/my-cluster/my-namespace/my-topic",
+  "request_id" : 1,
+  "authoritative" : false
+}
+```
+
+Fields:
+ * `topic` → Topic name to look up
+ * `request_id` → Id of the request that will be passed with its response
+ * `authoritative` → The initial lookup request should use false. When following a redirect response, the client should pass the same value contained in the response
+
+##### LookupTopicResponse
+
+Example of response with successful lookup:
+
+```protobuf
+message CommandLookupTopicResponse {
+  "request_id" : 1,
+  "response" : "Connect",
+  "brokerServiceUrl" : "pulsar://broker-1.example.com:6650",
+  "brokerServiceUrlTls" : "pulsar+ssl://broker-1.example.com:6651",
+  "authoritative" : true
+}
+```
+
+Example of lookup response with redirection:
+
+```protobuf
+message CommandLookupTopicResponse {
+  "request_id" : 1,
+  "response" : "Redirect",
+  "brokerServiceUrl" : "pulsar://broker-2.example.com:6650",
+  "brokerServiceUrlTls" : "pulsar+ssl://broker-2.example.com:6651",
+  "authoritative" : true
+}
+```
+
+In this second case, we need to reissue the `LookupTopic` command request to `broker-2.example.com`, and this broker can give a definitive answer to the lookup request.
+
+### Partitioned topics discovery
+
+Partitioned topics metadata discovery is used to find out if a topic is a "partitioned topic" and how many partitions were set up.
+
+If the topic is marked as "partitioned", the client is expected to create multiple producers or consumers, one for each partition, using the `partition-X` suffix.
+
+This information only needs to be retrieved the first time a producer or consumer is created. There is no need to do this after reconnections.
+
+The discovery of partitioned topics metadata works very similarly to topic lookup. The client sends a request to the service discovery address, and the response contains the actual metadata.
+
+##### Command PartitionedTopicMetadata
+
+```protobuf
+message CommandPartitionedTopicMetadata {
+  "topic" : "persistent://my-property/my-cluster/my-namespace/my-topic",
+  "request_id" : 1
+}
+```
+
+Fields:
+ * `topic` → the topic for which to check the partitions metadata
+ * `request_id` → Id of the request that will be passed with its response
+
+
+##### Command PartitionedTopicMetadataResponse
+
+Example of response with metadata:
+
+```protobuf
+message CommandPartitionedTopicMetadataResponse {
+  "request_id" : 1,
+  "response" : "Success",
+  "partitions" : 32
+}
+```
+
+## Protobuf interface
+
+All Pulsar's Protobuf definitions can be found {@inject: github:here:/pulsar-common/src/main/proto/PulsarApi.proto}.
diff --git a/site2/website/versioned_docs/version-2.7.0/functions-develop.md b/site2/website/versioned_docs/version-2.7.0/functions-develop.md
new file mode 100644
index 00000000000000..21b071239fe71a
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/functions-develop.md
@@ -0,0 +1,1084 @@
+---
+id: version-2.7.0-functions-develop
+title: Develop Pulsar Functions
+sidebar_label: How-to: Develop
+original_id: functions-develop
+---
+
+In this guide, you learn how to develop Pulsar Functions with different APIs for Java, Python, and Go.
+
+## Available APIs
+In Java and Python, you have two options to write Pulsar Functions. In Go, you can use the Pulsar Functions SDK for Go.
+
+Interface | Description | Use cases
+:---------|:------------|:---------
+Language-native interface | No Pulsar-specific libraries or special dependencies required (only core libraries from Java/Python). | Functions that do not require access to the function [context](#context).
+Pulsar Function SDK for Java/Python/Go | Pulsar-specific libraries that provide a range of functionality not provided by "native" interfaces. | Functions that require access to the function [context](#context).
+
+The language-native function, which adds an exclamation point to all incoming strings and publishes the resulting string to a topic, has no external dependencies. The following example is a language-native function.
+
+
+
+```Java
+import java.util.function.Function;
+
+public class JavaNativeExclamationFunction implements Function<String, String> {
+    @Override
+    public String apply(String input) {
+        return String.format("%s!", input);
+    }
+}
+```
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-functions/java-examples/src/main/java/org/apache/pulsar/functions/api/examples/JavaNativeExclamationFunction.java).
+
+
+```python
+def process(input):
+    return "{}!".format(input)
+```
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-functions/python-examples/native_exclamation_function.py).
+
+> Note
+> You can write Pulsar Functions in python2 or python3. However, Pulsar only looks for `python` as the interpreter.
+>
+> If you're running Pulsar Functions on an Ubuntu system that only supports python3, the functions might fail to
+> start. In this case, you can create a symlink. Your system will fail if
+> you subsequently install any other package that depends on Python 2.x. A solution is under development in [Issue 5518](https://github.com/apache/pulsar/issues/5518).
+>
+> ```bash
+> sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10
+> ```
+
+
+
+The following example uses the Pulsar Functions SDK.
+
+
+```Java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+
+public class ExclamationFunction implements Function<String, String> {
+    @Override
+    public String process(String input, Context context) {
+        return String.format("%s!", input);
+    }
+}
+```
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-functions/java-examples/src/main/java/org/apache/pulsar/functions/api/examples/ExclamationFunction.java).
+
+
+```python
+from pulsar import Function
+
+class ExclamationFunction(Function):
+  def __init__(self):
+    pass
+
+  def process(self, input, context):
+    return input + '!'
+```
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-functions/python-examples/exclamation_function.py).
+
+
+```Go
+package main
+
+import (
+    "context"
+    "fmt"
+
+    "github.com/apache/pulsar/pulsar-function-go/pf"
+)
+
+func HandleRequest(ctx context.Context, in []byte) error {
+    fmt.Println(string(in) + "!")
+    return nil
+}
+
+func main() {
+    pf.Start(HandleRequest)
+}
+```
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-function-go/examples/inputFunc.go#L20-L36).
+
+
+
+## Schema registry
+Pulsar has a built-in schema registry and is bundled with popular schema types, such as Avro, JSON and Protobuf. Pulsar Functions can leverage the existing schema information from input topics to derive the input type. The schema registry applies to the output topic as well.
+
+## SerDe
+SerDe stands for **Ser**ialization and **De**serialization. Pulsar Functions uses SerDe when publishing data to and consuming data from Pulsar topics. How SerDe works by default depends on the language you use for a particular function.
+
+
+
+When you write Pulsar Functions in Java, the following basic Java types are built in and supported by default: `String`, `Double`, `Integer`, `Float`, `Long`, `Short`, and `Byte`.
+
+To customize Java types, you need to implement the following interface.
+
+```java
+public interface SerDe<T> {
+    T deserialize(byte[] input);
+    byte[] serialize(T input);
+}
+```
+SerDe works in the following ways in Java Functions.
+- If the input and output topics have a schema, Pulsar Functions use the schema for SerDe.
+- If the input or output topics do not exist, Pulsar Functions adopt the following rules to determine SerDe:
+  - If the schema type is specified, Pulsar Functions use the specified schema type.
+  - If SerDe is specified, Pulsar Functions use the specified SerDe, and the schema type for input and output topics is `Byte`.
+  - If neither the schema type nor SerDe is specified, Pulsar Functions use the built-in SerDe. For non-primitive schema types, the built-in SerDe serializes and deserializes objects in the `JSON` format.
+
+
+In Python, the default SerDe is identity, meaning that the type is serialized as whatever type the producer function returns.
+
+You can specify the SerDe when [creating](functions-deploy.md#cluster-mode) or [running](functions-deploy.md#local-run-mode) functions.
+
+```bash
+$ bin/pulsar-admin functions create \
+  --tenant public \
+  --namespace default \
+  --name my_function \
+  --py my_function.py \
+  --classname my_function.MyFunction \
+  --custom-serde-inputs '{"input-topic-1":"Serde1","input-topic-2":"Serde2"}' \
+  --output-serde-classname Serde3 \
+  --output output-topic-1
+```
+
+This case contains two input topics: `input-topic-1` and `input-topic-2`, each of which is mapped to a different SerDe class (the map must be specified as a JSON string).
The output topic, `output-topic-1`, uses the `Serde3` class for SerDe. At the moment, all Pulsar Functions logic, including the processing function and SerDe classes, must be contained within a single Python file.
+
+When using Pulsar Functions for Python, you have three SerDe options:
+
+1. You can use the [`IdentitySerde`](https://github.com/apache/pulsar/blob/master/pulsar-client-cpp/python/pulsar/functions/serde.py#L70), which leaves the data unchanged. The `IdentitySerDe` is the **default**. Creating or running a function without explicitly specifying SerDe means that this option is used.
+2. You can use the [`PickleSerDe`](https://github.com/apache/pulsar/blob/master/pulsar-client-cpp/python/pulsar/functions/serde.py#L62), which uses Python [`pickle`](https://docs.python.org/3/library/pickle.html) for SerDe.
+3. You can create a custom SerDe class by implementing the baseline [`SerDe`](https://github.com/apache/pulsar/blob/master/pulsar-client-cpp/python/pulsar/functions/serde.py#L50) class, which has just two methods: [`serialize`](https://github.com/apache/pulsar/blob/master/pulsar-client-cpp/python/pulsar/functions/serde.py#L53) for converting the object into bytes, and [`deserialize`](https://github.com/apache/pulsar/blob/master/pulsar-client-cpp/python/pulsar/functions/serde.py#L58) for converting bytes into an object of the required application-specific type.
+
+The table below shows when you should use each SerDe.
+
+SerDe option | When to use
+:------------|:-----------
+`IdentitySerde` | When you work with simple types like strings, Booleans, integers.
+`PickleSerDe` | When you work with complex, application-specific types and are comfortable with the "best effort" approach of `pickle`.
+Custom SerDe | When you require explicit control over SerDe, potentially for performance or data compatibility purposes.
+
+
+Currently, the feature is not available in Go.
+
+
+
+### Example
+Imagine that you're writing Pulsar Functions that process tweet objects. You can refer to the following example of a `Tweet` class.
+
+
+
+
+```java
+public class Tweet {
+    private String username;
+    private String tweetContent;
+
+    public Tweet(String username, String tweetContent) {
+        this.username = username;
+        this.tweetContent = tweetContent;
+    }
+
+    // Standard setters and getters
+}
+```
+
+To pass `Tweet` objects directly between Pulsar Functions, you need to provide a custom SerDe class. In the example below, `Tweet` objects are basically strings in which the username and tweet content are separated by a `|`.
+
+```java
+package com.example.serde;
+
+import org.apache.pulsar.functions.api.SerDe;
+
+import java.util.regex.Pattern;
+
+public class TweetSerde implements SerDe<Tweet> {
+    public Tweet deserialize(byte[] input) {
+        String s = new String(input);
+        String[] fields = s.split(Pattern.quote("|"));
+        return new Tweet(fields[0], fields[1]);
+    }
+
+    public byte[] serialize(Tweet input) {
+        return String.format("%s|%s", input.getUsername(), input.getTweetContent()).getBytes();
+    }
+}
+```
+
+To apply this customized SerDe to a particular Pulsar Function, you need to:
+
+* Package the `Tweet` and `TweetSerde` classes into a JAR.
+* Specify a path to the JAR and SerDe class name when deploying the function.
+
+The following is an example of the [`create`](reference-pulsar-admin.md#create-1) operation.
+
+```bash
+$ bin/pulsar-admin functions create \
+  --jar /path/to/your.jar \
+  --output-serde-classname com.example.serde.TweetSerde \
+  # Other function attributes
+```
+
+> #### Custom SerDe classes must be packaged with your function JARs
+> Pulsar does not store your custom SerDe classes separately from your Pulsar Functions. So you need to include your SerDe classes in your function JARs. If not, Pulsar returns an error.
+
+
+
+```python
+class Tweet(object):
+    def __init__(self, username, tweet_content):
+        self.username = username
+        self.tweet_content = tweet_content
+```
+
+In order to use this class in Pulsar Functions, you have two options:
+
+1. You can specify `PickleSerDe`, which applies the [`pickle`](https://docs.python.org/3/library/pickle.html) library SerDe.
+2. You can create your own SerDe class. The following is an example.
+
+   ```python
+   from pulsar import SerDe
+
+   class TweetSerDe(SerDe):
+
+       def serialize(self, input):
+           return "{0}|{1}".format(input.username, input.tweet_content).encode('utf-8')
+
+       def deserialize(self, input_bytes):
+           tweet_components = str(input_bytes).split('|')
+           return Tweet(tweet_components[0], tweet_components[1])
+   ```
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-functions/python-examples/custom_object_function.py).
+
+
+
+In both languages, you can write custom SerDe logic for more complex, application-specific types.
+
+## Context
+The Java, Python and Go SDKs provide access to a **context object** that can be used by a function. This context object provides a wide variety of information and functionality to the function.
+
+* The name and ID of a Pulsar Function.
+* The message ID of each message. Each Pulsar message is automatically assigned an ID.
+* The key, event time, properties and partition key of each message.
+* The name of the topic to which the message is sent.
+* The names of all input topics as well as the output topic associated with the function.
+* The name of the class used for [SerDe](#serde).
+* The [tenant](reference-terminology.md#tenant) and namespace associated with the function.
+* The ID of the Pulsar Functions instance running the function.
+* The version of the function.
+* The [logger object](functions-develop.md#logger) used by the function, which can be used to create function log messages.
+* Access to arbitrary [user configuration](#user-config) values supplied via the CLI.
+* An interface for recording [metrics](#metrics).
+* An interface for storing and retrieving state in [state storage](#state-storage).
+* A function to publish new messages onto arbitrary topics.
+* A function to ack the message being processed (if auto-ack is disabled).
+
+
+
+The [Context](https://github.com/apache/pulsar/blob/master/pulsar-functions/api-java/src/main/java/org/apache/pulsar/functions/api/Context.java) interface provides a number of methods that you can use to access the function [context](#context). The various method signatures for the `Context` interface are listed as follows.
+
+```java
+public interface Context {
+    Record<?> getCurrentRecord();
+    Collection<String> getInputTopics();
+    String getOutputTopic();
+    String getOutputSchemaType();
+    String getTenant();
+    String getNamespace();
+    String getFunctionName();
+    String getFunctionId();
+    String getInstanceId();
+    String getFunctionVersion();
+    Logger getLogger();
+    void incrCounter(String key, long amount);
+    CompletableFuture<Void> incrCounterAsync(String key, long amount);
+    long getCounter(String key);
+    CompletableFuture<Long> getCounterAsync(String key);
+    void putState(String key, ByteBuffer value);
+    CompletableFuture<Void> putStateAsync(String key, ByteBuffer value);
+    void deleteState(String key);
+    ByteBuffer getState(String key);
+    CompletableFuture<ByteBuffer> getStateAsync(String key);
+    Map<String, Object> getUserConfigMap();
+    Optional<Object> getUserConfigValue(String key);
+    Object getUserConfigValueOrDefault(String key, Object defaultValue);
+    void recordMetric(String metricName, double value);
+    <O> CompletableFuture<Void> publish(String topicName, O object, String schemaOrSerdeClassName);
+    <O> CompletableFuture<Void> publish(String topicName, O object);
+    <O> TypedMessageBuilder<O> newOutputMessage(String topicName, Schema<O> schema) throws PulsarClientException;
+    <O> ConsumerBuilder<O> newConsumerBuilder(Schema<O> schema) throws PulsarClientException;
+}
+```
+
+The following example uses several methods available via the `Context` object.
+
+```java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+import org.slf4j.Logger;
+
+import java.util.stream.Collectors;
+
+public class ContextFunction implements Function<String, Void> {
+    public Void process(String input, Context context) {
+        Logger LOG = context.getLogger();
+        String inputTopics = context.getInputTopics().stream().collect(Collectors.joining(", "));
+        String functionName = context.getFunctionName();
+
+        String logMessage = String.format("A message with a value of \"%s\" has arrived on one of the following topics: %s\n",
+                input,
+                inputTopics);
+
+        LOG.info(logMessage);
+
+        String metricName = String.format("function-%s-messages-received", functionName);
+        context.recordMetric(metricName, 1);
+
+        return null;
+    }
+}
+```
+
+
+```python
+class ContextImpl(pulsar.Context):
+  def get_message_id(self):
+    ...
+  def get_message_key(self):
+    ...
+  def get_message_eventtime(self):
+    ...
+  def get_message_properties(self):
+    ...
+  def get_current_message_topic_name(self):
+    ...
+  def get_partition_key(self):
+    ...
+  def get_function_name(self):
+    ...
+  def get_function_tenant(self):
+    ...
+  def get_function_namespace(self):
+    ...
+  def get_function_id(self):
+    ...
+  def get_instance_id(self):
+    ...
+  def get_function_version(self):
+    ...
+  def get_logger(self):
+    ...
+  def get_user_config_value(self, key):
+    ...
+  def get_user_config_map(self):
+    ...
+  def record_metric(self, metric_name, metric_value):
+    ...
+  def get_input_topics(self):
+    ...
+  def get_output_topic(self):
+    ...
+  def get_output_serde_class_name(self):
+    ...
+  def publish(self, topic_name, message, serde_class_name="serde.IdentitySerDe",
+              properties=None, compression_type=None, callback=None, message_conf=None):
+    ...
+  def ack(self, msgid, topic):
+    ...
+  def get_and_reset_metrics(self):
+    ...
+  def reset_metrics(self):
+    ...
+  def get_metrics(self):
+    ...
+  def incr_counter(self, key, amount):
+    ...
+  def get_counter(self, key):
+    ...
+  def del_counter(self, key):
+    ...
+  def put_state(self, key, value):
+    ...
+  def get_state(self, key):
+    ...
+```
+
+
+```go
+func (c *FunctionContext) GetInstanceID() int {
+    return c.instanceConf.instanceID
+}
+
+func (c *FunctionContext) GetInputTopics() []string {
+    return c.inputTopics
+}
+
+func (c *FunctionContext) GetOutputTopic() string {
+    return c.instanceConf.funcDetails.GetSink().Topic
+}
+
+func (c *FunctionContext) GetFuncTenant() string {
+    return c.instanceConf.funcDetails.Tenant
+}
+
+func (c *FunctionContext) GetFuncName() string {
+    return c.instanceConf.funcDetails.Name
+}
+
+func (c *FunctionContext) GetFuncNamespace() string {
+    return c.instanceConf.funcDetails.Namespace
+}
+
+func (c *FunctionContext) GetFuncID() string {
+    return c.instanceConf.funcID
+}
+
+func (c *FunctionContext) GetFuncVersion() string {
+    return c.instanceConf.funcVersion
+}
+
+func (c *FunctionContext) GetUserConfValue(key string) interface{} {
+    return c.userConfigs[key]
+}
+
+func (c *FunctionContext) GetUserConfMap() map[string]interface{} {
+    return c.userConfigs
+}
+
+func (c *FunctionContext) SetCurrentRecord(record pulsar.Message) {
+    c.record = record
+}
+
+func (c *FunctionContext) GetCurrentRecord() pulsar.Message {
+    return c.record
+}
+
+func (c *FunctionContext) NewOutputMessage(topic string) pulsar.Producer {
+    return c.outputMessage(topic)
+}
+```
+
+The following example uses several methods available via the `Context` object.
+
+```go
+import (
+    "context"
+    "fmt"
+
+    "github.com/apache/pulsar/pulsar-function-go/pf"
+)
+
+func contextFunc(ctx context.Context) {
+    if fc, ok := pf.FromContext(ctx); ok {
+        fmt.Printf("function ID is:%s, ", fc.GetFuncID())
+        fmt.Printf("function version is:%s\n", fc.GetFuncVersion())
+    }
+}
+```
+
+For complete code, see [here](https://github.com/apache/pulsar/blob/master/pulsar-function-go/examples/contextFunc.go#L29-L34).
+
+
+
+### User config
+When you run or update Pulsar Functions created using the SDK, you can pass arbitrary key/value pairs to them via the command line with the `--user-config` flag. Key/value pairs must be specified as JSON. The following function creation command passes a user-configured key/value pair to a function.
+
+```bash
+$ bin/pulsar-admin functions create \
+  --name word-filter \
+  # Other function configs
+  --user-config '{"forbidden-word":"rosebud"}'
+```
+
+
+
+The Java SDK [`Context`](#context) object enables you to access key/value pairs provided to Pulsar Functions via the command line (as JSON). The following example passes a key/value pair.
+
+```bash
+$ bin/pulsar-admin functions create \
+  # Other function configs
+  --user-config '{"word-of-the-day":"verdure"}'
+```
+
+To access that value in a Java function:
+
+```java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+import org.slf4j.Logger;
+
+import java.util.Optional;
+
+public class UserConfigFunction implements Function<String, Void> {
+    @Override
+    public Void process(String input, Context context) {
+        Logger LOG = context.getLogger();
+        Optional<Object> wotd = context.getUserConfigValue("word-of-the-day");
+        if (wotd.isPresent()) {
+            LOG.info("The word of the day is {}", wotd);
+        } else {
+            LOG.warn("No word of the day provided");
+        }
+        return null;
+    }
+}
+```
+
+The `UserConfigFunction` function will log the string `"The word of the day is verdure"` every time the function is invoked (which means every time a message arrives). The `word-of-the-day` user config will be changed only when the function is updated with a new config value via the command line.
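+
+For example, a minimal sketch of such an update, assuming the function above was deployed under the (hypothetical) name `word-of-the-day-function`:
+
+```bash
+$ bin/pulsar-admin functions update \
+  --name word-of-the-day-function \
+  # Other function configs
+  --user-config '{"word-of-the-day":"meliorism"}'
+```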
+
+You can also access the entire user config map or set a default value in case no value is present:
+
+```java
+// Get the whole config map
+Map<String, Object> allConfigs = context.getUserConfigMap();
+
+// Get value or resort to default
+String wotd = (String) context.getUserConfigValueOrDefault("word-of-the-day", "perspicacious");
+```
+
+> For all key/value pairs passed to Java functions, both the key *and* the value are `String`. To set the value to be a different type, you need to deserialize from the `String` type.
+
+
+In a Python function, you can access the configuration value like this.
+
+```python
+from pulsar import Function
+
+class WordFilter(Function):
+    def process(self, input, context):
+        forbidden_word = context.get_user_config_map()["forbidden-word"]
+
+        # Don't publish the message if it contains the user-supplied
+        # forbidden word
+        if forbidden_word in input:
+            pass
+        # Otherwise publish the message
+        else:
+            return input
+```
+
+The Python SDK [`Context`](#context) object enables you to access key/value pairs provided to Pulsar Functions via the command line (as JSON). The following example passes a key/value pair.
+
+```bash
+$ bin/pulsar-admin functions create \
+  # Other function configs \
+  --user-config '{"word-of-the-day":"verdure"}'
+```
+
+To access that value in a Python function:
+
+```python
+from pulsar import Function
+
+class UserConfigFunction(Function):
+    def process(self, input, context):
+        logger = context.get_logger()
+        wotd = context.get_user_config_value('word-of-the-day')
+        if wotd is None:
+            logger.warn('No word of the day provided')
+        else:
+            logger.info("The word of the day is {0}".format(wotd))
+```
+
+
+The Go SDK [`Context`](#context) object enables you to access key/value pairs provided to Pulsar Functions via the command line (as JSON). The following example passes a key/value pair.
+
+```bash
+$ bin/pulsar-admin functions create \
+  --go path/to/go/binary \
+  --user-config '{"word-of-the-day":"lackadaisical"}'
+```
+
+To access that value in a Go function:
+
+```go
+func contextFunc(ctx context.Context) {
+    fc, ok := pf.FromContext(ctx)
+    if !ok {
+        logutil.Fatal("Function context is not defined")
+    }
+
+    wotd := fc.GetUserConfValue("word-of-the-day")
+
+    if wotd == nil {
+        logutil.Warn("The word of the day is empty")
+    } else {
+        logutil.Infof("The word of the day is %s", wotd.(string))
+    }
+}
+```
+
+
+
+### Logger
+
+
+
+Pulsar Functions that use the Java SDK have access to an [SLF4j](https://www.slf4j.org/) [`Logger`](https://www.slf4j.org/api/org/apache/log4j/Logger.html) object that can be used to produce logs at the chosen log level. The following example logs either a `WARNING`- or `INFO`-level log based on whether the incoming string contains the word `danger`.
+
+```java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+import org.slf4j.Logger;
+
+public class LoggingFunction implements Function<String, Void> {
+    @Override
+    public Void process(String input, Context context) {
+        Logger LOG = context.getLogger();
+        String messageId = context.getCurrentRecord().getMessage()
+                .map(msg -> msg.getMessageId().toString())
+                .orElse("(unknown)");
+
+        if (input.contains("danger")) {
+            LOG.warn("A warning was received in message {}", messageId);
+        } else {
+            LOG.info("Message {} received\nContent: {}", messageId, input);
+        }
+
+        return null;
+    }
+}
+```
+
+If you want your function to produce logs, you need to specify a log topic when creating or running the function. The following is an example.
+
+```bash
+$ bin/pulsar-admin functions create \
+  --jar my-functions.jar \
+  --classname my.package.LoggingFunction \
+  --log-topic persistent://public/default/logging-function-logs \
+  # Other function configs
+```
+
+All logs produced by `LoggingFunction` above can be accessed via the `persistent://public/default/logging-function-logs` topic.
+
+
+Pulsar Functions that use the Python SDK have access to a logging object that can be used to produce logs at the chosen log level. The following example function logs either a `WARNING`- or `INFO`-level log based on whether the incoming string contains the word `danger`.
+
+```python
+from pulsar import Function
+
+class LoggingFunction(Function):
+    def process(self, input, context):
+        logger = context.get_logger()
+        msg_id = context.get_message_id()
+        if 'danger' in input:
+            logger.warn("A warning was received in message {0}".format(msg_id))
+        else:
+            logger.info("Message {0} received\nContent: {1}".format(msg_id, input))
+```
+
+If you want your function to produce logs on a Pulsar topic, you need to specify a **log topic** when creating or running the function. The following is an example.
+
+```bash
+$ bin/pulsar-admin functions create \
+  --py logging_function.py \
+  --classname logging_function.LoggingFunction \
+  --log-topic logging-function-logs \
+  # Other function configs
+```
+
+All logs produced by `LoggingFunction` above can be accessed via the `logging-function-logs` topic.
+
+
+The following Go function example shows different log levels based on the function input.
+
+```go
+import (
+    "context"
+
+    "github.com/apache/pulsar/pulsar-function-go/pf"
+
+    log "github.com/apache/pulsar/pulsar-function-go/logutil"
+)
+
+func loggerFunc(ctx context.Context, input []byte) {
+    if len(input) <= 100 {
+        log.Infof("This input has a length of: %d", len(input))
+    } else {
+        log.Warnf("This input is getting too long! It has {%d} characters", len(input))
+    }
+}
+
+func main() {
+    pf.Start(loggerFunc)
+}
+```
+
+When you use `logTopic`-related functionality in a Go function, import `github.com/apache/pulsar/pulsar-function-go/logutil`; you do not need to use the `getLogger()` context object.
+
+
+
+## Metrics
+Pulsar Functions can publish arbitrary metrics to the metrics interface, which can be queried.
+
+> If a Pulsar Function uses the language-native interface for Java or Python, that function is not able to publish metrics and stats to Pulsar.
+
+
+
+You can record metrics using the [`Context`](#context) object on a per-key basis. For example, you can set a metric for the `hit-count` key and a different metric for the `elevens-count` key every time the function processes a message.
+
+```java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+
+public class MetricRecorderFunction implements Function<Integer, Void> {
+    @Override
+    public Void process(Integer input, Context context) {
+        // Records the metric 1 every time a message arrives
+        context.recordMetric("hit-count", 1);
+
+        // Records the metric only if the arriving number equals 11
+        if (input == 11) {
+            context.recordMetric("elevens-count", 1);
+        }
+
+        return null;
+    }
+}
+```
+
+> For instructions on reading and using metrics, see the [Monitoring](deploy-monitoring.md) guide.
+
+
+You can record metrics using the [`Context`](#context) object on a per-key basis. For example, you can set a metric for the `hit-count` key and a different metric for the `elevens-count` key every time the function processes a message.
The following is an example.
+
+```python
+from pulsar import Function
+
+class MetricRecorderFunction(Function):
+    def process(self, input, context):
+        context.record_metric('hit-count', 1)
+
+        if input == 11:
+            context.record_metric('elevens-count', 1)
+```
+
+Currently, the feature is not available in Go.
+
+
+
+### Access metrics
+To access metrics created by Pulsar Functions, refer to [Monitoring](deploy-monitoring.md) in Pulsar.
+
+## Security
+
+If you want to enable security on Pulsar Functions, you should first enable security on [Functions Workers](functions-worker.md). For more details, refer to [Security settings](functions-worker.md#security-settings).
+
+Pulsar Functions can support the following providers:
+
+- ClearTextSecretsProvider
+- EnvironmentBasedSecretsProvider
+
+> Pulsar Function supports ClearTextSecretsProvider by default.
+
+At the same time, Pulsar Functions provides two interfaces, **SecretsProvider** and **SecretsProviderConfigurator**, that allow users to customize the secret provider.
+
+
+
+You can get the secret provider using the [`Context`](#context) object. The following is an example:
+
+```java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+import org.slf4j.Logger;
+
+public class GetSecretProviderFunction implements Function<String, Void> {
+
+    @Override
+    public Void process(String input, Context context) throws Exception {
+        Logger LOG = context.getLogger();
+        String secretProvider = context.getSecret(input);
+
+        if (!secretProvider.isEmpty()) {
+            LOG.info("The secret provider is {}", secretProvider);
+        } else {
+            LOG.warn("No secret provider");
+        }
+
+        return null;
+    }
+}
+```
+
+
+You can get the secret provider using the [`Context`](#context) object. The following is an example:
+
+```python
+from pulsar import Function
+
+class GetSecretProviderFunction(Function):
+    def process(self, input, context):
+        logger = context.get_logger()
+        secret_provider = context.get_secret(input)
+        if secret_provider is None:
+            logger.warn('No secret provider')
+        else:
+            logger.info("The secret provider is {0}".format(secret_provider))
+```
+
+
+
+Currently, the feature is not available in Go.
+
+
+
+## State storage
+Pulsar Functions use [Apache BookKeeper](https://bookkeeper.apache.org) as a state storage interface. A Pulsar installation, including the local standalone installation, includes a deployment of BookKeeper bookies.
+
+Since the Pulsar 2.1.0 release, Pulsar integrates with the Apache BookKeeper [table service](https://docs.google.com/document/d/155xAwWv5IdOitHh1NVMEwCMGgB28M3FyMiQSxEpjE-Y/edit#heading=h.56rbh52koe3f) to store the `State` for functions. For example, a `WordCount` function can store its `counters` state into the BookKeeper table service via the Pulsar Functions State API.
+
+States are key-value pairs, where the key is a string and the value is arbitrary binary data; counters are stored as 64-bit big-endian binary values. Keys are scoped to an individual Pulsar Function and shared between instances of that function.
+
+You can access states within Pulsar Java Functions using the `putState`, `putStateAsync`, `getState`, `getStateAsync`, `incrCounter`, `incrCounterAsync`, `getCounter`, `getCounterAsync` and `deleteState` calls on the context object. You can access states within Pulsar Python Functions using the `put_state`, `get_state`, `incr_counter`, `get_counter` and `del_counter` calls on the context object.
You can also manage states using the [querystate](#query-state) and [putstate](#putstate) options to `pulsar-admin functions`.
+
+> Note
+> State storage is not available in Go.
+
+### API
+
+
+
+Currently, Pulsar Functions expose the following APIs for mutating and accessing State. These APIs are available in the [Context](functions-develop.md#context) object when you are using Java SDK functions.
+
+#### incrCounter
+
+```java
+    /**
+     * Increment the builtin distributed counter referred to by key
+     * @param key The name of the key
+     * @param amount The amount to be incremented
+     */
+    void incrCounter(String key, long amount);
+```
+
+The application can use `incrCounter` to change the counter of a given `key` by the given `amount`.
+
+#### incrCounterAsync
+
+```java
+    /**
+     * Increment the builtin distributed counter referred to by key,
+     * but don't wait for the completion of the increment operation
+     *
+     * @param key The name of the key
+     * @param amount The amount to be incremented
+     */
+    CompletableFuture<Void> incrCounterAsync(String key, long amount);
+```
+
+The application can use `incrCounterAsync` to asynchronously change the counter of a given `key` by the given `amount`.
+
+#### getCounter
+
+```java
+    /**
+     * Retrieve the counter value for the key.
+     *
+     * @param key name of the key
+     * @return the amount of the counter value for this key
+     */
+    long getCounter(String key);
+```
+
+The application can use `getCounter` to retrieve the counter of a given `key` mutated by `incrCounter`.
+
+Besides the `counter` API, Pulsar also exposes a general key/value API for functions to store
+general key/value state.
+
+#### getCounterAsync
+
+```java
+    /**
+     * Retrieve the counter value for the key, but don't wait
+     * for the operation to be completed
+     *
+     * @param key name of the key
+     * @return the amount of the counter value for this key
+     */
+    CompletableFuture<Long> getCounterAsync(String key);
+```
+
+The application can use `getCounterAsync` to asynchronously retrieve the counter of a given `key` mutated by `incrCounterAsync`.
+
+#### putState
+
+```java
+    /**
+     * Update the state value for the key.
+     *
+     * @param key name of the key
+     * @param value state value of the key
+     */
+    void putState(String key, ByteBuffer value);
+```
+
+The application can use `putState` to update the state value of a given `key`.
+
+#### putStateAsync
+
+```java
+    /**
+     * Update the state value for the key, but don't wait for the operation to be completed
+     *
+     * @param key name of the key
+     * @param value state value of the key
+     */
+    CompletableFuture<Void> putStateAsync(String key, ByteBuffer value);
+```
+
+The application can use `putStateAsync` to asynchronously update the state of a given `key`.
+
+#### getState
+
+```java
+    /**
+     * Retrieve the state value for the key.
+     *
+     * @param key name of the key
+     * @return the state value for the key.
+     */
+    ByteBuffer getState(String key);
+```
+
+The application can use `getState` to retrieve the state value of a given `key`.
+
+#### getStateAsync
+
+```java
+    /**
+     * Retrieve the state value for the key, but don't wait for the operation to be completed
+     *
+     * @param key name of the key
+     * @return the state value for the key.
+     */
+    CompletableFuture<ByteBuffer> getStateAsync(String key);
+```
+
+The application can use `getStateAsync` to asynchronously retrieve the state of a given `key`.
+
+#### deleteState
+
+```java
+    /**
+     * Delete the state value for the key.
+     *
+     * @param key name of the key
+     */
+    void deleteState(String key);
+```
+
+The application can use `deleteState` to delete the state of a given `key`. Counters and binary values share the same keyspace, so this deletes either type.
+
+
+Currently, Pulsar Functions expose the following APIs for mutating and accessing State.
These APIs are available in the [Context](#context) object when you are using Python SDK functions.
+
+#### incr_counter
+
+```python
+  def incr_counter(self, key, amount):
+    """incr the counter of a given key in the managed state"""
+```
+
+The application can use `incr_counter` to change the counter of a given `key` by the given `amount`.
+If the `key` does not exist, a new key is created.
+
+#### get_counter
+
+```python
+  def get_counter(self, key):
+    """get the counter of a given key in the managed state"""
+```
+
+The application can use `get_counter` to retrieve the counter of a given `key` mutated by `incr_counter`.
+
+Besides the `counter` API, Pulsar also exposes a general key/value API for functions to store
+general key/value state.
+
+#### put_state
+
+```python
+  def put_state(self, key, value):
+    """update the value of a given key in the managed state"""
+```
+
+The key is a string, and the value is arbitrary binary data.
+
+#### get_state
+
+```python
+  def get_state(self, key):
+    """get the value of a given key in the managed state"""
+```
+
+#### del_counter
+
+```python
+  def del_counter(self, key):
+    """delete the counter of a given key in the managed state"""
+```
+
+Counters and binary values share the same keyspace, so this deletes either type.
+
+
+
+### Query State
+
+A Pulsar Function can use the [State API](#api) for storing state into Pulsar's state storage
+and retrieving state back from Pulsar's state storage. Additionally, Pulsar provides
+CLI commands for querying its state.
+
+```shell
+$ bin/pulsar-admin functions querystate \
+    --tenant <tenant> \
+    --namespace <namespace> \
+    --name <function-name> \
+    --state-storage-url <state-storage-url> \
+    --key <state-key> \
+    [--watch]
+```
+
+If `--watch` is specified, the CLI will watch the value of the provided `state-key`.
+
+### Example
+
+
+
+
+{@inject: github:`WordCountFunction`:/pulsar-functions/java-examples/src/main/java/org/apache/pulsar/functions/api/examples/WordCountFunction.java} is a good example
+demonstrating how an application can easily store `state` in Pulsar Functions.
+
+```java
+import org.apache.pulsar.functions.api.Context;
+import org.apache.pulsar.functions.api.Function;
+
+import java.util.Arrays;
+
+public class WordCountFunction implements Function<String, Void> {
+    @Override
+    public Void process(String input, Context context) throws Exception {
+        Arrays.asList(input.split("\\.")).forEach(word -> context.incrCounter(word, 1));
+        return null;
+    }
+}
+```
+
+The logic of this `WordCount` function is simple and straightforward:
+
+1. The function first splits the received `String` into multiple words using the regex `\\.`.
+2. For each `word`, the function increments the corresponding `counter` by 1 (via `incrCounter(key, amount)`).
+
+
+
+```python
+from pulsar import Function
+
+class WordCount(Function):
+    def process(self, item, context):
+        for word in item.split():
+            context.incr_counter(word, 1)
+```
+
+The logic of this `WordCount` function is simple and straightforward:
+
+1. The function first splits the received string into multiple words on space.
+2. For each `word`, the function increments the corresponding `counter` by 1 (via `incr_counter(key, amount)`).
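+
+As a usage sketch, assume the `WordCount` function above was deployed with the (illustrative) name `word-count` under the `public/default` namespace on a standalone cluster, where the BookKeeper table service is assumed to listen on the default `bk://127.0.0.1:4181`. You could then read back the counter for the word `hello` with the `querystate` command shown earlier:
+
+```shell
+$ bin/pulsar-admin functions querystate \
+    --tenant public \
+    --namespace default \
+    --name word-count \
+    --state-storage-url bk://127.0.0.1:4181 \
+    --key hello
+```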
+
+
diff --git a/site2/website/versioned_docs/version-2.7.0/functions-package.md b/site2/website/versioned_docs/version-2.7.0/functions-package.md
new file mode 100644
index 00000000000000..5a9275626b7553
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/functions-package.md
@@ -0,0 +1,431 @@
+---
+id: version-2.7.0-functions-package
+title: Package Pulsar Functions
+sidebar_label: How-to: Package
+original_id: functions-package
+---
+
+You can package Pulsar functions in Java, Python, and Go. Packaging the window function in Java is the same as [packaging a function in Java](#java).
+
+> **Note**
+> Currently, the window function is not available in Python and Go.
+
+## Prerequisite
+
+Before running a Pulsar function, you need to start Pulsar. You can [run a standalone Pulsar in Docker](getting-started-docker.md), or [run Pulsar in Kubernetes](getting-started-helm.md).
+
+To check whether the Docker image has started, you can use the `docker ps` command.
+
+## Java
+
+To package a function in Java, complete the following steps.
+
+1. Create a new Maven project with a pom file. In the following code sample, the value of `mainClass` is the fully qualified name of your function class.
+
+   ```xml
+   <?xml version="1.0" encoding="UTF-8"?>
+   <project xmlns="http://maven.apache.org/POM/4.0.0"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+            xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <modelVersion>4.0.0</modelVersion>
+
+       <groupId>java-function</groupId>
+       <artifactId>java-function</artifactId>
+       <version>1.0-SNAPSHOT</version>
+
+       <dependencies>
+           <dependency>
+               <groupId>org.apache.pulsar</groupId>
+               <artifactId>pulsar-functions-api</artifactId>
+               <version>2.6.0</version>
+           </dependency>
+       </dependencies>
+
+       <build>
+           <plugins>
+               <plugin>
+                   <artifactId>maven-assembly-plugin</artifactId>
+                   <configuration>
+                       <appendAssemblyId>false</appendAssemblyId>
+                       <descriptorRefs>
+                           <descriptorRef>jar-with-dependencies</descriptorRef>
+                       </descriptorRefs>
+                       <archive>
+                           <manifest>
+                               <mainClass>org.example.test.ExclamationFunction</mainClass>
+                           </manifest>
+                       </archive>
+                   </configuration>
+                   <executions>
+                       <execution>
+                           <id>make-assembly</id>
+                           <phase>package</phase>
+                           <goals>
+                               <goal>assembly</goal>
+                           </goals>
+                       </execution>
+                   </executions>
+               </plugin>
+               <plugin>
+                   <groupId>org.apache.maven.plugins</groupId>
+                   <artifactId>maven-compiler-plugin</artifactId>
+                   <configuration>
+                       <source>8</source>
+                       <target>8</target>
+                   </configuration>
+               </plugin>
+           </plugins>
+       </build>
+   </project>
+   ```
+
+2. Write a Java function.
+
+   ```java
+   package org.example.test;
+
+   import java.util.function.Function;
+
+   public class ExclamationFunction implements Function<String, String> {
+       @Override
+       public String apply(String s) {
+           return "This is my function!";
+       }
+   }
+   ```
+
+   For the imported package, you can use one of the following interfaces:
+   - Function interface provided by Java 8: `java.util.function.Function`
+   - Pulsar Function interface: `org.apache.pulsar.functions.api.Function`
+
+   The main difference between the two interfaces is that the `org.apache.pulsar.functions.api.Function` interface provides the context interface. When you write a function and want to interact with it, you can use the context to obtain a wide variety of information and functionality for Pulsar Functions.
+
+   The following example uses the `org.apache.pulsar.functions.api.Function` interface with context.
+
+   ```java
+   package org.example.functions;
+   import org.apache.pulsar.functions.api.Context;
+   import org.apache.pulsar.functions.api.Function;
+
+   import java.util.Arrays;
+   public class WordCountFunction implements Function<String, Void> {
+       // This function is invoked every time a message is published to the input topic
+       @Override
+       public Void process(String input, Context context) throws Exception {
+           Arrays.asList(input.split(" ")).forEach(word -> {
+               String counterKey = word.toLowerCase();
+               context.incrCounter(counterKey, 1);
+           });
+           return null;
+       }
+   }
+   ```
+
+3. Package the Java function.
+
+   ```bash
+   mvn package
+   ```
+
+   After the Java function is packaged, a `target` directory is created automatically. Open the `target` directory to check if there is a JAR package similar to `java-function-1.0-SNAPSHOT.jar`.
+
+
+4. Run the Java function.
+
+   (1) Copy the packaged jar file to the Pulsar image.
+
+   ```bash
+   docker exec -it [CONTAINER ID] /bin/bash
+   docker cp <path-of-the-jar-file> [CONTAINER ID]:/pulsar
+   ```
+
+   (2) Run the Java function using the following command.
+
+   ```bash
+   ./bin/pulsar-admin functions localrun \
+      --classname org.example.test.ExclamationFunction \
+      --jar java-function-1.0-SNAPSHOT.jar \
+      --inputs persistent://public/default/my-topic-1 \
+      --output persistent://public/default/test-1 \
+      --tenant public \
+      --namespace default \
+      --name JavaFunction
+   ```
+
+   The following log indicates that the Java function starts successfully.
+
+   ```text
+   ...
+   07:55:03.724 [main] INFO  org.apache.pulsar.functions.runtime.ProcessRuntime - Started process successfully
+   ...
+   ```
+
+## Python
+
+Python functions support the following three formats:
+
+- One Python file
+- ZIP file
+- PIP
+
+### One Python file
+
+To package a function with **one Python file**, complete the following steps.
+
+1. Write a Python function.
+
+   ```python
+   from pulsar import Function  # import the Function module from Pulsar
+
+   # The classic ExclamationFunction that appends an exclamation at the end
+   # of the input
+   class ExclamationFunction(Function):
+     def __init__(self):
+       pass
+
+     def process(self, input, context):
+       return input + '!'
+   ```
+
+   In this example, when you write a Python function, you need to inherit the Function class and implement the `process()` method.
+
+   `process()` mainly has two parameters:
+
+   - `input` represents your input.
+
+   - `context` represents an interface exposed by the Pulsar Function. You can get the attributes in the Python function based on the provided context object.
+
+2. Install a Python client.
+
+   The implementation of a Python function depends on the Python client, so before deploying a Python function, you need to install the corresponding version of the Python client.
+
+   ```bash
+   pip install pulsar-client==2.6.0
+   ```
+
+3. Run the Python function.
+
+   (1) Copy the Python function file to the Pulsar image.
+
+   ```bash
+   docker exec -it [CONTAINER ID] /bin/bash
+   docker cp <path-of-python-file> [CONTAINER ID]:/pulsar
+   ```
+
+   (2) Run the Python function using the following command.
+
+   ```bash
+   ./bin/pulsar-admin functions localrun \
+      --classname <python-module>.ExclamationFunction \
+      --py <path-of-python-file> \
+      --inputs persistent://public/default/my-topic-1 \
+      --output persistent://public/default/test-1 \
+      --tenant public \
+      --namespace default \
+      --name PythonFunction
+   ```
+
+   The following log indicates that the Python function starts successfully.
+
+   ```text
+   ...
+   07:55:03.724 [main] INFO  org.apache.pulsar.functions.runtime.ProcessRuntime - Started process successfully
+   ...
+   ```
+
+### ZIP file
+
+To package a function with a **ZIP file** in Python, complete the following steps.
+
+1. Prepare the ZIP file.
+
+   The following is required when packaging the ZIP file of the Python function.
+
+   ```text
+   Assuming the zip file is named as `func.zip`, unzipping the `func.zip` folder gives:
+       "func/src"
+       "func/requirements.txt"
+       "func/deps"
+   ```
+   Take [exclamation.zip](https://github.com/apache/pulsar/tree/master/tests/docker-images/latest-version-image/python-examples) as an example. The internal structure of the example is as follows.
+
+   ```text
+   .
+   ├── deps
+   │   └── sh-1.12.14-py2.py3-none-any.whl
+   └── src
+       └── exclamation.py
+   ```
+
+2. Run the Python function.
+
+   (1) Copy the ZIP file to the Pulsar image.
+
+   ```bash
+   docker exec -it [CONTAINER ID] /bin/bash
+   docker cp <path-of-zip-file> [CONTAINER ID]:/pulsar
+   ```
+
+   (2) Run the Python function using the following command.
+
+   ```bash
+   ./bin/pulsar-admin functions localrun \
+      --classname exclamation \
+      --py <path-of-zip-file> \
+      --inputs persistent://public/default/in-topic \
+      --output persistent://public/default/out-topic \
+      --tenant public \
+      --namespace default \
+      --name PythonFunction
+   ```
+
+   The following log indicates that the Python function starts successfully.
+
+   ```text
+   ...
+   07:55:03.724 [main] INFO  org.apache.pulsar.functions.runtime.ProcessRuntime - Started process successfully
+   ...
+   ```
+
+### PIP
+
+The PIP method is only supported in the Kubernetes runtime. To package a function with **PIP** in Python, complete the following steps.
+
+1. Configure the `functions_worker.yml` file.
+
+   ```text
+   #### Kubernetes Runtime ####
+   installUserCodeDependencies: true
+   ```
+
+2. Write your Python function.
+
+   ```python
+   from pulsar import Function
+   import js2xml
+
+   # The classic ExclamationFunction that appends an exclamation at the end
+   # of the input
+   class ExclamationFunction(Function):
+     def __init__(self):
+       pass
+
+     def process(self, input, context):
+       # add your logic
+       return input + '!'
+   ```
+
+   You can introduce additional dependencies. When the Python function detects that the file in use is a `whl` file and the `installUserCodeDependencies` parameter is specified, the system uses the `pip install` command to install the dependencies required by the Python function.
+
+3. Generate the `whl` file.
+
+   ```shell
+   $ cd $PULSAR_HOME/pulsar-functions/scripts/python
+   $ chmod +x generate.sh
+   $ ./generate.sh <path-of-python-function> <path-of-output-dir> <version>
+   # e.g: ./generate.sh /path/to/python /path/to/python/output 1.0.0
+   ```
+
+   The output is written in `/path/to/python/output`:
+
+   ```text
+   -rw-r--r--  1 root  staff   1.8K  8 27 14:29 pulsarfunction-1.0.0-py2-none-any.whl
+   -rw-r--r--  1 root  staff   1.4K  8 27 14:29 pulsarfunction-1.0.0.tar.gz
+   -rw-r--r--  1 root  staff     0B  8 27 14:29 pulsarfunction.whl
+   ```
+
+## Go
+
+To package a function in Go, complete the following steps.
+
+1. Write a Go function.
+
+   Currently, Go functions can be implemented **only** using the SDK, and the interface of the function is exposed in the form of the SDK. Before using a Go function, you need to import "github.com/apache/pulsar/pulsar-function-go/pf".
+
+   ```go
+   import (
+       "context"
+       "fmt"
+
+       "github.com/apache/pulsar/pulsar-function-go/pf"
+   )
+
+   func HandleRequest(ctx context.Context, input []byte) error {
+       fmt.Println(string(input) + "!")
+       return nil
+   }
+
+   func main() {
+       pf.Start(HandleRequest)
+   }
+   ```
+
+   You can use the context to connect to the Go function.
+
+   ```go
+   if fc, ok := pf.FromContext(ctx); ok {
+       fmt.Printf("function ID is:%s, ", fc.GetFuncID())
+       fmt.Printf("function version is:%s\n", fc.GetFuncVersion())
+   }
+   ```
+
+   When writing a Go function, remember the following:
+   - In `main()`, you **only** need to register the function name to `Start()`. **Only** one function name is received in `Start()`.
+   - Go functions use Go reflection, based on the received function name, to verify whether the parameter list and returned value list are correct. The parameter list and returned value list **must be** one of the following sample functions:
+
+   ```go
+    func ()
+    func () error
+    func (input) error
+    func () (output, error)
+    func (input) (output, error)
+    func (context.Context) error
+    func (context.Context, input) error
+    func (context.Context) (output, error)
+    func (context.Context, input) (output, error)
+   ```
+
+2. Build the Go function.
+
+   ```bash
+   go build <your-go-function-filename>.go
+   ```
+
+3. Run the Go function.
+
+   (1) Copy the Go function file to the Pulsar image.
+
+   ```bash
+   docker exec -it [CONTAINER ID] /bin/bash
+   docker cp <path-of-go-function-file> [CONTAINER ID]:/pulsar
+   ```
+
+   (2) Run the Go function with the following command.
+
+   ```bash
+   ./bin/pulsar-admin functions localrun \
+      --go [your go function path] \
+      --inputs [input topics] \
+      --output [output topic] \
+      --tenant [default:public] \
+      --namespace [default:default] \
+      --name [custom unique go function name]
+   ```
+
+   The following log indicates that the Go function starts successfully.
+
+   ```text
+   ...
+   07:55:03.724 [main] INFO  org.apache.pulsar.functions.runtime.ProcessRuntime - Started process successfully
+   ...
+   ```
+
+## Start Functions in cluster mode
+If you want to start a function in cluster mode, replace `localrun` with `create` in the commands above. The following log indicates that your function starts successfully.
+
+  ```text
+  "Created successfully"
+  ```
+
+For information about the `--classname`, `--jar`, `--py`, `--go`, and `--inputs` parameters, run the command `./bin/pulsar-admin functions` or see [here](reference-pulsar-admin.md#functions).
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/functions-runtime.md b/site2/website/versioned_docs/version-2.7.0/functions-runtime.md
new file mode 100644
index 00000000000000..fd16e0ef2e8def
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/functions-runtime.md
@@ -0,0 +1,313 @@
+---
+id: version-2.7.0-functions-runtime
+title: Configure Functions runtime
+sidebar_label: Setup: Configure Functions runtime
+original_id: functions-runtime
+---
+
+You can use the following methods to run functions.
+
+- *Thread*: Invoke functions in threads in the functions worker.
+- *Process*: Invoke functions in processes forked by the functions worker.
+- *Kubernetes*: Submit functions as Kubernetes StatefulSets from the functions worker.
+
+> **Note**
+> Pulsar supports adding labels to the Kubernetes StatefulSets and services while launching functions, which facilitates selecting the target Kubernetes objects.
+
+The differences between the thread and process modes are:
+- Thread mode: when a function runs in thread mode, it runs in the same Java virtual machine (JVM) as the functions worker.
+- Process mode: when a function runs in process mode, it runs on the same machine that the functions worker runs on, but in a separate process.
+
+## Configure thread runtime
+It is easy to configure the *Thread* runtime. In most cases, you do not need to configure anything. You can customize the thread group name with the following settings:
+
+```yaml
+threadContainerFactory:
+  threadGroupName: "Your Function Container Group"
+```
+
+The *Thread* runtime is only supported in Java functions.
+
+## Configure process runtime
+When you enable the *Process* runtime, you do not need to configure anything.
+
+```yaml
+processContainerFactory:
+  # the directory for storing the function logs
+  logDirectory:
+  # change the jar location only when you put the java instance jar in a different location
+  javaInstanceJarLocation:
+  # change the python instance location only when you put the python instance jar in a different location
+  pythonInstanceLocation:
+  # change the extra dependencies location:
+  extraFunctionDependenciesDir:
+```
+
+The *Process* runtime is supported in Java, Python, and Go functions.
+
+## Configure Kubernetes runtime
+
+The Kubernetes runtime works by having the functions worker generate Kubernetes manifests and apply them. If you run the functions worker on Kubernetes, you can use the `serviceAccount` associated with the pod that the functions worker is running in.
Otherwise, you can configure the functions worker to communicate with a Kubernetes cluster.
+
+The manifests, generated by the functions worker, include a `StatefulSet`, a `Service` (used to communicate with the pods), and a `Secret` for auth credentials (when applicable). The `StatefulSet` manifest (by default) has a single pod, with the number of replicas determined by the "parallelism" of the function. On pod boot, the pod downloads the function payload (via the functions worker REST API). The pod's container image is configurable, but must have the functions runtime.
+
+The Kubernetes runtime supports secrets, so you can create a Kubernetes secret and expose it as an environment variable in the pod. The Kubernetes runtime is extensible: you can implement classes and customize the way Kubernetes manifests are generated, the way auth data is passed to pods, and the way secrets are integrated.
+
+### Basic configuration
+
+It is easy to configure the Kubernetes runtime. You can just uncomment the settings of `kubernetesContainerFactory` in the `functions_worker.yaml` file. The following is an example.
+
+```yaml
+kubernetesContainerFactory:
+  # uri to kubernetes cluster, leave it to empty and it will use the kubernetes settings in function worker
+  k8Uri:
+  # the kubernetes namespace to run the function instances. it is `default`, if this setting is left to be empty
+  jobNamespace:
+  # the docker image to run function instance. by default it is `apachepulsar/pulsar`
+  pulsarDockerImageName:
+  # the docker image to run function instance according to different configurations provided by users.
+  # By default it is `apachepulsar/pulsar`.
+  # e.g:
+  # functionDockerImages:
+  #   JAVA: JAVA_IMAGE_NAME
+  #   PYTHON: PYTHON_IMAGE_NAME
+  #   GO: GO_IMAGE_NAME
+  functionDockerImages:
+  # the root directory of pulsar home directory in `pulsarDockerImageName`. by default it is `/pulsar`.
+  # if you are using your own built image in `pulsarDockerImageName`, you need to set this setting accordingly
+  pulsarRootDir:
+  # this setting only takes effect if `k8Uri` is set to null. if your function worker is running as a k8s pod,
+  # setting this to true lets the function worker submit functions to the same k8s cluster the function worker
+  # is running in. set this to false if your function worker is not running as a k8s pod.
+  submittingInsidePod: false
+  # setting the pulsar service url that pulsar functions should use to connect to pulsar
+  # if it is not set, it will use the pulsar service url configured in the worker service
+  pulsarServiceUrl:
+  # setting the pulsar admin url that pulsar functions should use to connect to pulsar
+  # if it is not set, it will use the pulsar admin url configured in the worker service
+  pulsarAdminUrl:
+  # the custom labels that function worker uses to select the nodes for pods
+  customLabels:
+  # the directory for dropping extra function dependencies
+  # if it is not an absolute path, it is relative to `pulsarRootDir`
+  extraFunctionDependenciesDir:
+  # additional memory padding added on top of the memory requested by the function, on a per-instance basis
+  percentMemoryPadding: 10
+```
+
+If you run the functions worker embedded in a broker on Kubernetes, you can use the default settings.
+
+### Run standalone functions worker on Kubernetes
+
+If you run the functions worker standalone (that is, not embedded) on Kubernetes, you need to configure `pulsarServiceUrl` to be the URL of the broker and `pulsarAdminUrl` as the URL to the functions worker.
+
+For example, suppose both the Pulsar brokers and the functions workers run in the `pulsar` K8S namespace, the brokers have a service called `brokers`, and the functions worker has a service called `func-worker`. The settings are as follows:
+
+```yaml
+pulsarServiceUrl: pulsar://broker.pulsar:6650 # or pulsar+ssl://broker.pulsar:6651 if using TLS
+pulsarAdminUrl: http://func-worker.pulsar:8080 # or https://func-worker:8443 if using TLS
+```
+
+### Run RBAC in Kubernetes clusters
+
+If you run RBAC in your Kubernetes cluster, make sure that the service account you use for running functions workers (or brokers, if functions workers run along with brokers) has permissions on the following Kubernetes APIs.
+
+- services
+- configmaps
+- pods
+- apps.statefulsets
+
+The following is sufficient:
+
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: functions-worker
+rules:
+- apiGroups: [""]
+  resources:
+  - services
+  - configmaps
+  - pods
+  verbs:
+  - '*'
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets
+  verbs:
+  - '*'
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: functions-worker
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: functions-worker
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: functions-worker
+subjects:
+- kind: ServiceAccount
+  name: functions-worker
+```
+
+If the service account is not properly configured, an error message similar to this is displayed:
+```bash
+22:04:27.696 [Timer-0] ERROR org.apache.pulsar.functions.runtime.KubernetesRuntimeFactory - Error while trying to fetch configmap example-pulsar-4qvmb5gur3c6fc9dih0x1xn8b-function-worker-config at namespace pulsar
+io.kubernetes.client.ApiException: Forbidden
+	at io.kubernetes.client.ApiClient.handleResponse(ApiClient.java:882) ~[io.kubernetes-client-java-2.0.0.jar:?]
+	at io.kubernetes.client.ApiClient.execute(ApiClient.java:798) ~[io.kubernetes-client-java-2.0.0.jar:?]
+	at io.kubernetes.client.apis.CoreV1Api.readNamespacedConfigMapWithHttpInfo(CoreV1Api.java:23673) ~[io.kubernetes-client-java-api-2.0.0.jar:?]
+	at io.kubernetes.client.apis.CoreV1Api.readNamespacedConfigMap(CoreV1Api.java:23655) ~[io.kubernetes-client-java-api-2.0.0.jar:?]
+	at org.apache.pulsar.functions.runtime.KubernetesRuntimeFactory.fetchConfigMap(KubernetesRuntimeFactory.java:284) [org.apache.pulsar-pulsar-functions-runtime-2.4.0-42c3bf949.jar:2.4.0-42c3bf949]
+	at org.apache.pulsar.functions.runtime.KubernetesRuntimeFactory$1.run(KubernetesRuntimeFactory.java:275) [org.apache.pulsar-pulsar-functions-runtime-2.4.0-42c3bf949.jar:2.4.0-42c3bf949]
+	at java.util.TimerThread.mainLoop(Timer.java:555) [?:1.8.0_212]
+	at java.util.TimerThread.run(Timer.java:505) [?:1.8.0_212]
+```
+
+### Integrate Kubernetes secrets
+
+In order to safely distribute secrets, Pulsar Functions can reference Kubernetes secrets. To enable this, set `secretsProviderConfiguratorClassName` to `org.apache.pulsar.functions.secretsproviderconfigurator.KubernetesSecretsProviderConfigurator`.
+
+You can create a secret in the namespace where your functions are deployed. For example, you deploy functions to the `pulsar-func` Kubernetes namespace, and you have a secret named `database-creds` with a field name `password`, which you want to mount in the pod as an environment variable called `DATABASE_PASSWORD`.
The following functions configuration enables you to reference that secret and mount the value as an environment variable in the pod.
+
+```Yaml
+tenant: "mytenant"
+namespace: "mynamespace"
+name: "myfunction"
+topicName: "persistent://mytenant/mynamespace/myfuncinput"
+className: "com.company.pulsar.myfunction"
+
+secrets:
+  # the secret will be mounted from the `password` field in the `database-creds` secret as an env var called `DATABASE_PASSWORD`
+  DATABASE_PASSWORD:
+    path: "database-creds"
+    key: "password"
+
+```
+
+### Enable token authentication
+
+When you enable authentication for your Pulsar cluster, you need a mechanism for the pod running your function to authenticate with the broker.
+
+The `org.apache.pulsar.functions.auth.KubernetesFunctionAuthProvider` interface provides support for any authentication mechanism. The `functionAuthProviderClassName` in `function-worker.yml` is used to specify the path to your implementation.
+
+Pulsar includes an implementation of this interface for token authentication, and distributes the certificate authority via the same implementation. The configuration is similar to the following:
+
+```Yaml
+functionAuthProviderClassName: org.apache.pulsar.functions.auth.KubernetesSecretsTokenAuthProvider
+```
+
+For token authentication, the functions worker captures the token that is used to deploy (or update) the function. The token is saved as a secret and mounted into the pod.
+
+For custom authentication or TLS, you need to implement this interface or use an alternative mechanism to provide authentication. If you use token authentication and TLS encryption to secure the communication with the cluster, Pulsar passes your certificate authority (CA) to the client, so the client obtains what it needs to authenticate the cluster, and trusts the cluster with your signed certificate.
+
+> **Note**
+> If the token you use to deploy a function has an expiry, the token saved for that function expires as well.
+
+### Run clusters with authentication
+
+When you run a functions worker in a standalone process (that is, not embedded in the broker) in a cluster with authentication, you must configure your functions worker to interact with the broker and authenticate incoming requests. So you need to configure the properties that the broker requires for authentication or authorization.
+
+For example, if you use token authentication, you need to configure the following properties in the `function-worker.yml` file.
+
+```Yaml
+clientAuthenticationPlugin: org.apache.pulsar.client.impl.auth.AuthenticationToken
+clientAuthenticationParameters: file:///etc/pulsar/token/admin-token.txt
+configurationStoreServers: zookeeper-cluster:2181 # auth requires a connection to zookeeper
+authenticationProviders:
+ - "org.apache.pulsar.broker.authentication.AuthenticationProviderToken"
+authorizationEnabled: true
+authenticationEnabled: true
+superUserRoles:
+  - superuser
+  - proxy
+properties:
+  tokenSecretKey: file:///etc/pulsar/jwt/secret # if using a secret token
+  tokenPublicKey: file:///etc/pulsar/jwt/public.key # if using public/private key tokens
+```
+
+> **Note**
+> You must configure both authentication and authorization on the functions worker: the server side must authenticate incoming requests, and the client side must be able to authenticate itself to communicate with the broker.
+
+### Customize Kubernetes runtime
+
+The Kubernetes integration enables you to implement a class and customize how manifests are generated.
You can configure it by setting `runtimeCustomizerClassName` in the `functions_worker.yml` file to the fully qualified name of your class. You must implement the `org.apache.pulsar.functions.runtime.kubernetes.KubernetesManifestCustomizer` interface.
+
+The functions (and sinks/sources) API provides a flag, `customRuntimeOptions`, which is passed to this interface.
+
+Pulsar includes a built-in implementation. To use the basic implementation, set `runtimeCustomizerClassName` to `org.apache.pulsar.functions.runtime.kubernetes.BasicKubernetesManifestCustomizer`. The built-in implementation enables you to pass a JSON document with certain properties to augment how the manifests are generated. The following is an example.
+
+```json
+{
+  "jobNamespace": "namespace", // the k8s namespace to run this function in
+  "extraLabels": { // extra labels to attach to the statefulSet, service, and pods
+    "extraLabel": "value"
+  },
+  "extraAnnotations": { // extra annotations to attach to the statefulSet, service, and pods
+    "extraAnnotation": "value"
+  },
+  "nodeSelectorLabels": { // node selector labels to add on to the pod spec
+    "customLabel": "value"
+  },
+  "tolerations": [ // tolerations to add to the pod spec
+    {
+      "key": "custom-key",
+      "value": "value",
+      "effect": "NoSchedule"
+    }
+  ],
+  "resourceRequirements": { // values for cpu and memory should be defined as described here: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container
+    "requests": {
+      "cpu": 1,
+      "memory": "4G"
+    },
+    "limits": {
+      "cpu": 2,
+      "memory": "8G"
+    }
+  }
+}
+```
+
+## Run clusters with geo-replication
+
+If you run multiple clusters tied together with geo-replication, it is important to use a different function namespace for each cluster. Otherwise, the functions share a namespace and could potentially be scheduled across clusters.
+
+For example, if you have two clusters, `east-1` and `west-1`, you can configure the functions workers for `east-1` and `west-1` respectively as follows.
+
+```yaml
+pulsarFunctionsCluster: east-1
+pulsarFunctionsNamespace: public/functions-east-1
+```
+
+```yaml
+pulsarFunctionsCluster: west-1
+pulsarFunctionsNamespace: public/functions-west-1
+```
+
+This ensures that the two functions workers use distinct sets of topics for their internal coordination.
+
+## Configure standalone functions worker
+
+When configuring a standalone functions worker, you need to configure the properties that the broker requires, especially if you use TLS, so that the functions worker can communicate with the broker.
+
+You need to configure the following required properties.
+
+```yaml
+workerPort: 8080
+workerPortTls: 8443 # when using TLS
+tlsCertificateFilePath: /etc/pulsar/tls/tls.crt # when using TLS
+tlsKeyFilePath: /etc/pulsar/tls/tls.key # when using TLS
+tlsTrustCertsFilePath: /etc/pulsar/tls/ca.crt # when using TLS
+pulsarServiceUrl: pulsar://broker.pulsar:6650/ # or pulsar+ssl://pulsar-prod-broker.pulsar:6651/ when using TLS
+pulsarWebServiceUrl: http://broker.pulsar:8080/ # or https://pulsar-prod-broker.pulsar:8443/ when using TLS
+useTls: true # when using TLS, critical!
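+# If authentication is enabled on the broker cluster, the standalone worker
+# also needs client credentials to connect (a sketch; the token path below is
+# an assumed example, matching the token-authentication settings shown earlier):
+# clientAuthenticationPlugin: org.apache.pulsar.client.impl.auth.AuthenticationToken
+# clientAuthenticationParameters: file:///etc/pulsar/token/admin-token.txt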
+
+```
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/functions-worker.md b/site2/website/versioned_docs/version-2.7.0/functions-worker.md
new file mode 100644
index 00000000000000..5cf1b18c1e81f4
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/functions-worker.md
@@ -0,0 +1,286 @@
+---
+id: version-2.7.0-functions-worker
+title: Deploy and manage functions worker
+sidebar_label: Setup: Pulsar Functions Worker
+original_id: functions-worker
+---
+Before using Pulsar Functions, you need to learn how to set up the Pulsar Functions worker and how to [configure Functions runtime](functions-runtime.md).
+
+Pulsar `functions-worker` is a logical component that runs Pulsar Functions in cluster mode. Two options are available, and you can select either based on your requirements.
+- [run with brokers](#run-functions-worker-with-brokers)
+- [run it separately](#run-functions-worker-separately) on separate machines
+
+> Note
+> The `--- Service Urls---` lines in the following diagrams represent Pulsar service URLs that Pulsar client and admin use to connect to a Pulsar cluster.
+
+## Run Functions-worker with brokers
+
+The following diagram illustrates the deployment of functions-workers running along with brokers.
+
+![assets/functions-worker-corun.png](assets/functions-worker-corun.png)
+
+To enable functions-worker running as part of a broker, you need to set `functionsWorkerEnabled` to `true` in the `broker.conf` file.
+
+```conf
+functionsWorkerEnabled=true
+```
+
+If `functionsWorkerEnabled` is set to `true`, the functions-worker is started as part of a broker. You need to configure the `conf/functions_worker.yml` file to customize your functions worker.
+
+Before you run Functions-worker with brokers, you have to configure Functions-worker, and then start it with brokers.
+
+### Configure Functions-Worker to run with brokers
+In this mode, most of the settings are already inherited from your broker configuration (for example, configurationStore settings, authentication settings, and so on) since `functions-worker` is running as part of the broker.
+
+Pay attention to the following required settings when configuring functions-worker in this mode.
+
+- `numFunctionPackageReplicas`: The number of replicas to store function packages. The default value is `1`, which is good for standalone deployment. For production deployment, to ensure high availability, set it to a value larger than `2`.
+- `pulsarFunctionsCluster`: Set the value to your Pulsar cluster name (same as the `clusterName` setting in the broker configuration).
+
+If authentication is enabled on the BookKeeper cluster, configure the following BookKeeper authentication settings.
+
+- `bookkeeperClientAuthenticationPlugin`: the BookKeeper client authentication plugin name.
+- `bookkeeperClientAuthenticationParametersName`: the BookKeeper client authentication plugin parameters name.
+- `bookkeeperClientAuthenticationParameters`: the BookKeeper client authentication plugin parameters.
+
+### Start Functions-worker with broker
+
+Once you have configured the `functions_worker.yml` file, you can start or restart your broker.
+
+Then you can use the following command to verify whether `functions-worker` is running.
+
+```bash
+curl <broker-address>:8080/admin/v2/worker/cluster
+```
+
+After entering the command above, a list of active function workers in the cluster is returned. The output is similar to the following.
+
+```json
+[{"workerId":"","workerHostname":"","port":8080}]
+```
+
+## Run Functions-worker separately
+
+This section illustrates how to run `functions-worker` as a separate process on separate machines.
+
+![assets/functions-worker-separated.png](assets/functions-worker-separated.png)
+
+> Note
+> In this mode, make sure `functionsWorkerEnabled` is set to `false`, so you won't start `functions-worker` with brokers by mistake.
+
+### Configure Functions-worker to run separately
+
+To run functions-worker separately, you have to configure the following parameters.
+
+#### Worker parameters
+
+- `workerId`: A string that identifies a worker machine. It must be unique across the cluster.
+- `workerHostname`: The hostname of the worker machine.
+- `workerPort`: The port that the worker server listens on. Keep it as default if you don't customize it.
+- `workerPortTls`: The TLS port that the worker server listens on. Keep it as default if you don't customize it.
+
+#### Function package parameter
+
+- `numFunctionPackageReplicas`: The number of replicas to store function packages. The default value is `1`.
+
+#### Function metadata parameter
+
+- `pulsarServiceUrl`: The Pulsar service URL for your broker cluster.
+- `pulsarWebServiceUrl`: The Pulsar web service URL for your broker cluster.
+- `pulsarFunctionsCluster`: Set the value to your Pulsar cluster name (same as the `clusterName` setting in the broker configuration).
+
+If authentication is enabled for your broker cluster, you *should* configure the authentication plugin and parameters for the functions worker to communicate with the brokers.
+
+- `clientAuthenticationPlugin`
+- `clientAuthenticationParameters`
+
+#### Security settings
+
+If you want to enable security on functions workers, you *should*:
+- [Enable TLS transport encryption](#enable-tls-transport-encryption)
+- [Enable Authentication Provider](#enable-authentication-provider)
+- [Enable Authorization Provider](#enable-authorization-provider)
+- [Enable End-to-End Encryption](#enable-end-to-end-encryption)
+
+##### Enable TLS transport encryption
+
+To enable TLS transport encryption, configure the following settings.
+
+```
+useTLS: true
+pulsarServiceUrl: pulsar+ssl://localhost:6651/
+pulsarWebServiceUrl: https://localhost:8443
+
+tlsEnabled: true
+tlsCertificateFilePath: /path/to/functions-worker.cert.pem
+tlsKeyFilePath: /path/to/functions-worker.key-pk8.pem
+tlsTrustCertsFilePath: /path/to/ca.cert.pem
+
+# The path to trusted certificates used by the Pulsar client to authenticate with Pulsar brokers
+brokerClientTrustCertsFilePath: /path/to/ca.cert.pem
+```
+
+For details on TLS encryption, refer to [Transport Encryption using TLS](security-tls-transport.md).
+
+##### Enable Authentication Provider
+
+To enable authentication on Functions Worker, you need to configure the following settings.
+
+> Note
+> Substitute the *providers list* with the providers you want to enable.
+
+```
+authenticationEnabled: true
+authenticationProviders: [ provider1, provider2 ]
+```
+
+For the *TLS Authentication* provider, follow the example below to add the necessary settings.
+See [TLS Authentication](security-tls-authentication.md) for more details.
+
+```
+brokerClientAuthenticationPlugin: org.apache.pulsar.client.impl.auth.AuthenticationTls
+brokerClientAuthenticationParameters: tlsCertFile:/path/to/admin.cert.pem,tlsKeyFile:/path/to/admin.key-pk8.pem
+
+authenticationEnabled: true
+authenticationProviders: ['org.apache.pulsar.broker.authentication.AuthenticationProviderTls']
+```
+
+For the *SASL Authentication* provider, add `saslJaasClientAllowedIds` and `saslJaasBrokerSectionName`
+under `properties` if needed.
+
+```
+properties:
+  saslJaasClientAllowedIds: .*pulsar.*
+  saslJaasBrokerSectionName: Broker
+```
+
+For the *Token Authentication* provider, add the necessary settings under `properties` if needed.
+See [Token Authentication](security-jwt.md) for more details.
+```
+properties:
+  tokenSecretKey: file://my/secret.key
+  # If using public/private
+  # tokenPublicKey: file:///path/to/public.key
+```
+
+##### Enable Authorization Provider
+
+To enable authorization on Functions Worker, you need to configure `authorizationEnabled`, `authorizationProvider` and `configurationStoreServers`. The authorization provider connects to `configurationStoreServers` to receive namespace policies.
+
+```yaml
+authorizationEnabled: true
+authorizationProvider: org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider
+configurationStoreServers: <configuration-store-servers>
+```
+
+You should also configure a list of superuser roles. The superuser roles are able to access any admin API. The following is a configuration example.
+
+```yaml
+superUserRoles:
+  - role1
+  - role2
+  - role3
+```
+
+##### Enable End-to-End Encryption
+
+You can use the public and private key pair that the application configures to perform encryption. Only the consumers with a valid key can decrypt the encrypted messages.
+
+To enable end-to-end encryption on Functions Worker, specify `--producer-config` in the command line. For more information, refer to [Pulsar encryption](security-encryption.md).
+
+The relevant `CryptoConfig` settings are included in `ProducerConfig`. The configurable fields of `CryptoConfig` are as follows:
+
+```java
+public class CryptoConfig {
+    private String cryptoKeyReaderClassName;
+    private Map<String, Object> cryptoKeyReaderConfig;
+
+    private String[] encryptionKeys;
+    private ProducerCryptoFailureAction producerCryptoFailureAction;
+
+    private ConsumerCryptoFailureAction consumerCryptoFailureAction;
+}
+```
+
+- `producerCryptoFailureAction`: the action to take if the producer fails to encrypt data; one of `FAIL` or `SEND`.
+- `consumerCryptoFailureAction`: the action to take if the consumer fails to decrypt data; one of `FAIL`, `DISCARD`, or `CONSUME`.
+
+#### BookKeeper Authentication
+
+If authentication is enabled on the BookKeeper cluster, you need to configure the BookKeeper authentication settings as follows:
+
+- `bookkeeperClientAuthenticationPlugin`: the plugin name of BookKeeper client authentication.
+- `bookkeeperClientAuthenticationParametersName`: the plugin parameters name of BookKeeper client authentication.
+- `bookkeeperClientAuthenticationParameters`: the plugin parameters of BookKeeper client authentication.
+
+### Start Functions-worker
+
+Once you have finished configuring the `functions_worker.yml` configuration file, you can use the following command to start a `functions-worker`:
+
+```bash
+bin/pulsar functions-worker
+```
+
+### Configure Proxies for Functions-workers
+
+When you are running `functions-worker` in a separate cluster, the admin REST endpoints are split between two clusters.
The `functions`, `function-worker`, `source` and `sink` endpoints are now served
+by the `functions-worker` cluster, while all the other remaining endpoints are served by the broker cluster.
+Hence you need to configure your `pulsar-admin` to use the right service URL accordingly.
+
+To address this inconvenience, you can start a proxy cluster that routes the admin REST requests accordingly, which gives you one central entry point for your admin service.
+
+If you already have a proxy cluster, continue reading. If you haven't set up a proxy cluster before, you can follow the [instructions](http://pulsar.apache.org/docs/en/administration-proxy/) to
+start proxies.
+
+![assets/functions-worker-separated-proxy.png](assets/functions-worker-separated-proxy.png)
+
+To enable routing functions-related admin requests to `functions-worker` in a proxy, you can edit the `proxy.conf` file to modify the following settings:
+
+```conf
+functionWorkerWebServiceURL=<functions-worker-web-service-url>
+functionWorkerWebServiceURLTLS=<functions-worker-web-service-url-tls>
+```
+
+## Compare the Run-with-Broker and Run-separately modes
+
+As described above, you can run Functions-worker with brokers, or run it separately. It is more convenient to run functions-workers along with brokers. However, running functions-workers in a separate cluster provides better resource isolation for running functions in `Process` or `Thread` mode.
+
+To determine which mode to use for your case, refer to the following guidelines.
+
+Use the `Run-with-Broker` mode in the following cases:
+- a) if resource isolation is not required when running functions in `Process` or `Thread` mode;
+- b) if you configure the functions-worker to run functions on Kubernetes (where the resource isolation problem is addressed by Kubernetes).
+
+Use the `Run-separately` mode in the following cases:
+- a) if you don't have a Kubernetes cluster;
+- b) if you want to run functions and brokers separately.
+
+## Troubleshooting
+
+**Error message: Namespace missing local cluster name in clusters list**
+
+```
+Failed to get partitioned topic metadata: org.apache.pulsar.client.api.PulsarClientException$BrokerMetadataException: Namespace missing local cluster name in clusters list: local_cluster=xyz ns=public/functions clusters=[standalone]
+```
+
+This error message appears when either of the following occurs:
+- a) a broker is started with `functionsWorkerEnabled=true`, but `pulsarFunctionsCluster` is not set to the correct cluster in the `conf/functions_worker.yml` file;
+- b) you set up a geo-replicated Pulsar cluster with `functionsWorkerEnabled=true`; brokers in one cluster run well, while brokers in the other cluster do not.
+
+**Workaround**
+
+If either of these cases happens, follow the instructions below to fix the problem:
+
+1. Get the current clusters list of the `public/functions` namespace.
+
+```bash
+bin/pulsar-admin namespaces get-clusters public/functions
+```
+
+2. Check whether the local cluster is in the clusters list. If the cluster is not in the list, add it and update the clusters list.
+
+```bash
+bin/pulsar-admin namespaces set-clusters --clusters=<existing-clusters>,<local-cluster> public/functions
+```
+
+3. Set the correct cluster name in `pulsarFunctionsCluster` in the `conf/functions_worker.yml` file.
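+
+For example, if the local cluster is `west-1` and the list currently contains only `standalone` (hypothetical values for illustration), the workaround looks as follows.
+
+```bash
+# 1. Inspect the current clusters list of the public/functions namespace
+bin/pulsar-admin namespaces get-clusters public/functions
+# 2. Add the missing local cluster to the list
+bin/pulsar-admin namespaces set-clusters --clusters=standalone,west-1 public/functions
+```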
diff --git a/site2/website/versioned_docs/version-2.7.0/getting-started-clients.md b/site2/website/versioned_docs/version-2.7.0/getting-started-clients.md
new file mode 100644
index 00000000000000..64bfb353c53ce7
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/getting-started-clients.md
@@ -0,0 +1,35 @@
+---
+id: version-2.7.0-client-libraries
+title: Pulsar client libraries
+sidebar_label: Use Pulsar with client libraries
+original_id: client-libraries
+---
+
+Pulsar supports the following client libraries:
+
+- [Java client](client-libraries-java.md)
+- [Go client](client-libraries-go.md)
+- [Python client](client-libraries-python.md)
+- [C++ client](client-libraries-cpp.md)
+- [Node.js client](client-libraries-node.md)
+- [WebSocket client](client-libraries-websocket.md)
+- [C# client](client-libraries-dotnet.md)
+
+## Feature matrix
+The Pulsar client feature matrix for different languages is listed on the [Client Features Matrix](https://github.com/apache/pulsar/wiki/Client-Features-Matrix) page.
+
+## Third-party clients
+
+Besides the officially released clients, multiple projects for developing Pulsar clients are available in different languages.
+
+> If you have developed a new Pulsar client, feel free to submit a pull request and add your client to the list below.
+
+| Language | Project | Maintainer | License | Description |
+|----------|---------|------------|---------|-------------|
+| Go | [pulsar-client-go](https://github.com/Comcast/pulsar-client-go) | [Comcast](https://github.com/Comcast) | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | A native golang client |
+| Go | [go-pulsar](https://github.com/t2y/go-pulsar) | [t2y](https://github.com/t2y) | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) |
+| Haskell | [supernova](https://github.com/cr-org/supernova) | [Chatroulette](https://github.com/cr-org) | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | Native Pulsar client for Haskell |
+| Scala | [neutron](https://github.com/cr-org/neutron) | [Chatroulette](https://github.com/cr-org) | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | Purely functional Apache Pulsar client for Scala built on top of Fs2 |
+| Scala | [pulsar4s](https://github.com/sksamuel/pulsar4s) | [sksamuel](https://github.com/sksamuel) | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | Idiomatic, typesafe, and reactive Scala client for Apache Pulsar |
+| Rust | [pulsar-rs](https://github.com/wyyerd/pulsar-rs) | [Wyyerd Group](https://github.com/wyyerd) | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | Future-based Rust bindings for Apache Pulsar |
+| .NET | [pulsar-client-dotnet](https://github.com/fsharplang-ru/pulsar-client-dotnet) | [Lanayx](https://github.com/Lanayx) | [![GitHub](https://img.shields.io/badge/license-MIT-green.svg)](https://opensource.org/licenses/MIT) | Native .NET client for C#/F#/VB |
diff --git a/site2/website/versioned_docs/version-2.7.0/getting-started-helm.md b/site2/website/versioned_docs/version-2.7.0/getting-started-helm.md
new file mode 100644
index 00000000000000..1c932b8e9037a3
--- /dev/null
+++ 
b/site2/website/versioned_docs/version-2.7.0/getting-started-helm.md
@@ -0,0 +1,358 @@
+---
+id: version-2.7.0-kubernetes-helm
+title: Get started in Kubernetes
+sidebar_label: Run Pulsar in Kubernetes
+original_id: kubernetes-helm
+---
+
+This section guides you through every step of installing and running Apache Pulsar with Helm on Kubernetes quickly, including the following steps:
+
+- Install Apache Pulsar on Kubernetes using Helm
+- Start and stop Apache Pulsar
+- Create topics using `pulsar-admin`
+- Produce and consume messages using Pulsar clients
+- Monitor Apache Pulsar status with Prometheus and Grafana
+
+For deploying a Pulsar cluster for production usage, read the documentation on [how to configure and install a Pulsar Helm chart](helm-deploy.md).
+
+## Prerequisites
+
+- Kubernetes server 1.14.0+
+- kubectl 1.14.0+
+- Helm 3.0+
+
+> #### Tip
+> For the following steps, step 2 and step 3 are for **developers** and step 4 and step 5 are for **administrators**.
+
+## Step 0: Prepare a Kubernetes cluster
+
+Before installing a Pulsar Helm chart, you have to create a Kubernetes cluster. You can follow [the instructions](helm-prepare.md) to prepare a Kubernetes cluster.
+
+We use [Minikube](https://kubernetes.io/docs/getting-started-guides/minikube/) in this quick start guide. To prepare a Kubernetes cluster, follow these steps:
+
+1. Create a Kubernetes cluster on Minikube.
+
+   ```bash
+   minikube start --memory=8192 --cpus=4 --kubernetes-version=<k8s-version>
+   ```
+
+   The `<k8s-version>` can be any [Kubernetes version supported by your Minikube installation](https://minikube.sigs.k8s.io/docs/reference/configuration/kubernetes/), such as `v1.16.1`.
+
+2. Set `kubectl` to use Minikube.
+
+   ```bash
+   kubectl config use-context minikube
+   ```
+
+3. To use the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) with the local Kubernetes cluster on Minikube, enter the command below:
+
+   ```bash
+   minikube dashboard
+   ```
+   The command automatically opens a webpage in your browser.
+
+## Step 1: Install Pulsar Helm chart
+
+0. Add the Pulsar charts repo.
+
+   ```bash
+   helm repo add apache https://pulsar.apache.org/charts
+   ```
+
+   ```bash
+   helm repo update
+   ```
+
+1. Clone the Pulsar Helm chart repository.
+
+   ```bash
+   git clone https://github.com/apache/pulsar-helm-chart
+   cd pulsar-helm-chart
+   ```
+
+2. Run the script `prepare_helm_release.sh` to create secrets required for installing the Apache Pulsar Helm chart. The username `pulsar` and password `pulsar` are used for logging into the Grafana dashboard and Pulsar Manager.
+
+   ```bash
+   ./scripts/pulsar/prepare_helm_release.sh \
+       -n pulsar \
+       -k pulsar-mini \
+       -c
+   ```
+
+3. Use the Pulsar Helm chart to install a Pulsar cluster to Kubernetes.
+
+   ```bash
+   helm install \
+       --values examples/values-minikube.yaml \
+       pulsar-mini apache/pulsar
+   ```
+
+4. Check the status of all pods.
+
+   ```bash
+   kubectl get pods -n pulsar
+   ```
+
+   If all pods start up successfully, you can see that their `STATUS` changes to `Running` or `Completed`.
+
+   **Output**
+
+   ```bash
+   NAME                                         READY   STATUS      RESTARTS   AGE
+   pulsar-mini-bookie-0                         1/1     Running     0          9m27s
+   pulsar-mini-bookie-init-5gphs                0/1     Completed   0          9m27s
+   pulsar-mini-broker-0                         1/1     Running     0          9m27s
+   pulsar-mini-grafana-6b7bcc64c7-4tkxd         1/1     Running     0          9m27s
+   pulsar-mini-prometheus-5fcf5dd84c-w8mgz      1/1     Running     0          9m27s
+   pulsar-mini-proxy-0                          1/1     Running     0          9m27s
+   pulsar-mini-pulsar-init-t7cqt                0/1     Completed   0          9m27s
+   pulsar-mini-pulsar-manager-9bcbb4d9f-htpcs   1/1     Running     0          9m27s
+   pulsar-mini-toolset-0                        1/1     Running     0          9m27s
+   pulsar-mini-zookeeper-0                      1/1     Running     0          9m27s
+   ```
+
+5. Check the status of all services in the namespace `pulsar`.
+
+   ```bash
+   kubectl get services -n pulsar
+   ```
+
+   **Output**
+
+   ```bash
+   NAME                         TYPE           CLUSTER-IP       EXTERNAL-IP   PORT(S)                       AGE
+   pulsar-mini-bookie           ClusterIP      None             <none>        3181/TCP,8000/TCP             11m
+   pulsar-mini-broker           ClusterIP      None             <none>        8080/TCP,6650/TCP             11m
+   pulsar-mini-grafana          LoadBalancer   10.106.141.246   <pending>     3000:31905/TCP                11m
+   pulsar-mini-prometheus       ClusterIP      None             <none>        9090/TCP                      11m
+   pulsar-mini-proxy            LoadBalancer   10.97.240.109    <pending>     80:32305/TCP,6650:31816/TCP   11m
+   pulsar-mini-pulsar-manager   LoadBalancer   10.103.192.175   <pending>     9527:30190/TCP                11m
+   pulsar-mini-toolset          ClusterIP      None             <none>        <none>                        11m
+   pulsar-mini-zookeeper        ClusterIP      None             <none>        2888/TCP,3888/TCP,2181/TCP    11m
+   ```
+
+## Step 2: Use pulsar-admin to create Pulsar tenants/namespaces/topics
+
+`pulsar-admin` is the CLI (command-line interface) tool for Pulsar. In this step, you can use `pulsar-admin` to create resources, including tenants, namespaces, and topics.
+
+1. Enter the `toolset` container.
+
+   ```bash
+   kubectl exec -it -n pulsar pulsar-mini-toolset-0 -- /bin/bash
+   ```
+
+2. In the `toolset` container, create a tenant named `apache`.
+
+   ```bash
+   bin/pulsar-admin tenants create apache
+   ```
+
+   Then you can list the tenants to see if the tenant is created successfully.
+
+   ```bash
+   bin/pulsar-admin tenants list
+   ```
+
+   You should see output similar to the following. The tenant `apache` has been successfully created.
+
+   ```bash
+   "apache"
+   "public"
+   "pulsar"
+   ```
+
+3. In the `toolset` container, create a namespace named `pulsar` in the tenant `apache`.
+
+   ```bash
+   bin/pulsar-admin namespaces create apache/pulsar
+   ```
+
+   Then you can list the namespaces of tenant `apache` to see if the namespace is created successfully.
+
+   ```bash
+   bin/pulsar-admin namespaces list apache
+   ```
+
+   You should see output similar to the following. The namespace `apache/pulsar` has been successfully created.
+
+   ```bash
+   "apache/pulsar"
+   ```
+
+4. In the `toolset` container, create a topic `test-topic` with `4` partitions in the namespace `apache/pulsar`.
+
+   ```bash
+   bin/pulsar-admin topics create-partitioned-topic apache/pulsar/test-topic -p 4
+   ```
+
+5. In the `toolset` container, list all the partitioned topics in the namespace `apache/pulsar`.
+
+   ```bash
+   bin/pulsar-admin topics list-partitioned-topics apache/pulsar
+   ```
+
+   Then you can see all the partitioned topics in the namespace `apache/pulsar`.
+
+   ```bash
+   "persistent://apache/pulsar/test-topic"
+   ```
+
+## Step 3: Use Pulsar client to produce and consume messages
+
+You can use the Pulsar client to create producers and consumers to produce and consume messages.
+
+By default, the Pulsar Helm chart exposes the Pulsar cluster through a Kubernetes `LoadBalancer`. In Minikube, you can use the following command to check the proxy service.
+
+```bash
+kubectl get services -n pulsar | grep pulsar-mini-proxy
+```
+
+You will see output similar to the following.
+
+```bash
+pulsar-mini-proxy            LoadBalancer   10.97.240.109    <pending>     80:32305/TCP,6650:31816/TCP   28m
+```
+
+This output shows the node ports to which the Pulsar cluster's binary port and HTTP port are mapped. The port after `80:` is the HTTP port while the port after `6650:` is the binary port.
+
+Then you can find the IP address and exposed ports of your Minikube server by running the following command.
+
+```bash
+minikube service pulsar-mini-proxy -n pulsar
+```
+
+**Output**
+
+```bash
+|-----------|-------------------|-------------|-------------------------|
+| NAMESPACE | NAME              | TARGET PORT | URL                     |
+|-----------|-------------------|-------------|-------------------------|
+| pulsar    | pulsar-mini-proxy | http/80     | http://172.17.0.4:32305 |
+|           |                   | pulsar/6650 | http://172.17.0.4:31816 |
+|-----------|-------------------|-------------|-------------------------|
+🏃 Starting tunnel for service pulsar-mini-proxy.
+|-----------|-------------------|-------------|------------------------|
+| NAMESPACE | NAME              | TARGET PORT | URL                    |
+|-----------|-------------------|-------------|------------------------|
+| pulsar    | pulsar-mini-proxy |             | http://127.0.0.1:61853 |
+|           |                   |             | http://127.0.0.1:61854 |
+|-----------|-------------------|-------------|------------------------|
+```
+
+At this point, you can get the service URLs to connect to your Pulsar client. Here are URL examples:
+```
+webServiceUrl=http://127.0.0.1:61853/
+brokerServiceUrl=pulsar://127.0.0.1:61854/
+```
+
+Then you can proceed with the following steps:
+
+1. Download the Apache Pulsar tarball from the [downloads page](https://pulsar.apache.org/en/download/).
+
+2. Decompress the tarball based on your download file.
+
+   ```bash
+   tar -xf <file-name>.tar.gz
+   ```
+
+3. Expose `PULSAR_HOME`.
+
+   (1) Enter the directory of the decompressed download file.
+
+   (2) Expose `PULSAR_HOME` as an environment variable.
+
+   ```bash
+   export PULSAR_HOME=$(pwd)
+   ```
+
+4. Configure the Pulsar client.
+
+   In the `${PULSAR_HOME}/conf/client.conf` file, replace `webServiceUrl` and `brokerServiceUrl` with the service URLs you get from the above steps.
+
+5. Create a subscription to consume messages from `apache/pulsar/test-topic`.
+
+   ```bash
+   bin/pulsar-client consume -s sub apache/pulsar/test-topic -n 0
+   ```
+
+6. Open a new terminal. In the new terminal, create a producer and send 10 messages to the `test-topic` topic.
+
+   ```bash
+   bin/pulsar-client produce apache/pulsar/test-topic -m "---------hello apache pulsar-------" -n 10
+   ```
+
+7. Verify the results.
+
+   - From the producer side
+
+     **Output**
+
+     The messages have been produced successfully.
+
+     ```bash
+     18:15:15.489 [main] INFO org.apache.pulsar.client.cli.PulsarClientTool - 10 messages successfully produced
+     ```
+
+   - From the consumer side
+
+     **Output**
+
+     At the same time, you can receive the messages as below.
+
+     ```bash
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ----- got message -----
+     ---------hello apache pulsar-------
+     ```
+
+## Step 4: Use Pulsar Manager to manage the cluster
+
+[Pulsar Manager](administration-pulsar-manager.md) is a web-based GUI management tool for managing and monitoring Pulsar.
+
+1. By default, the `Pulsar Manager` is exposed as a separate `LoadBalancer`. You can open the Pulsar Manager UI using the following command:
+
+   ```bash
+   minikube service -n pulsar pulsar-mini-pulsar-manager
+   ```
+
+2. The Pulsar Manager UI opens in your browser. You can use the username `pulsar` and password `pulsar` to log into Pulsar Manager.
+
+3. In the Pulsar Manager UI, you can create an environment.
+
+   - Click the `New Environment` button in the top-left corner.
+   - Type `pulsar-mini` for the field `Environment Name` in the popup window.
+   - Type `http://pulsar-mini-broker:8080` for the field `Service URL` in the popup window.
+   - Click the `Confirm` button in the popup window.
+
+4. After you successfully create an environment, you are redirected to the `tenants` page of that environment. Then you can create `tenants`, `namespaces` and `topics` using the Pulsar Manager.
+
+## Step 5: Use Prometheus and Grafana to monitor cluster
+
+Grafana is an open-source visualization tool, which can be used for visualizing time series data in dashboards.
+
+1. By default, Grafana is exposed as a separate `LoadBalancer`. You can open the Grafana UI using the following command:
+
+   ```bash
+   minikube service pulsar-mini-grafana -n pulsar
+   ```
+
+2. The Grafana UI opens in your browser. You can use the username `pulsar` and password `pulsar` to log into the Grafana Dashboard.
+
+3. You can view dashboards for different components of a Pulsar cluster.
diff --git a/site2/website/versioned_docs/version-2.7.0/helm-deploy.md b/site2/website/versioned_docs/version-2.7.0/helm-deploy.md
new file mode 100644
index 00000000000000..44ea86cf30dd96
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/helm-deploy.md
@@ -0,0 +1,375 @@
+---
+id: version-2.7.0-helm-deploy
+title: Deploy Pulsar cluster using Helm
+sidebar_label: Deployment
+original_id: helm-deploy
+---
+
+Before running `helm install`, you need to decide how to run Pulsar.
+Options can be specified using Helm's `--set option.name=value` command line option.
+
+## Select configuration options
+
+In each section, collect the options to combine and use with the `helm install` command.
+
+### Kubernetes namespace
+
+By default, the Pulsar Helm chart is installed to a namespace called `pulsar`.
+
+```yaml
+namespace: pulsar
+```
+
+To install the Pulsar Helm chart into a different Kubernetes namespace, you can include this option in the `helm install` command.
+
+```bash
+--set namespace=<different-k8s-namespace>
+```
+
+By default, the Pulsar Helm chart doesn't create the namespace.
+
+```yaml
+namespaceCreate: false
+```
+
+To use the Pulsar Helm chart to create the Kubernetes namespace automatically, you can include this option in the `helm install` command.
+
+```bash
+--set namespaceCreate=true
+```
+
+### Persistence
+
+By default, the Pulsar Helm chart creates Volume Claims with the expectation that a dynamic provisioner creates the underlying Persistent Volumes.
+
+```yaml
+volumes:
+  persistence: true
+  # configure the components to use local persistent volume
+  # the local provisioner should be installed prior to enable local persistent volume
+  local_storage: false
+```
+
+To use local persistent volumes as the persistent storage for the Helm release, you can install the [local storage provisioner](#install-local-storage-provisioner) and include the following option in the `helm install` command.
+
+```bash
+--set volumes.local_storage=true
+```
+
+> #### Note
+>
+> Before installing the production instance of Pulsar, make sure you plan the storage settings to avoid extra storage migration work, because after the initial installation, you must edit Kubernetes objects manually if you want to change storage settings.
+
+The Pulsar Helm chart is designed for production use. To use the Pulsar Helm chart in a development environment (such as Minikube), you can disable persistence by including this option in your `helm install` command.
+
+```bash
+--set volumes.persistence=false
+```
+
+### Affinity
+
+By default, `anti-affinity` is enabled to ensure pods of the same component can run on different nodes.
+
+```yaml
+affinity:
+  anti_affinity: true
+```
+
+To use the Pulsar Helm chart in a development environment (such as Minikube), you can disable `anti-affinity` by including this option in your `helm install` command.
+
+```bash
+--set affinity.anti_affinity=false
+```
+
+### Components
+
+The Pulsar Helm chart is designed for production usage. It deploys a production-ready Pulsar cluster, including Pulsar core components and monitoring components.
+
+You can customize the components to be deployed by turning on/off individual components.
+
+```yaml
+## Components
+##
+## Control what components of Apache Pulsar to deploy for the cluster
+components:
+  # zookeeper
+  zookeeper: true
+  # bookkeeper
+  bookkeeper: true
+  # bookkeeper - autorecovery
+  autorecovery: true
+  # broker
+  broker: true
+  # functions
+  functions: true
+  # proxy
+  proxy: true
+  # toolset
+  toolset: true
+  # pulsar manager
+  pulsar_manager: true
+
+## Monitoring Components
+##
+## Control what components of the monitoring stack to deploy for the cluster
+monitoring:
+  # monitoring - prometheus
+  prometheus: true
+  # monitoring - grafana
+  grafana: true
+```
+
+### Docker images
+
+The Pulsar Helm chart is designed to enable controlled upgrades, so it can configure independent image versions for components. You can customize the image for each component individually.
+
+```yaml
+## Images
+##
+## Control what images to use for each component
+images:
+  zookeeper:
+    repository: apachepulsar/pulsar-all
+    tag: 2.5.0
+    pullPolicy: IfNotPresent
+  bookie:
+    repository: apachepulsar/pulsar-all
+    tag: 2.5.0
+    pullPolicy: IfNotPresent
+  autorecovery:
+    repository: apachepulsar/pulsar-all
+    tag: 2.5.0
+    pullPolicy: IfNotPresent
+  broker:
+    repository: apachepulsar/pulsar-all
+    tag: 2.5.0
+    pullPolicy: IfNotPresent
+  proxy:
+    repository: apachepulsar/pulsar-all
+    tag: 2.5.0
+    pullPolicy: IfNotPresent
+  functions:
+    repository: apachepulsar/pulsar-all
+    tag: 2.5.0
+  prometheus:
+    repository: prom/prometheus
+    tag: v1.6.3
+    pullPolicy: IfNotPresent
+  grafana:
+    repository: streamnative/apache-pulsar-grafana-dashboard-k8s
+    tag: 0.0.4
+    pullPolicy: IfNotPresent
+  pulsar_manager:
+    repository: apachepulsar/pulsar-manager
+    tag: v0.1.0
+    pullPolicy: IfNotPresent
+    hasCommand: false
+```
+
+### TLS
+
+The Pulsar Helm chart can be configured to enable TLS (Transport Layer Security) to protect all the traffic between components. Before enabling TLS, you have to provision TLS certificates for the required components.
+
+#### Provision TLS certificates using cert-manager
+
+To use the `cert-manager` to provision the TLS certificates, you have to install the [cert-manager](#install-cert-manager) before installing the Pulsar Helm chart. After successfully installing the cert-manager, you can set `certs.internal_issuer.enabled` to `true`. Then the Pulsar Helm chart can use the `cert-manager` to generate `selfsigning` TLS certificates for the configured components.
+
+```yaml
+certs:
+  internal_issuer:
+    enabled: false
+    component: internal-cert-issuer
+    type: selfsigning
+```
+
+You can also customize the generated TLS certificates by configuring the fields as follows.
+
+```yaml
+tls:
+  # common settings for generating certs
+  common:
+    # 90d
+    duration: 2160h
+    # 15d
+    renewBefore: 360h
+    organization:
+      - pulsar
+    keySize: 4096
+    keyAlgorithm: rsa
+    keyEncoding: pkcs8
+```
+
+#### Enable TLS
+
+After installing the `cert-manager`, you can set `tls.enabled` to `true` to enable TLS encryption for the entire cluster.
+
+```yaml
+tls:
+  enabled: false
+```
+
+You can also configure whether to enable TLS encryption for individual components.
+
+```yaml
+tls:
+  # settings for generating certs for proxy
+  proxy:
+    enabled: false
+    cert_name: tls-proxy
+  # settings for generating certs for broker
+  broker:
+    enabled: false
+    cert_name: tls-broker
+  # settings for generating certs for bookies
+  bookie:
+    enabled: false
+    cert_name: tls-bookie
+  # settings for generating certs for zookeeper
+  zookeeper:
+    enabled: false
+    cert_name: tls-zookeeper
+  # settings for generating certs for recovery
+  autorecovery:
+    cert_name: tls-recovery
+  # settings for generating certs for toolset
+  toolset:
+    cert_name: tls-toolset
+```
+
+### Authentication
+
+By default, authentication is disabled. You can set `auth.authentication.enabled` to `true` to enable authentication.
+Currently, the Pulsar Helm chart only supports the JWT authentication provider. You can set `auth.authentication.provider` to `jwt` to use the JWT authentication provider.
+
+```yaml
+# Enable or disable broker authentication and authorization.
+auth:
+  authentication:
+    enabled: false
+    provider: "jwt"
+    jwt:
+    # Enable JWT authentication
+    # If the token is generated by a secret key, set the usingSecretKey as true.
+    # If the token is generated by a private key, set the usingSecretKey as false.
    usingSecretKey: false
+  superUsers:
+    # broker to broker communication
+    broker: "broker-admin"
+    # proxy to broker communication
+    proxy: "proxy-admin"
+    # pulsar-admin client to broker/proxy communication
+    client: "admin"
+```
+
+To enable authentication, you can run [prepare helm release](#prepare-the-helm-release) to generate token secret keys and tokens for the three super users specified in the `auth.superUsers` field. The generated token keys and super user tokens are uploaded and stored as Kubernetes secrets prefixed with `<pulsar-release-name>-token-`. You can use the following command to find those secrets.
+
+```bash
+kubectl get secrets -n <k8s-namespace>
+```
+
+### Authorization
+
+By default, authorization is disabled. Authorization can be enabled only when authentication is enabled.
+
+```yaml
+auth:
+  authorization:
+    enabled: false
+```
+
+To enable authorization, you can include this option in the `helm install` command.
+
+```bash
+--set auth.authorization.enabled=true
+```
+
+### CPU and RAM resource requirements
+
+By default, the resource requests and the number of replicas for the Pulsar components in the Pulsar Helm chart are adequate for a small production deployment. If you deploy a non-production instance, you can reduce the defaults to fit into a smaller cluster.
+
+Once you have all of your configuration options collected, you can install dependent charts before installing the Pulsar Helm chart.
+
+## Install dependent charts
+
+### Install local storage provisioner
+
+To use local persistent volumes as the persistent storage, you need to install a storage provisioner for [local persistent volumes](https://kubernetes.io/blog/2019/04/04/kubernetes-1.14-local-persistent-volumes-ga/).
+
+One of the easiest ways to get started is to use the local storage provisioner provided along with the Pulsar Helm chart.
+
+```bash
+helm repo add streamnative https://charts.streamnative.io
+helm repo update
+helm install pulsar-storage-provisioner streamnative/local-storage-provisioner
+```
+
+### Install cert-manager
+
+The Pulsar Helm chart uses the [cert-manager](https://github.com/jetstack/cert-manager) to provision and manage TLS certificates automatically. To enable TLS encryption for brokers or proxies, you need to install the cert-manager in advance.
+
+For details about how to install the cert-manager, follow the [official instructions](https://cert-manager.io/docs/installation/kubernetes/#installing-with-helm).
+
+Alternatively, we provide a bash script [install-cert-manager.sh](https://github.com/apache/pulsar-helm-chart/blob/master/scripts/cert-manager/install-cert-manager.sh) to install a cert-manager release to the namespace `cert-manager`.
+
+```bash
+git clone https://github.com/apache/pulsar-helm-chart
+cd pulsar-helm-chart
+./scripts/cert-manager/install-cert-manager.sh
+```
+
+## Prepare Helm release
+
+Once you have installed all the dependent charts and collected all of your configuration options, you can run [prepare_helm_release.sh](https://github.com/apache/pulsar-helm-chart/blob/master/scripts/pulsar/prepare_helm_release.sh) to prepare the Helm release.
+
+```bash
+git clone https://github.com/apache/pulsar-helm-chart
+cd pulsar-helm-chart
+./scripts/pulsar/prepare_helm_release.sh -n <k8s-namespace> -k <pulsar-release-name>
+```
+
+The `prepare_helm_release.sh` script creates the following resources:
+
+- A Kubernetes namespace for installing the Pulsar release
+- JWT secret keys and tokens for three super users: `broker-admin`, `proxy-admin`, and `admin`. By default, it generates an asymmetric public/private key pair.
You can choose to generate a symmetric secret key by specifying `--symmetric`.
+  - `proxy-admin` role is used for proxies to communicate with brokers.
+  - `broker-admin` role is used for inter-broker communications.
+  - `admin` role is used by the admin tools.
+
+## Deploy Pulsar cluster using Helm
+
+Once you have finished the following three tasks, you can install a Helm release.
+
+- Collect all of your configuration options.
+- Install dependent charts.
+- Prepare the Helm release.
+
+In this example, we name our Helm release `pulsar`.
+
+```bash
+helm repo add apache https://pulsar.apache.org/charts
+helm repo update
+helm upgrade --install pulsar apache/pulsar \
+    --timeout 10m \
+    --set [your configuration options]
+```
+
+You can also use the `--version <chart-version>` option if you want to install a specific version of the Pulsar Helm chart.
+
+## Monitor deployment
+
+A list of installed resources is output once the Pulsar cluster is deployed. This may take 5-10 minutes.
+
+The status of the deployment can be checked by running the `helm status pulsar` command, which can also be done while the deployment is taking place if you run the command in another terminal.
+
+## Access Pulsar cluster
+
+The default values will create a `ClusterIP` for the following resources, which you can use to interact with the cluster.
+
+- Proxy: You can use the IP address to produce and consume messages to the installed Pulsar cluster.
+- Pulsar Manager: You can access the Pulsar Manager UI at `http://<pulsar-manager-ip>:9527`.
+- Grafana Dashboard: You can access the Grafana dashboard at `http://<grafana-dashboard-ip>:3000`.
+
+To find the IP addresses of those components, run the following command:
+
+```bash
+kubectl get service -n <k8s-namespace>
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/helm-overview.md b/site2/website/versioned_docs/version-2.7.0/helm-overview.md
new file mode 100644
index 00000000000000..c97e5106c629c2
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/helm-overview.md
@@ -0,0 +1,100 @@
+---
+id: version-2.7.0-helm-overview
+title: Apache Pulsar Helm Chart
+sidebar_label: Overview
+original_id: helm-overview
+---
+
+This is the officially supported Helm chart to install Apache Pulsar in a cloud-native environment. It was enhanced based on StreamNative's [Helm Chart](https://github.com/streamnative/charts).
+
+## Introduction
+
+The Apache Pulsar Helm chart is one of the most convenient ways to operate Pulsar on Kubernetes. This Pulsar Helm chart contains all the required components to get started and can scale to large deployments.
+
+This chart includes all the components for a complete experience, but each part can be configured to be installed separately.
+
+- Pulsar core components:
+  - ZooKeeper
+  - Bookies
+  - Brokers
+  - Function workers
+  - Proxies
+- Control Center:
+  - Pulsar Manager
+  - Prometheus
+  - Grafana
+
+It includes support for:
+
+- Security
+  - Automatically provisioned TLS certificates, using [Jetstack](https://www.jetstack.io/)'s [cert-manager](https://cert-manager.io/docs/)
+    - self-signed
+    - [Let's Encrypt](https://letsencrypt.org/)
+  - TLS Encryption
+    - Proxy
+    - Broker
+    - Toolset
+    - Bookie
+    - ZooKeeper
+  - Authentication
+    - JWT
+  - Authorization
+- Storage
+  - Non-persistent storage
+  - Persistent volume
+  - Local persistent volumes
+- Functions
+  - Kubernetes Runtime
+  - Process Runtime
+  - Thread Runtime
+- Operations
+  - Independent image versions for all components, enabling controlled upgrades
+
+## Pulsar Helm chart quick start
+
+To get up and running with these charts as fast as possible, in a **non-production** use case, we provide a [quick start guide](getting-started-helm.md) for Proof of Concept (PoC) deployments.
+
+This guide walks the user through deploying these charts with default values and features, but it *does not* meet production-ready requirements. To deploy these charts into production under sustained load, follow the complete [Installation Guide](helm-install.md).
+
+## Troubleshooting
+
+We have done our best to make these charts as seamless as possible. Occasionally, issues arise that are outside of our control. We have collected tips and tricks for troubleshooting common issues. Please check them first before raising an [issue](https://github.com/apache/pulsar/issues/new/choose), and feel free to add to them by raising a [Pull Request](https://github.com/apache/pulsar/compare).
+
+## Installation
+
+The Apache Pulsar Helm chart contains all required dependencies.
+
+If you deploy a PoC for testing, we strongly suggest you follow our [Quick Start Guide](getting-started-helm.md) for your first iteration.
+
+1. [Preparation](helm-prepare.md)
+2. [Deployment](helm-deploy.md)
+
+## Upgrading
+
+Once the Pulsar Helm chart is installed, use `helm upgrade` to apply configuration changes and chart updates.
+
+```bash
+helm repo add apache https://pulsar.apache.org/charts
+helm repo update
+helm get values <pulsar-release-name> > pulsar.yaml
+helm upgrade <pulsar-release-name> apache/pulsar -f pulsar.yaml
+```
+
+For more detailed information, see [Upgrading](helm-upgrade.md).
+
+## Uninstallation
+
+To uninstall the Pulsar Helm chart, run the following command:
+
+```bash
+helm delete <pulsar-release-name>
+```
+
+For the purposes of continuity, these charts have some Kubernetes objects that cannot be removed when performing `helm delete`.
+It is recommended to *consciously* remove these items, as they affect re-deployment.
+
+* PVCs for stateful data:
+  - ZooKeeper: This is your metadata.
+  - BookKeeper: This is your data.
+  - Prometheus: This is your metrics data, which can be safely removed.
+* Secrets: if the secrets are generated by the [prepare release script](https://github.com/apache/pulsar-helm-chart/blob/master/scripts/pulsar/prepare_helm_release.sh), they contain secret keys and tokens. You can use the [cleanup release script](https://github.com/apache/pulsar-helm-chart/blob/master/scripts/pulsar/cleanup_helm_release.sh) to remove these secrets and tokens as needed.
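+
+For example, leftover PVCs can be inspected and, once you are sure the data is no longer needed, deleted with `kubectl` (a sketch; the `pulsar` namespace and the PVC name are placeholders for your own deployment):
+
+```bash
+# List the PVCs that remain after `helm delete`
+kubectl get pvc -n pulsar
+# Delete a specific PVC only after confirming its data is disposable
+kubectl delete pvc <pvc-name> -n pulsar
+```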
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/helm-upgrade.md b/site2/website/versioned_docs/version-2.7.0/helm-upgrade.md
new file mode 100644
index 00000000000000..4eb30d4f85945c
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/helm-upgrade.md
@@ -0,0 +1,34 @@
+---
+id: version-2.7.0-helm-upgrade
+title: Upgrade Pulsar Helm release
+sidebar_label: Upgrade
+original_id: helm-upgrade
+---
+
+Before upgrading your Pulsar installation, you need to check the change log corresponding to the specific release you want to upgrade to and look for any release notes that might pertain to the new Pulsar Helm chart version.
+
+We also recommend that you provide all values using the `helm upgrade --set key=value` syntax or a `-f values.yaml` file instead of using `--reuse-values`, because some of the current values might be deprecated.
+
+> #### Note
+>
+> You can retrieve your previous `--set` arguments cleanly, with `helm get values <release-name>`. If you direct this into a file (`helm get values <release-name> > pulsar.yaml`), you can safely
+pass this file through `-f`. Thus `helm upgrade <release-name> apache/pulsar -f pulsar.yaml`. This safely replaces the behavior of `--reuse-values`.
+
+## Steps
+
+To upgrade Apache Pulsar to a newer version, follow these steps:
+
+1. Check the change log for the specific version you would like to upgrade to.
+2. Go through [deployment documentation](helm-deploy.md) step by step.
+3. Extract your previous `--set` arguments with the following command.
+   ```bash
+   helm get values <release-name> > pulsar.yaml
+   ```
+4. Decide all the values you need to set.
+5. Perform the upgrade, with all `--set` arguments extracted in step 3.
+   ```bash
+   helm upgrade <release-name> apache/pulsar \
+   --version <new-version> \
+   -f pulsar.yaml \
+   --set ...
+   ```
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/io-cli.md b/site2/website/versioned_docs/version-2.7.0/io-cli.md
new file mode 100644
index 00000000000000..4519e591da723e
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/io-cli.md
@@ -0,0 +1,606 @@
+---
+id: version-2.7.0-io-cli
+title: Connector Admin CLI
+sidebar_label: CLI
+original_id: io-cli
+---
+
+The `pulsar-admin` tool helps you manage Pulsar connectors.
+
+## `sources`
+
+An interface for managing Pulsar IO sources (ingress data into Pulsar).
+
+```bash
+$ pulsar-admin sources subcommands
+```
+
+Subcommands are:
+
+* `create`
+
+* `update`
+
+* `delete`
+
+* `get`
+
+* `status`
+
+* `list`
+
+* `stop`
+
+* `start`
+
+* `restart`
+
+* `localrun`
+
+* `available-sources`
+
+* `reload`
+
+
+### `create`
+
+Submit a Pulsar IO source connector to run in a Pulsar cluster.
+
+#### Usage
+
+```bash
+$ pulsar-admin sources create options
+```
+
+#### Options
+
+|Flag|Description|
|----|---|
| `-a`, `--archive` | The path to the NAR archive for the source.
    It also supports url-path (http/https/file [file protocol assumes that file already exists on worker host]) from which worker can download the package.
+| `--classname` | The source's class name if `archive` is file-url-path (file://).
+| `--cpu` | The CPU (in cores) that needs to be allocated per source instance (applicable only to Docker runtime).
+| `--deserialization-classname` | The SerDe classname for the source.
+| `--destination-topic-name` | The Pulsar topic to which data is sent.
+| `--disk` | The disk (in bytes) that needs to be allocated per source instance (applicable only to Docker runtime).
+|`--name` | The source's name.
+| `--namespace` | The source's namespace.
+| `--parallelism` | The source's parallelism factor, that is, the number of source instances to run.
+| `--processing-guarantees` | The processing guarantees (also named as delivery semantics) applied to the source. A source connector receives messages from an external system and writes messages to a Pulsar topic. The `--processing-guarantees` is used to ensure the processing guarantees for writing messages to the Pulsar topic.
    The available values are ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE. +| `--ram` | The RAM (in bytes) that needs to be allocated per source instance (applicable only to the process and Docker runtimes). +| `-st`, `--schema-type` | The schema type.
    Either a builtin schema (for example, AVRO and JSON) or custom schema class name to be used to encode messages emitted from source.
+| `--source-config` | Source config key/values.
+| `--source-config-file` | The path to a YAML config file specifying the source's configuration.
+| `-t`, `--source-type` | The source's connector provider.
+| `--tenant` | The source's tenant.
+|`--producer-config`| The custom producer configuration (as a JSON string).
+
+### `update`
+
+Update an already submitted Pulsar IO source connector.
+
+#### Usage
+
+```bash
+$ pulsar-admin sources update options
+```
+
+#### Options
+
+|Flag|Description|
+|----|---|
+| `-a`, `--archive` | The path to the NAR archive for the source.
    It also supports url-path (http/https/file [file protocol assumes that file already exists on worker host]) from which worker can download the package.
+| `--classname` | The source's class name if `archive` is file-url-path (file://).
+| `--cpu` | The CPU (in cores) that needs to be allocated per source instance (applicable only to Docker runtime).
+| `--deserialization-classname` | The SerDe classname for the source.
+| `--destination-topic-name` | The Pulsar topic to which data is sent.
+| `--disk` | The disk (in bytes) that needs to be allocated per source instance (applicable only to Docker runtime).
+|`--name` | The source's name.
+| `--namespace` | The source's namespace.
+| `--parallelism` | The source's parallelism factor, that is, the number of source instances to run.
+| `--processing-guarantees` | The processing guarantees (also named as delivery semantics) applied to the source. A source connector receives messages from an external system and writes messages to a Pulsar topic. The `--processing-guarantees` is used to ensure the processing guarantees for writing messages to the Pulsar topic.
    The available values are ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE. +| `--ram` | The RAM (in bytes) that needs to be allocated per source instance (applicable only to the process and Docker runtimes). +| `-st`, `--schema-type` | The schema type.
    Either a builtin schema (for example, AVRO and JSON) or custom schema class name to be used to encode messages emitted from source. +| `--source-config` | Source config key/values. +| `--source-config-file` | The path to a YAML config file specifying the source's configuration. +| `-t`, `--source-type` | The source's connector provider. The `source-type` parameter of the currently built-in connectors is determined by the setting of the `name` parameter specified in the pulsar-io.yaml file. +| `--tenant` | The source's tenant. +| `--update-auth-data` | Whether or not to update the auth data.
    **Default value: false.** + + +### `delete` + +Delete a Pulsar IO source connector. + +#### Usage + +```bash +$ pulsar-admin sources delete options +``` + +#### Option + +|Flag|Description| +|---|---| +|`--name`|The source's name.| +|`--namespace`|The source's namespace.| +|`--tenant`|The source's tenant.| + +### `get` + +Get the information about a Pulsar IO source connector. + +#### Usage + +```bash +$ pulsar-admin sources get options +``` + +#### Options +|Flag|Description| +|---|---| +|`--name`|The source's name.| +|`--namespace`|The source's namespace.| +|`--tenant`|The source's tenant.| + + +### `status` + +Check the current status of a Pulsar Source. + +#### Usage + +```bash +$ pulsar-admin sources status options +``` + +#### Options + +|Flag|Description| +|---|---| +|`--instance-id`|The source ID.
    If `instance-id` is not provided, Pulsar gets the status of all instances.|
+|`--name`|The source's name.|
+|`--namespace`|The source's namespace.|
+|`--tenant`|The source's tenant.|
+
+### `list`
+
+List all running Pulsar IO source connectors.
+
+#### Usage
+
+```bash
+$ pulsar-admin sources list options
+```
+
+#### Options
+
+|Flag|Description|
+|---|---|
+|`--namespace`|The source's namespace.|
+|`--tenant`|The source's tenant.|
+
+
+### `stop`
+
+Stop a source instance.
+
+#### Usage
+
+```bash
+$ pulsar-admin sources stop options
+```
+
+#### Options
+
+|Flag|Description|
+|---|---|
+|`--instance-id`|The source instance ID.
    If `instance-id` is not provided, Pulsar stops all instances.| +|`--name`|The source's name.| +|`--namespace`|The source's namespace.| +|`--tenant`|The source's tenant.| + +### `start` + +Start a source instance. + +#### Usage + +```bash +$ pulsar-admin sources start options +``` + +#### Options + +|Flag|Description| +|---|---| +|`--instance-id`|The source instanceID.
    If `instance-id` is not provided, Pulsar starts all instances.| +|`--name`|The source's name.| +|`--namespace`|The source's namespace.| +|`--tenant`|The source's tenant.| + + +### `restart` + +Restart a source instance. + +#### Usage + +```bash +$ pulsar-admin sources restart options +``` + +#### Options +|Flag|Description| +|---|---| +|`--instance-id`|The source instanceID.
If `instance-id` is not provided, Pulsar restarts all instances.|
+|`--name`|The source's name.|
+|`--namespace`|The source's namespace.|
+|`--tenant`|The source's tenant.|
+
+
+### `localrun`
+
+Run a Pulsar IO source connector locally rather than deploying it to the Pulsar cluster.
+
+#### Usage
+
+```bash
+$ pulsar-admin sources localrun options
+```
+
+#### Options
+
+|Flag|Description|
+|----|---|
+| `-a`, `--archive` | The path to the NAR archive for the source.
It also supports url-path (http/https/file [file protocol assumes that file already exists on worker host]) from which the worker can download the package.
+| `--broker-service-url` | The URL for the Pulsar broker.
+|`--classname`|The source's class name if `archive` is file-url-path (file://).
+| `--client-auth-params` | The client authentication parameter.
+| `--client-auth-plugin` | The client authentication plugin that the function process uses to connect to the broker.
+|`--cpu`|The CPU (in cores) that needs to be allocated per source instance (applicable only to the Docker runtime).|
+|`--deserialization-classname`|The SerDe classname for the source.
+|`--destination-topic-name`|The Pulsar topic to which data is sent.
+|`--disk`|The disk (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime).|
+|`--hostname-verification-enabled`|Enable hostname verification.
**Default value: false**.
+|`--name`|The source's name.|
+|`--namespace`|The source's namespace.|
+|`--parallelism`|The source's parallelism factor, that is, the number of source instances to run.|
+|`--processing-guarantees` | The processing guarantees (also known as delivery semantics) applied to the source. A source connector receives messages from an external system and writes messages to a Pulsar topic. The `--processing-guarantees` option is used to ensure the processing guarantees for writing messages to the Pulsar topic.
    The available values are ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE. +|`--ram`|The RAM (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime).| +| `-st`, `--schema-type` | The schema type.
Either a builtin schema (for example, AVRO and JSON) or custom schema class name to be used to encode messages emitted from source.
+|`--source-config`|Source config key/values.
+|`--source-config-file`|The path to a YAML config file specifying the source's configuration.
+|`--source-type`|The source's connector provider.
+|`--tenant`|The source's tenant.
+|`--tls-allow-insecure`|Allow insecure tls connection.
    **Default value: false**. +|`--tls-trust-cert-path`|The tls trust cert file path. +|`--use-tls`|Use tls connection.
    **Default value: false**. +|`--producer-config`| The custom producer configuration (as a JSON string). + +### `available-sources` + +Get the list of Pulsar IO connector sources supported by Pulsar cluster. + +#### Usage + +```bash +$ pulsar-admin sources available-sources +``` + +### `reload` + +Reload the available built-in connectors. + +#### Usage + +```bash +$ pulsar-admin sources reload +``` + +## `sinks` + +An interface for managing Pulsar IO sinks (egress data from Pulsar). + +```bash +$ pulsar-admin sinks subcommands +``` + +Subcommands are: + +* `create` + +* `update` + +* `delete` + +* `get` + +* `status` + +* `list` + +* `stop` + +* `start` + +* `restart` + +* `localrun` + +* `available-sinks` + +* `reload` + + +### `create` + +Submit a Pulsar IO sink connector to run in a Pulsar cluster. + +#### Usage + +```bash +$ pulsar-admin sinks create options +``` + +#### Options + +|Flag|Description| +|----|---| +| `-a`, `--archive` | The path to the archive file for the sink.
It also supports url-path (http/https/file [file protocol assumes that file already exists on worker host]) from which the worker can download the package.
+| `--auto-ack` | Whether or not the framework will automatically acknowledge messages.
+| `--classname` | The sink's class name if `archive` is file-url-path (file://).
+| `--cpu` | The CPU (in cores) that needs to be allocated per sink instance (applicable only to Docker runtime).
+| `--custom-schema-inputs` | The map of input topics to schema types or class names (as a JSON string).
+| `--custom-serde-inputs` | The map of input topics to SerDe class names (as a JSON string).
+| `--disk` | The disk (in bytes) that needs to be allocated per sink instance (applicable only to Docker runtime).
+|`-i`, `--inputs` | The sink's input topic or topics (multiple topics can be specified as a comma-separated list).
+|`--name` | The sink's name.
+| `--namespace` | The sink's namespace.
+| `--parallelism` | The sink's parallelism factor, that is, the number of sink instances to run.
+| `--processing-guarantees` | The processing guarantees (also known as delivery semantics) applied to the sink. The `--processing-guarantees` implementation in Pulsar also relies on the sink implementation.
The available values are ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.
+| `--ram` | The RAM (in bytes) that needs to be allocated per sink instance (applicable only to the process and Docker runtimes).
+| `--retain-ordering` | The sink consumes and sinks messages in order.
+| `--sink-config` | Sink config key/values.
+| `--sink-config-file` | The path to a YAML config file specifying the sink's configuration.
+| `-t`, `--sink-type` | The sink's connector provider. The `sink-type` parameter of the currently built-in connectors is determined by the setting of the `name` parameter specified in the pulsar-io.yaml file.
+| `--subs-name` | The Pulsar subscription name if the user wants a specific subscription name for the input-topic consumer.
+| `--tenant` | The sink's tenant.
+| `--timeout-ms` | The message timeout in milliseconds.
+| `--topics-pattern` | The topics pattern to consume from the list of topics under a namespace that match the pattern.
`--inputs` and `--topics-pattern` are mutually exclusive. For example, a pattern such as `persistent://public/default/.*` (an illustrative value) consumes from all persistent topics under the `public/default` namespace.
Add the SerDe class name for a pattern in `--custom-serde-inputs` (supported for Java functions only).
+
+### `update`
+
+Update a Pulsar IO sink connector.
+
+#### Usage
+
+```bash
+$ pulsar-admin sinks update options
+```
+
+#### Options
+
+|Flag|Description|
+|----|---|
+| `-a`, `--archive` | The path to the archive file for the sink.
It also supports url-path (http/https/file [file protocol assumes that file already exists on worker host]) from which the worker can download the package.
+| `--auto-ack` | Whether or not the framework will automatically acknowledge messages.
+| `--classname` | The sink's class name if `archive` is file-url-path (file://).
+| `--cpu` | The CPU (in cores) that needs to be allocated per sink instance (applicable only to Docker runtime).
+| `--custom-schema-inputs` | The map of input topics to schema types or class names (as a JSON string).
+| `--custom-serde-inputs` | The map of input topics to SerDe class names (as a JSON string).
+| `--disk` | The disk (in bytes) that needs to be allocated per sink instance (applicable only to Docker runtime).
+|`-i`, `--inputs` | The sink's input topic or topics (multiple topics can be specified as a comma-separated list).
+|`--name` | The sink's name.
+| `--namespace` | The sink's namespace.
+| `--parallelism` | The sink's parallelism factor, that is, the number of sink instances to run.
+| `--processing-guarantees` | The processing guarantees (also known as delivery semantics) applied to the sink. The `--processing-guarantees` implementation in Pulsar also relies on the sink implementation.
The available values are ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.
+| `--ram` | The RAM (in bytes) that needs to be allocated per sink instance (applicable only to the process and Docker runtimes).
+| `--retain-ordering` | The sink consumes and sinks messages in order.
+| `--sink-config` | Sink config key/values.
+| `--sink-config-file` | The path to a YAML config file specifying the sink's configuration.
+| `-t`, `--sink-type` | The sink's connector provider.
+| `--subs-name` | The Pulsar subscription name if the user wants a specific subscription name for the input-topic consumer.
+| `--tenant` | The sink's tenant.
+| `--timeout-ms` | The message timeout in milliseconds.
+| `--topics-pattern` | The topics pattern to consume from the list of topics under a namespace that match the pattern.
`--inputs` and `--topics-pattern` are mutually exclusive.
Add the SerDe class name for a pattern in `--custom-serde-inputs` (supported for Java functions only).
+| `--update-auth-data` | Whether or not to update the auth data.
    **Default value: false.** + +### `delete` + +Delete a Pulsar IO sink connector. + +#### Usage + +```bash +$ pulsar-admin sinks delete options +``` + +#### Option + +|Flag|Description| +|---|---| +|`--name`|The sink's name.| +|`--namespace`|The sink's namespace.| +|`--tenant`|The sink's tenant.| + +### `get` + +Get the information about a Pulsar IO sink connector. + +#### Usage + +```bash +$ pulsar-admin sinks get options +``` + +#### Options +|Flag|Description| +|---|---| +|`--name`|The sink's name.| +|`--namespace`|The sink's namespace.| +|`--tenant`|The sink's tenant.| + + +### `status` + +Check the current status of a Pulsar sink. + +#### Usage + +```bash +$ pulsar-admin sinks status options +``` + +#### Options + +|Flag|Description| +|---|---| +|`--instance-id`|The sink ID.
If `instance-id` is not provided, Pulsar gets the status of all instances.|
+|`--name`|The sink's name.|
+|`--namespace`|The sink's namespace.|
+|`--tenant`|The sink's tenant.|
+
+
+### `list`
+
+List all running Pulsar IO sink connectors.
+
+#### Usage
+
+```bash
+$ pulsar-admin sinks list options
+```
+
+#### Options
+
+|Flag|Description|
+|---|---|
+|`--namespace`|The sink's namespace.|
+|`--tenant`|The sink's tenant.|
+
+
+### `stop`
+
+Stop a sink instance.
+
+#### Usage
+
+```bash
+$ pulsar-admin sinks stop options
+```
+
+#### Options
+
+|Flag|Description|
+|---|---|
+|`--instance-id`|The sink instanceID.
    If `instance-id` is not provided, Pulsar stops all instances.| +|`--name`|The sink's name.| +|`--namespace`|The sink's namespace.| +|`--tenant`|The sink's tenant.| + +### `start` + +Start a sink instance. + +#### Usage + +```bash +$ pulsar-admin sinks start options +``` + +#### Options + +|Flag|Description| +|---|---| +|`--instance-id`|The sink instanceID.
    If `instance-id` is not provided, Pulsar starts all instances.| +|`--name`|The sink's name.| +|`--namespace`|The sink's namespace.| +|`--tenant`|The sink's tenant.| + + +### `restart` + +Restart a sink instance. + +#### Usage + +```bash +$ pulsar-admin sinks restart options +``` + +#### Options + +|Flag|Description| +|---|---| +|`--instance-id`|The sink instanceID.
If `instance-id` is not provided, Pulsar restarts all instances.|
+|`--name`|The sink's name.|
+|`--namespace`|The sink's namespace.|
+|`--tenant`|The sink's tenant.|
+
+
+### `localrun`
+
+Run a Pulsar IO sink connector locally rather than deploying it to the Pulsar cluster.
+
+#### Usage
+
+```bash
+$ pulsar-admin sinks localrun options
+```
+
+#### Options
+
+|Flag|Description|
+|----|---|
+| `-a`, `--archive` | The path to the archive file for the sink.
It also supports url-path (http/https/file [file protocol assumes that file already exists on worker host]) from which the worker can download the package.
+| `--auto-ack` | Whether or not the framework will automatically acknowledge messages.
+| `--broker-service-url` | The URL for the Pulsar broker.
+|`--classname`|The sink's class name if `archive` is file-url-path (file://).
+| `--client-auth-params` | The client authentication parameter.
+| `--client-auth-plugin` | The client authentication plugin that the function process uses to connect to the broker.
+|`--cpu`|The CPU (in cores) that needs to be allocated per sink instance (applicable only to the Docker runtime).
+| `--custom-schema-inputs` | The map of input topics to schema types or class names (as a JSON string).
+| `--max-redeliver-count` | Maximum number of times that a message is redelivered before being sent to the dead letter queue.
+| `--dead-letter-topic` | Name of the dead letter topic where the failing messages are sent.
+| `--custom-serde-inputs` | The map of input topics to SerDe class names (as a JSON string).
+|`--disk`|The disk (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime).|
+|`--hostname-verification-enabled`|Enable hostname verification.
**Default value: false**.
+| `-i`, `--inputs` | The sink's input topic or topics (multiple topics can be specified as a comma-separated list).
+|`--name`|The sink's name.|
+|`--namespace`|The sink's namespace.|
+|`--parallelism`|The sink's parallelism factor, that is, the number of sink instances to run.|
+|`--processing-guarantees`|The processing guarantees (also known as delivery semantics) applied to the sink. The `--processing-guarantees` implementation in Pulsar also relies on the sink implementation.
The available values are ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.
+|`--ram`|The RAM (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime).|
+|`--retain-ordering` | The sink consumes and sinks messages in order.
+|`--sink-config`|Sink config key/values.
+|`--sink-config-file`|The path to a YAML config file specifying the sink's configuration.
+|`--sink-type`|The sink's connector provider.
+|`--subs-name` | The Pulsar subscription name if the user wants a specific subscription name for the input-topic consumer.
+|`--tenant`|The sink's tenant.
+| `--timeout-ms` | The message timeout in milliseconds.
+| `--negative-ack-redelivery-delay-ms` | The negatively-acknowledged message redelivery delay in milliseconds. |
+|`--tls-allow-insecure`|Allow insecure tls connection.
    **Default value: false**. +|`--tls-trust-cert-path`|The tls trust cert file path. +| `--topics-pattern` | TopicsPattern to consume from list of topics under a namespace that match the pattern.
`--inputs` and `--topics-pattern` are mutually exclusive.
Add the SerDe class name for a pattern in `--custom-serde-inputs` (supported for Java functions only).
+|`--use-tls`|Use tls connection.
    **Default value: false**. + +### `available-sinks` + +Get the list of Pulsar IO connector sinks supported by Pulsar cluster. + +#### Usage + +```bash +$ pulsar-admin sinks available-sinks +``` + +### `reload` + +Reload the available built-in connectors. + +#### Usage + +```bash +$ pulsar-admin sinks reload +``` + diff --git a/site2/website/versioned_docs/version-2.7.0/io-connectors.md b/site2/website/versioned_docs/version-2.7.0/io-connectors.md new file mode 100644 index 00000000000000..e5bc8bdd19b18f --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/io-connectors.md @@ -0,0 +1,232 @@ +--- +id: version-2.7.0-io-connectors +title: Built-in connector +sidebar_label: Built-in connector +original_id: io-connectors +--- + +Pulsar distribution includes a set of common connectors that have been packaged and tested with the rest of Apache Pulsar. These connectors import and export data from some of the most commonly used data systems. + +Using any of these connectors is as easy as writing a simple connector and running the connector locally or submitting the connector to a Pulsar Functions cluster. + +## Source connector + +Pulsar has various source connectors, which are sorted alphabetically as below. + +### Canal + +* [Configuration](io-canal-source.md#configuration) + +* [Example](io-canal-source.md#usage) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/canal/src/main/java/org/apache/pulsar/io/canal/CanalStringSource.java) + + +### Debezium MySQL + +* [Configuration](io-debezium-source.md#configuration) + +* [Example](io-debezium-source.md#example-of-mysql) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/debezium/mysql/src/main/java/org/apache/pulsar/io/debezium/mysql/DebeziumMysqlSource.java) + +### Debezium PostgreSQL + +* [Configuration](io-debezium-source.md#configuration) + +* [Example](io-debezium-source.md#example-of-postgresql) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/debezium/postgres/src/main/java/org/apache/pulsar/io/debezium/postgres/DebeziumPostgresSource.java) + +### Debezium MongoDB + +* [Configuration](io-debezium-source.md#configuration) + +* [Example](io-debezium-source.md#example-of-mongodb) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/debezium/mongodb/src/main/java/org/apache/pulsar/io/debezium/mongodb/DebeziumMongoDbSource.java) + +### DynamoDB + +* [Configuration](io-dynamodb-source.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/dynamodb/src/main/java/org/apache/pulsar/io/dynamodb/DynamoDBSource.java) + +### File + +* [Configuration](io-file-source.md#configuration) + +* [Example](io-file-source.md#usage) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/file/src/main/java/org/apache/pulsar/io/file/FileSource.java) + +### Flume + +* [Configuration](io-flume-source.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/flume/src/main/java/org/apache/pulsar/io/flume/FlumeConnector.java) + +### Twitter firehose + +* [Configuration](io-twitter-source.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/twitter/src/main/java/org/apache/pulsar/io/twitter/TwitterFireHose.java) + +### Kafka + +* [Configuration](io-kafka-source.md#configuration) + +* [Example](io-kafka-source.md#usage) + +* [Java 
class](https://github.com/apache/pulsar/blob/master/pulsar-io/kafka/src/main/java/org/apache/pulsar/io/kafka/KafkaAbstractSource.java) + +### Kinesis + +* [Configuration](io-kinesis-source.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSource.java) + +### Netty + +* [Configuration](io-netty-source.md#configuration) + +* [Example of TCP](io-netty-source.md#tcp) + +* [Example of HTTP](io-netty-source.md#http) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/netty/src/main/java/org/apache/pulsar/io/netty/NettySource.java) + +### NSQ + +* [Configuration](io-nsq-source.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/nsq/src/main/java/org/apache/pulsar/io/nsq/NSQSource.java) + +### RabbitMQ + +* [Configuration](io-rabbitmq-source.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/rabbitmq/src/main/java/org/apache/pulsar/io/rabbitmq/RabbitMQSource.java) + +## Sink connector + +Pulsar has various sink connectors, which are sorted alphabetically as below. + +### Aerospike + +* [Configuration](io-aerospike-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/aerospike/src/main/java/org/apache/pulsar/io/aerospike/AerospikeStringSink.java) + +### Cassandra + +* [Configuration](io-cassandra-sink.md#configuration) + +* [Example](io-cassandra-sink.md#usage) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/cassandra/src/main/java/org/apache/pulsar/io/cassandra/CassandraStringSink.java) + +### ElasticSearch + +* [Configuration](io-elasticsearch-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/elastic-search/src/main/java/org/apache/pulsar/io/elasticsearch/ElasticSearchSink.java) + +### Flume + +* [Configuration](io-flume-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/flume/src/main/java/org/apache/pulsar/io/flume/sink/StringSink.java) + +### HBase + +* [Configuration](io-hbase-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/hbase/src/main/java/org/apache/pulsar/io/hbase/HbaseAbstractConfig.java) + +### HDFS2 + +* [Configuration](io-hdfs2-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/hdfs2/src/main/java/org/apache/pulsar/io/hdfs2/AbstractHdfsConnector.java) + +### HDFS3 + +* [Configuration](io-hdfs3-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/hdfs3/src/main/java/org/apache/pulsar/io/hdfs3/AbstractHdfsConnector.java) + +### InfluxDB + +* [Configuration](io-influxdb-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/influxdb/src/main/java/org/apache/pulsar/io/influxdb/InfluxDBGenericRecordSink.java) + +### JDBC ClickHouse + +* [Configuration](io-jdbc-sink.md#configuration) + +* [Example](io-jdbc-sink.md#example-for-clickhouse) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/jdbc/clickhouse/src/main/java/org/apache/pulsar/io/jdbc/ClickHouseJdbcAutoSchemaSink.java) + +### JDBC MariaDB + +* [Configuration](io-jdbc-sink.md#configuration) + +* [Example](io-jdbc-sink.md#example-for-mariadb) + +* [Java 
class](https://github.com/apache/pulsar/blob/master/pulsar-io/jdbc/mariadb/src/main/java/org/apache/pulsar/io/jdbc/MariadbJdbcAutoSchemaSink.java) + +### JDBC PostgreSQL + +* [Configuration](io-jdbc-sink.md#configuration) + +* [Example](io-jdbc-sink.md#example-for-postgresql) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/jdbc/postgres/src/main/java/org/apache/pulsar/io/jdbc/PostgresJdbcAutoSchemaSink.java) + +### JDBC SQLite + +* [Configuration](io-jdbc-sink.md#configuration) + +* [Example](io-jdbc-sink.md#example-for-sqlite) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/jdbc/sqlite/src/main/java/org/apache/pulsar/io/jdbc/SqliteJdbcAutoSchemaSink.java) + +### Kafka + +* [Configuration](io-kafka-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/kafka/src/main/java/org/apache/pulsar/io/kafka/KafkaAbstractSink.java) + +### Kinesis + +* [Configuration](io-kinesis-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSink.java) + +### MongoDB + +* [Configuration](io-mongo-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/mongo/src/main/java/org/apache/pulsar/io/mongodb/MongoSink.java) + +### RabbitMQ + +* [Configuration](io-rabbitmq-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/rabbitmq/src/main/java/org/apache/pulsar/io/rabbitmq/RabbitMQSink.java) + +### Redis + +* [Configuration](io-redis-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/redis/src/main/java/org/apache/pulsar/io/redis/RedisAbstractConfig.java) + +### Solr + +* [Configuration](io-solr-sink.md#configuration) + +* [Java class](https://github.com/apache/pulsar/blob/master/pulsar-io/solr/src/main/java/org/apache/pulsar/io/solr/SolrSinkConfig.java) + diff --git a/site2/website/versioned_docs/version-2.7.0/io-hdfs2-sink.md b/site2/website/versioned_docs/version-2.7.0/io-hdfs2-sink.md new file mode 100644 index 00000000000000..9e21941fc09c3a --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/io-hdfs2-sink.md @@ -0,0 +1,59 @@ +--- +id: version-2.7.0-io-hdfs2-sink +title: HDFS2 sink connector +sidebar_label: HDFS2 sink connector +original_id: io-hdfs2-sink +--- + +The HDFS2 sink connector pulls the messages from Pulsar topics +and persists the messages to HDFS files. + +## Configuration + +The configuration of the HDFS2 sink connector has the following properties. + +### Property + +| Name | Type|Required | Default | Description +|------|----------|----------|---------|-------------| +| `hdfsConfigResources` | String|true| None | A file or a comma-separated list containing the Hadoop file system configuration.
<br/><br/>**Example**<br/>'core-site.xml'<br/>'hdfs-site.xml' |
+| `directory` | String | true | None|The HDFS directory where files are read from or written to. |
+| `encoding` | String |false |None |The character encoding for the files.
<br/><br/>**Example**<br/>UTF-8<br/>ASCII |
+| `compression` | Compression |false |None |The compression codec used to compress or de-compress the files on HDFS.
<br/><br/>Below are the available options:<br/>• BZIP2<br/>• DEFLATE<br/>• GZIP<br/>• LZ4<br/>• SNAPPY|
+| `kerberosUserPrincipal` |String| false| None|The principal account of the Kerberos user used for authentication. |
+| `keytab` | String|false|None| The full pathname of the Kerberos keytab file used for authentication. |
+| `filenamePrefix` |String| true, if `compression` is set to `None`. | None |The prefix of the files created inside the HDFS directory.
<br/><br/>**Example**<br/>The value of topicA results in files named topicA-. |
+| `fileExtension` | String| true | None | The extension added to the files written to HDFS.
<br/><br/>**Example**<br/>'.txt'<br/>'.seq' |
+| `separator` | char|false |None |The character used to separate records in a text file.
<br/><br/>If no value is provided, the contents from all records are concatenated together in one continuous byte array. |
+| `syncInterval` | long| false |0| The interval between calls to flush data to HDFS disk in milliseconds. |
+| `maxPendingRecords` |int| false|Integer.MAX_VALUE | The maximum number of records that are held in memory before acking.
<br/><br/>Setting this property to 1 makes every record flush to disk before the record is acked.<br/><br/>Setting this property to a higher value allows buffering records before flushing them to disk.
+| `subdirectoryPattern` | String | false | None | A subdirectory associated with the created time of the sink.
<br/>The pattern is the formatted pattern of `directory`'s subdirectory.<br/><br/>
    See [DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html) for pattern's syntax. | + +### Example + +Before using the HDFS2 sink connector, you need to create a configuration file through one of the following methods. + +* JSON + + ```json + { + "hdfsConfigResources": "core-site.xml", + "directory": "/foo/bar", + "filenamePrefix": "prefix", + "fileExtension": ".log", + "compression": "SNAPPY", + "subdirectoryPattern": "yyyy-MM-dd" + } + ``` + +* YAML + + ```yaml + configs: + hdfsConfigResources: "core-site.xml" + directory: "/foo/bar" + filenamePrefix: "prefix" + fileExtension: ".log" + compression: "SNAPPY" + subdirectoryPattern: "yyyy-MM-dd" + ``` diff --git a/site2/website/versioned_docs/version-2.7.0/io-nsq-source.md b/site2/website/versioned_docs/version-2.7.0/io-nsq-source.md new file mode 100644 index 00000000000000..826c5e2ceac8c4 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/io-nsq-source.md @@ -0,0 +1,21 @@ +--- +id: version-2.7.0-io-nsq-source +title: NSQ source connector +sidebar_label: NSQ source connector +original_id: io-nsq-source +--- + +The NSQ source connector receives messages from NSQ topics +and writes messages to Pulsar topics. + +## Configuration + +The configuration of the NSQ source connector has the following properties. + +### Property + +| Name | Type|Required | Default | Description +|------|----------|----------|---------|-------------| +| `lookupds` |String| true | " " (empty string) | A comma-separated list of nsqlookupds to connect to. | +| `topic` | String|true | " " (empty string) | The NSQ topic to transport. | +| `channel` | String |false | pulsar-transport-{$topic} | The channel to consume from on the provided NSQ topic. | \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/io-quickstart.md b/site2/website/versioned_docs/version-2.7.0/io-quickstart.md new file mode 100644 index 00000000000000..0ba4a3f9b7d7b2 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/io-quickstart.md @@ -0,0 +1,816 @@ +--- +id: version-2.7.0-io-quickstart +title: How to connect Pulsar to database +sidebar_label: Get started +original_id: io-quickstart +--- + +This tutorial provides a hands-on look at how you can move data out of Pulsar without writing a single line of code. + +It is helpful to review the [concepts](io-overview.md) for Pulsar I/O with running the steps in this guide to gain a deeper understanding. + +At the end of this tutorial, you are able to: + +- [Connect Pulsar to Cassandra](#Connect-Pulsar-to-Cassandra) + +- [Connect Pulsar to PostgreSQL](#Connect-Pulsar-to-PostgreSQL) + +> #### Tip +> +> * These instructions assume you are running Pulsar in [standalone mode](getting-started-standalone.md). However, all +> the commands used in this tutorial can be used in a multi-nodes Pulsar cluster without any changes. +> +> * All the instructions are assumed to run at the root directory of a Pulsar binary distribution. + +## Install Pulsar and built-in connector + +Before connecting Pulsar to a database, you need to install Pulsar and the desired built-in connector. + +For more information about **how to install a standalone Pulsar and built-in connectors**, see [here](getting-started-standalone.md/#installing-pulsar). + +## Start Pulsar standalone + +1. Start Pulsar locally. + + ```bash + bin/pulsar standalone + ``` + + All the components of a Pulsar service are start in order. 
+
+   You can curl these Pulsar service endpoints to make sure the Pulsar service is up and running correctly.
+
+2. Check the Pulsar binary protocol port.
+
+   ```bash
+   telnet localhost 6650
+   ```
+
+3. Check the Pulsar Functions cluster.
+
+   ```bash
+   curl -s http://localhost:8080/admin/v2/worker/cluster
+   ```
+
+   **Example output**
+   ```json
+   [{"workerId":"c-standalone-fw-localhost-6750","workerHostname":"localhost","port":6750}]
+   ```
+
+4. Make sure a public tenant and a default namespace exist.
+
+   ```bash
+   curl -s http://localhost:8080/admin/v2/namespaces/public
+   ```
+
+   **Example output**
+   ```json
+   ["public/default","public/functions"]
+   ```
+
+5. All built-in connectors should be listed as available.
+
+   ```bash
+   curl -s http://localhost:8080/admin/v2/functions/connectors
+   ```
+
+   **Example output**
+
+   ```json
+   [{"name":"aerospike","description":"Aerospike database sink","sinkClass":"org.apache.pulsar.io.aerospike.AerospikeStringSink"},{"name":"cassandra","description":"Writes data into Cassandra","sinkClass":"org.apache.pulsar.io.cassandra.CassandraStringSink"},{"name":"kafka","description":"Kafka source and sink connector","sourceClass":"org.apache.pulsar.io.kafka.KafkaStringSource","sinkClass":"org.apache.pulsar.io.kafka.KafkaBytesSink"},{"name":"kinesis","description":"Kinesis sink connector","sinkClass":"org.apache.pulsar.io.kinesis.KinesisSink"},{"name":"rabbitmq","description":"RabbitMQ source connector","sourceClass":"org.apache.pulsar.io.rabbitmq.RabbitMQSource"},{"name":"twitter","description":"Ingest data from Twitter firehose","sourceClass":"org.apache.pulsar.io.twitter.TwitterFireHose"}]
+   ```
+
+   If an error occurs when starting the Pulsar service, you may see an exception at the terminal running `pulsar/standalone`,
+   or you can navigate to the `logs` directory under the Pulsar directory to view the logs.
+
+## Connect Pulsar to Cassandra
+
+This section demonstrates how to connect Pulsar to Cassandra.
+
+> #### Tip
+>
+> * Make sure you have Docker installed. If you do not have one, see [install Docker](https://docs.docker.com/docker-for-mac/install/).
+>
+> * The Cassandra sink connector reads messages from Pulsar topics and writes the messages into Cassandra tables. For more information, see [Cassandra sink connector](io-cassandra-sink.md).
+
+### Setup a Cassandra cluster
+
+This example uses the `cassandra` Docker image to start a single-node Cassandra cluster in Docker.
+
+1. Start a Cassandra cluster.
+
+   ```bash
+   docker run -d --rm --name=cassandra -p 9042:9042 cassandra
+   ```
+
+   > **Note**
+   >
+   > Before moving to the next steps, make sure the Cassandra cluster is running.
+
+2. Make sure the Docker process is running.
+
+   ```bash
+   docker ps
+   ```
+
+3. Check the Cassandra logs to make sure the Cassandra process is running as expected.
+
+   ```bash
+   docker logs cassandra
+   ```
+
+4. Check the status of the Cassandra cluster.
+
+   ```bash
+   docker exec cassandra nodetool status
+   ```
+
+   **Example output**
+
+   ```
+   Datacenter: datacenter1
+   =======================
+   Status=Up/Down
+   |/ State=Normal/Leaving/Joining/Moving
+   --  Address     Load       Tokens       Owns (effective)  Host ID                               Rack
+   UN  172.17.0.2  103.67 KiB  256          100.0%            af0e4b2f-84e0-4f0b-bb14-bd5f9070ff26  rack1
+   ```
+
+5. Use `cqlsh` to connect to the Cassandra cluster.
+
+   ```bash
+   $ docker exec -ti cassandra cqlsh localhost
+   Connected to Test Cluster at localhost:9042.
+   [cqlsh 5.0.1 | Cassandra 3.11.2 | CQL spec 3.4.4 | Native protocol v4]
+   Use HELP for help.
+   cqlsh>
+   ```
+
+6. Create a keyspace `pulsar_test_keyspace`.
+ + ```bash + cqlsh> CREATE KEYSPACE pulsar_test_keyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':1}; + ``` + +7. Create a table `pulsar_test_table`. + + ```bash + cqlsh> USE pulsar_test_keyspace; + cqlsh:pulsar_test_keyspace> CREATE TABLE pulsar_test_table (key text PRIMARY KEY, col text); + ``` + +### Configure a Cassandra sink + +Now that we have a Cassandra cluster running locally. + +In this section, you need to configure a Cassandra sink connector. + +To run a Cassandra sink connector, you need to prepare a configuration file including the information that Pulsar connector runtime needs to know. + +For example, how Pulsar connector can find the Cassandra cluster, what is the keyspace and the table that Pulsar connector uses for writing Pulsar messages to, and so on. + +You can create a configuration file through one of the following methods. + +* JSON + + ```json + { + "roots": "localhost:9042", + "keyspace": "pulsar_test_keyspace", + "columnFamily": "pulsar_test_table", + "keyname": "key", + "columnName": "col" + } + ``` + +* YAML + + ```yaml + configs: + roots: "localhost:9042" + keyspace: "pulsar_test_keyspace" + columnFamily: "pulsar_test_table" + keyname: "key" + columnName: "col" + ``` + +For more information, see [Cassandra sink connector](io-cassandra-sink.md). + +### Create a Cassandra sink + +You can use the [Connector Admin CLI](io-cli.md) +to create a sink connector and perform other operations on them. + +Run the following command to create a Cassandra sink connector with sink type _cassandra_ and the config file _examples/cassandra-sink.yml_ created previously. + +#### Note +> The `sink-type` parameter of the currently built-in connectors is determined by the setting of the `name` parameter specified in the pulsar-io.yaml file. + +```bash +bin/pulsar-admin sinks create \ + --tenant public \ + --namespace default \ + --name cassandra-test-sink \ + --sink-type cassandra \ + --sink-config-file examples/cassandra-sink.yml \ + --inputs test_cassandra +``` + +Once the command is executed, Pulsar creates the sink connector _cassandra-test-sink_. + +This sink connector runs +as a Pulsar Function and writes the messages produced in the topic _test_cassandra_ to the Cassandra table _pulsar_test_table_. + +### Inspect a Cassandra sink + +You can use the [Connector Admin CLI](io-cli.md) +to monitor a connector and perform other operations on it. + +* Get the information of a Cassandra sink. + + ```bash + bin/pulsar-admin sinks get \ + --tenant public \ + --namespace default \ + --name cassandra-test-sink + ``` + + **Example output** + + ```json + { + "tenant": "public", + "namespace": "default", + "name": "cassandra-test-sink", + "className": "org.apache.pulsar.io.cassandra.CassandraStringSink", + "inputSpecs": { + "test_cassandra": { + "isRegexPattern": false + } + }, + "configs": { + "roots": "localhost:9042", + "keyspace": "pulsar_test_keyspace", + "columnFamily": "pulsar_test_table", + "keyname": "key", + "columnName": "col" + }, + "parallelism": 1, + "processingGuarantees": "ATLEAST_ONCE", + "retainOrdering": false, + "autoAck": true, + "archive": "builtin://cassandra" + } + ``` + +* Check the status of a Cassandra sink. 
+ + ```bash + bin/pulsar-admin sinks status \ + --tenant public \ + --namespace default \ + --name cassandra-test-sink + ``` + + **Example output** + + ```json + { + "numInstances" : 1, + "numRunning" : 1, + "instances" : [ { + "instanceId" : 0, + "status" : { + "running" : true, + "error" : "", + "numRestarts" : 0, + "numReadFromPulsar" : 0, + "numSystemExceptions" : 0, + "latestSystemExceptions" : [ ], + "numSinkExceptions" : 0, + "latestSinkExceptions" : [ ], + "numWrittenToSink" : 0, + "lastReceivedTime" : 0, + "workerId" : "c-standalone-fw-localhost-8080" + } + } ] + } + ``` + +### Verify a Cassandra sink + +1. Produce some messages to the input topic of the Cassandra sink _test_cassandra_. + + ```bash + for i in {0..9}; do bin/pulsar-client produce -m "key-$i" -n 1 test_cassandra; done + ``` + +2. Inspect the status of the Cassandra sink _test_cassandra_. + + ```bash + bin/pulsar-admin sinks status \ + --tenant public \ + --namespace default \ + --name cassandra-test-sink + ``` + + You can see 10 messages are processed by the Cassandra sink _test_cassandra_. + + **Example output** + + ```json + { + "numInstances" : 1, + "numRunning" : 1, + "instances" : [ { + "instanceId" : 0, + "status" : { + "running" : true, + "error" : "", + "numRestarts" : 0, + "numReadFromPulsar" : 10, + "numSystemExceptions" : 0, + "latestSystemExceptions" : [ ], + "numSinkExceptions" : 0, + "latestSinkExceptions" : [ ], + "numWrittenToSink" : 10, + "lastReceivedTime" : 1551685489136, + "workerId" : "c-standalone-fw-localhost-8080" + } + } ] + } + ``` + +3. Use `cqlsh` to connect to the Cassandra cluster. + + ```bash + docker exec -ti cassandra cqlsh localhost + ``` + +4. Check the data of the Cassandra table _pulsar_test_table_. + + ```bash + cqlsh> use pulsar_test_keyspace; + cqlsh:pulsar_test_keyspace> select * from pulsar_test_table; + + key | col + --------+-------- + key-5 | key-5 + key-0 | key-0 + key-9 | key-9 + key-2 | key-2 + key-1 | key-1 + key-3 | key-3 + key-6 | key-6 + key-7 | key-7 + key-4 | key-4 + key-8 | key-8 + ``` + +### Delete a Cassandra Sink + +You can use the [Connector Admin CLI](io-cli.md) +to delete a connector and perform other operations on it. + +```bash +bin/pulsar-admin sinks delete \ + --tenant public \ + --namespace default \ + --name cassandra-test-sink +``` + +## Connect Pulsar to PostgreSQL + +This section demonstrates how to connect Pulsar to PostgreSQL. + +> #### Tip +> +> * Make sure you have Docker installed. If you do not have one, see [install Docker](https://docs.docker.com/docker-for-mac/install/). +> +> * The JDBC sink connector pulls messages from Pulsar topics +and persists the messages to ClickHouse, MariaDB, PostgreSQL, or SQlite. +>For more information, see [JDBC sink connector](io-jdbc-sink.md). + + +### Setup a PostgreSQL cluster + +This example uses the PostgreSQL 12 docker image to start a single-node PostgreSQL cluster in Docker. + +1. Pull the PostgreSQL 12 image from Docker. + + ```bash + $ docker pull postgres:12 + ``` + +2. Start PostgreSQL. + + ```bash + $ docker run -d -it --rm \ + --name pulsar-postgres \ + -p 5432:5432 \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_USER=postgres \ + postgres:12 + ``` + + #### Tip + + Flag | Description | This example + ---|---|---| + `-d` | To start a container in detached mode. | / + `-it` | Keep STDIN open even if not attached and allocate a terminal. | / + `--rm` | Remove the container automatically when it exits. | / + `-name` | Assign a name to the container. 
| This example specifies _pulsar-postgres_ for the container. + `-p` | Publish the port of the container to the host. | This example publishes the port _5432_ of the container to the host. + `-e` | Set environment variables. | This example sets the following variables:
    - The password for the user is _password_.
    - The name for the user is _postgres_. + + > #### Tip + > + > For more information about Docker commands, see [Docker CLI](https://docs.docker.com/engine/reference/commandline/run/). + +3. Check if PostgreSQL has been started successfully. + + ```bash + $ docker logs -f pulsar-postgres + ``` + + PostgreSQL has been started successfully if the following message appears. + + ```text + 2020-05-11 20:09:24.492 UTC [1] LOG: starting PostgreSQL 12.2 (Debian 12.2-2.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit + 2020-05-11 20:09:24.492 UTC [1] LOG: listening on IPv4 address "0.0.0.0", port 5432 + 2020-05-11 20:09:24.492 UTC [1] LOG: listening on IPv6 address "::", port 5432 + 2020-05-11 20:09:24.499 UTC [1] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" + 2020-05-11 20:09:24.523 UTC [55] LOG: database system was shut down at 2020-05-11 20:09:24 UTC + 2020-05-11 20:09:24.533 UTC [1] LOG: database system is ready to accept connections + ``` + +4. Access to PostgreSQL. + + ```bash + $ docker exec -it pulsar-postgres /bin/bash + ``` + +5. Create a PostgreSQL table _pulsar_postgres_jdbc_sink_. + + ```bash + $ psql -U postgres postgres + + postgres=# create table if not exists pulsar_postgres_jdbc_sink + ( + id serial PRIMARY KEY, + name VARCHAR(255) NOT NULL + ); + ``` + +### Configure a JDBC sink + +Now we have a PostgreSQL running locally. + +In this section, you need to configure a JDBC sink connector. + +1. Add a configuration file. + + To run a JDBC sink connector, you need to prepare a YAML configuration file including the information that Pulsar connector runtime needs to know. + + For example, how Pulsar connector can find the PostgreSQL cluster, what is the JDBC URL and the table that Pulsar connector uses for writing messages to. + + Create a _pulsar-postgres-jdbc-sink.yaml_ file, copy the following contents to this file, and place the file in the `pulsar/connectors` folder. + + ```yaml + configs: + userName: "postgres" + password: "password" + jdbcUrl: "jdbc:postgresql://localhost:5432/pulsar_postgres_jdbc_sink" + tableName: "pulsar_postgres_jdbc_sink" + ``` + +2. Create a schema. + + Create a _avro-schema_ file, copy the following contents to this file, and place the file in the `pulsar/connectors` folder. + + ```json + { + "type": "AVRO", + "schema": "{\"type\":\"record\",\"name\":\"Test\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"int\"]},{\"name\":\"name\",\"type\":[\"null\",\"string\"]}]}", + "properties": {} + } + ``` + + > #### Tip + > + > For more information about AVRO, see [Apache Avro](https://avro.apache.org/docs/1.9.1/). + + +3. Upload a schema to a topic. + + This example uploads the _avro-schema_ schema to the _pulsar-postgres-jdbc-sink-topic_ topic. + + ```bash + $ bin/pulsar-admin schemas upload pulsar-postgres-jdbc-sink-topic -f ./connectors/avro-schema + ``` + +4. Check if the schema has been uploaded successfully. + + ```bash + $ bin/pulsar-admin schemas get pulsar-postgres-jdbc-sink-topic + ``` + + The schema has been uploaded successfully if the following message appears. + + ```json + {"name":"pulsar-postgres-jdbc-sink-topic","schema":"{\"type\":\"record\",\"name\":\"Test\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"int\"]},{\"name\":\"name\",\"type\":[\"null\",\"string\"]}]}","type":"AVRO","properties":{}} + ``` + +### Create a JDBC sink + +You can use the [Connector Admin CLI](io-cli.md) +to create a sink connector and perform other operations on it. 
+ +This example creates a sink connector and specifies the desired information. + +```bash +$ bin/pulsar-admin sinks create \ +--archive ./connectors/pulsar-io-jdbc-postgres-{{pulsar:version}}.nar \ +--inputs pulsar-postgres-jdbc-sink-topic \ +--name pulsar-postgres-jdbc-sink \ +--sink-config-file ./connectors/pulsar-postgres-jdbc-sink.yaml \ +--parallelism 1 +``` + +Once the command is executed, Pulsar creates a sink connector _pulsar-postgres-jdbc-sink_. + +This sink connector runs as a Pulsar Function and writes the messages produced in the topic _pulsar-postgres-jdbc-sink-topic_ to the PostgreSQL table _pulsar_postgres_jdbc_sink_. + + #### Tip + + Flag | Description | This example + ---|---|---| + `--archive` | The path to the archive file for the sink. | _pulsar-io-jdbc-postgres-{{pulsar:version}}.nar_ | + `--inputs` | The input topic(s) of the sink.
<br/><br/>Multiple topics can be specified as a comma-separated list.||
+ `--name` | The name of the sink. | _pulsar-postgres-jdbc-sink_ |
+ `--sink-config-file` | The path to a YAML config file specifying the configuration of the sink. | _pulsar-postgres-jdbc-sink.yaml_ |
+ `--parallelism` | The parallelism factor of the sink.<br/><br/>That is, the number of sink instances to run. | _1_ |
+
+ > #### Tip
+ >
+ > For more information about `pulsar-admin sinks create options`, see [here](io-cli.md#sinks).
+
+The sink has been created successfully if the following message appears.
+
+```bash
+"Created successfully"
+```
+
+### Inspect a JDBC sink
+
+You can use the [Connector Admin CLI](io-cli.md)
+to monitor a connector and perform other operations on it.
+
+* List all running JDBC sink(s).
+
+  ```bash
+  $ bin/pulsar-admin sinks list \
+  --tenant public \
+  --namespace default
+  ```
+
+  > #### Tip
+  >
+  > For more information about `pulsar-admin sinks list options`, see [here](io-cli.md/#list-1).
+
+  The result shows that only the _pulsar-postgres-jdbc-sink_ sink is running.
+
+  ```json
+  [
+   "pulsar-postgres-jdbc-sink"
+  ]
+  ```
+
+* Get the information of a JDBC sink.
+
+  ```bash
+  $ bin/pulsar-admin sinks get \
+  --tenant public \
+  --namespace default \
+  --name pulsar-postgres-jdbc-sink
+  ```
+
+  > #### Tip
+  >
+  > For more information about `pulsar-admin sinks get options`, see [here](io-cli.md/#get-1).
+
+  The result shows the information of the sink connector, including tenant, namespace, topic, and so on.
+
+  ```json
+  {
+    "tenant": "public",
+    "namespace": "default",
+    "name": "pulsar-postgres-jdbc-sink",
+    "className": "org.apache.pulsar.io.jdbc.PostgresJdbcAutoSchemaSink",
+    "inputSpecs": {
+      "pulsar-postgres-jdbc-sink-topic": {
+        "isRegexPattern": false
+      }
+    },
+    "configs": {
+      "password": "password",
+      "jdbcUrl": "jdbc:postgresql://localhost:5432/pulsar_postgres_jdbc_sink",
+      "userName": "postgres",
+      "tableName": "pulsar_postgres_jdbc_sink"
+    },
+    "parallelism": 1,
+    "processingGuarantees": "ATLEAST_ONCE",
+    "retainOrdering": false,
+    "autoAck": true
+  }
+  ```
+
+* Get the status of a JDBC sink.
+
+  ```bash
+  $ bin/pulsar-admin sinks status \
+  --tenant public \
+  --namespace default \
+  --name pulsar-postgres-jdbc-sink
+  ```
+
+  > #### Tip
+  >
+  > For more information about `pulsar-admin sinks status options`, see [here](io-cli.md/#status-1).
+
+  The result shows the current status of the sink connector, including the number of instances, running status, worker ID, and so on.
+
+  ```json
+  {
+    "numInstances" : 1,
+    "numRunning" : 1,
+    "instances" : [ {
+      "instanceId" : 0,
+      "status" : {
+        "running" : true,
+        "error" : "",
+        "numRestarts" : 0,
+        "numReadFromPulsar" : 0,
+        "numSystemExceptions" : 0,
+        "latestSystemExceptions" : [ ],
+        "numSinkExceptions" : 0,
+        "latestSinkExceptions" : [ ],
+        "numWrittenToSink" : 0,
+        "lastReceivedTime" : 0,
+        "workerId" : "c-standalone-fw-192.168.2.52-8080"
+      }
+    } ]
+  }
+  ```
+
+### Stop a JDBC sink
+
+You can use the [Connector Admin CLI](io-cli.md)
+to stop a connector and perform other operations on it.
+
+```bash
+$ bin/pulsar-admin sinks stop \
+--tenant public \
+--namespace default \
+--name pulsar-postgres-jdbc-sink
+```
+
+> #### Tip
+>
+> For more information about `pulsar-admin sinks stop options`, see [here](io-cli.md/#stop-1).
+
+The sink instance has been stopped successfully if the following message appears.
+
+```bash
+"Stopped successfully"
+```
+
+### Restart a JDBC sink
+
+You can use the [Connector Admin CLI](io-cli.md)
+to restart a connector and perform other operations on it.
+
+```bash
+$ bin/pulsar-admin sinks restart \
+--tenant public \
+--namespace default \
+--name pulsar-postgres-jdbc-sink
+```
+
+> #### Tip
+>
+> For more information about `pulsar-admin sinks restart options`, see [here](io-cli.md/#restart-1).
+
+The sink instance has been started successfully if the following message appears.
+
+```bash
+"Started successfully"
+```
+
+> #### Tip
+>
+> * Optionally, you can run a standalone sink connector using `pulsar-admin sinks localrun options`.
+>
+>   Note that `pulsar-admin sinks localrun options` **runs a sink connector locally**, while `pulsar-admin sinks start options` **starts a sink connector in a cluster**.
+>
+> * For more information about `pulsar-admin sinks localrun options`, see [here](io-cli.md#localrun-1).
+
+### Update a JDBC sink
+
+You can use the [Connector Admin CLI](io-cli.md)
+to update a connector and perform other operations on it.
+
+This example updates the parallelism of the _pulsar-postgres-jdbc-sink_ sink connector to 2.
+
+```bash
+$ bin/pulsar-admin sinks update \
+--name pulsar-postgres-jdbc-sink \
+--parallelism 2
+```
+
+> #### Tip
+>
+> For more information about `pulsar-admin sinks update options`, see [here](io-cli.md/#update-1).
+
+The sink connector has been updated successfully if the following message appears.
+
+```bash
+"Updated successfully"
+```
+
+This example double-checks the information.
+
+```bash
+$ bin/pulsar-admin sinks get \
+--tenant public \
+--namespace default \
+--name pulsar-postgres-jdbc-sink
+```
+
+The result shows that the parallelism is 2.
+
+```json
+{
+  "tenant": "public",
+  "namespace": "default",
+  "name": "pulsar-postgres-jdbc-sink",
+  "className": "org.apache.pulsar.io.jdbc.PostgresJdbcAutoSchemaSink",
+  "inputSpecs": {
+    "pulsar-postgres-jdbc-sink-topic": {
+      "isRegexPattern": false
+    }
+  },
+  "configs": {
+    "password": "password",
+    "jdbcUrl": "jdbc:postgresql://localhost:5432/pulsar_postgres_jdbc_sink",
+    "userName": "postgres",
+    "tableName": "pulsar_postgres_jdbc_sink"
+  },
+  "parallelism": 2,
+  "processingGuarantees": "ATLEAST_ONCE",
+  "retainOrdering": false,
+  "autoAck": true
+}
+```
+
+### Delete a JDBC sink
+
+You can use the [Connector Admin CLI](io-cli.md)
+to delete a connector and perform other operations on it.
+
+This example deletes the _pulsar-postgres-jdbc-sink_ sink connector.
+
+```bash
+$ bin/pulsar-admin sinks delete \
+--tenant public \
+--namespace default \
+--name pulsar-postgres-jdbc-sink
+```
+
+> #### Tip
+>
+> For more information about `pulsar-admin sinks delete options`, see [here](io-cli.md/#delete-1).
+
+The sink connector has been deleted successfully if the following message appears.
+
+```text
+"Deleted successfully"
+```
+
+This example double-checks the status of the sink connector.
+
+```bash
+$ bin/pulsar-admin sinks get \
+--tenant public \
+--namespace default \
+--name pulsar-postgres-jdbc-sink
+```
+
+The result shows that the sink connector does not exist.
+
+```text
+HTTP 404 Not Found
+
+Reason: Sink pulsar-postgres-jdbc-sink doesn't exist
+```
 diff --git a/site2/website/versioned_docs/version-2.7.0/io-rabbitmq-source.md b/site2/website/versioned_docs/version-2.7.0/io-rabbitmq-source.md
new file mode 100644
index 00000000000000..4628fed92863f6
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/io-rabbitmq-source.md
@@ -0,0 +1,81 @@
+---
+id: version-2.7.0-io-rabbitmq-source
+title: RabbitMQ source connector
+sidebar_label: RabbitMQ source connector
+original_id: io-rabbitmq-source
+---
+
+The RabbitMQ source connector receives messages from RabbitMQ clusters
+and writes messages to Pulsar topics.
+
+## Configuration
+
+The configuration of the RabbitMQ source connector has the following properties.
+ +### Property + +| Name | Type|Required | Default | Description +|------|----------|----------|---------|-------------| +| `connectionName` |String| true | " " (empty string) | The connection name. | +| `host` | String| true | " " (empty string) | The RabbitMQ host. | +| `port` | int |true | 5672 | The RabbitMQ port. | +| `virtualHost` |String|true | / | The virtual host used to connect to RabbitMQ. | +| `username` | String|false | guest | The username used to authenticate to RabbitMQ. | +| `password` | String|false | guest | The password used to authenticate to RabbitMQ. | +| `queueName` | String|true | " " (empty string) | The RabbitMQ queue name that messages should be read from or written to. | +| `requestedChannelMax` | int|false | 0 | The initially requested maximum channel number.
<br/><br/>0 means unlimited. |
+| `requestedFrameMax` | int|false |0 | The initially requested maximum frame size in octets.<br/><br/>0 means unlimited. |
+| `connectionTimeout` | int|false | 60000 | The timeout of TCP connection establishment in milliseconds.<br/><br/>0 means infinite. |
+| `handshakeTimeout` | int|false | 10000 | The timeout of AMQP0-9-1 protocol handshake in milliseconds. |
+| `requestedHeartbeat` | int|false | 60 | The requested heartbeat timeout in seconds. |
+| `prefetchCount` | int|false | 0 | The maximum number of messages that the server delivers.
    0 means unlimited. | +| `prefetchGlobal` | boolean|false | false |Whether the setting should be applied to the entire channel rather than each consumer. | +| `passive` | boolean|false | false | Whether the rabbitmq consumer should create its own queue or bind to an existing one. | + +### Example + +Before using the RabbitMQ source connector, you need to create a configuration file through one of the following methods. + +* JSON + + ```json + { + "host": "localhost", + "port": "5672", + "virtualHost": "/", + "username": "guest", + "password": "guest", + "queueName": "test-queue", + "connectionName": "test-connection", + "requestedChannelMax": "0", + "requestedFrameMax": "0", + "connectionTimeout": "60000", + "handshakeTimeout": "10000", + "requestedHeartbeat": "60", + "prefetchCount": "0", + "prefetchGlobal": "false", + "passive": "false" + } + ``` + +* YAML + + ```yaml + configs: + host: "localhost" + port: 5672 + virtualHost: "/" + username: "guest" + password: "guest" + queueName: "test-queue" + connectionName: "test-connection" + requestedChannelMax: 0 + requestedFrameMax: 0 + connectionTimeout: 60000 + handshakeTimeout: 10000 + requestedHeartbeat: 60 + prefetchCount: 0 + prefetchGlobal: "false" + passive: "false" + ``` + diff --git a/site2/website/versioned_docs/version-2.7.0/io-use.md b/site2/website/versioned_docs/version-2.7.0/io-use.md new file mode 100644 index 00000000000000..ed94198e1dbd7b --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/io-use.md @@ -0,0 +1,1505 @@ +--- +id: version-2.7.0-io-use +title: How to use Pulsar connectors +sidebar_label: Use +original_id: io-use +--- + +This guide describes how to use Pulsar connectors. + +## Install a connector + +Pulsar bundles several [builtin connectors](io-connectors.md) used to move data in and out of commonly used systems (such as database and messaging system). Optionally, you can create and use your desired non-builtin connectors. + +> #### Note +> +> When using a non-builtin connector, you need to specify the path of a archive file for the connector. + +To set up a builtin connector, follow +the instructions [here](getting-started-standalone.md#installing-builtin-connectors). + +After the setup, the builtin connector is automatically discovered by Pulsar brokers (or function-workers), so no additional installation steps are required. + +## Configure a connector + +You can configure the following information: + +* [Configure a default storage location for a connector](#configure-a-default-storage-location-for-a-connector) + +* [Configure a connector with a YAML file](#configure-a-connector-with-yaml-file) + +### Configure a default storage location for a connector + +To configure a default folder for builtin connectors, set the `connectorsDirectory` parameter in the `./conf/functions_worker.yml` configuration file. + +**Example** + +Set the `./connectors` folder as the default storage location for builtin connectors. + +``` +######################## +# Connectors +######################## + +connectorsDirectory: ./connectors +``` + +### Configure a connector with a YAML file + +To configure a connector, you need to provide a YAML configuration file when creating a connector. + +The YAML configuration file tells Pulsar where to locate connectors and how to connect connectors with Pulsar topics. 
+
+**Example 1**
+
+Below is a YAML configuration file of a Cassandra sink, which tells Pulsar:
+
+* Which Cassandra cluster to connect to
+
+* Which `keyspace` and `columnFamily` to use in Cassandra for collecting data
+
+* How to map Pulsar messages into Cassandra table key and columns
+
+```shell
+tenant: public
+namespace: default
+name: cassandra-test-sink
+...
+# cassandra specific config
+configs:
+    roots: "localhost:9042"
+    keyspace: "pulsar_test_keyspace"
+    columnFamily: "pulsar_test_table"
+    keyname: "key"
+    columnName: "col"
+```
+
+**Example 2**
+
+Below is a YAML configuration file of a Kafka source.
+
+```shell
+configs:
+   bootstrapServers: "pulsar-kafka:9092"
+   groupId: "test-pulsar-io"
+   topic: "my-topic"
+   sessionTimeoutMs: "10000"
+   autoCommitEnabled: "false"
+```
+
+**Example 3**
+
+Below is a YAML configuration file of a PostgreSQL JDBC sink.
+
+```shell
+configs:
+   userName: "postgres"
+   password: "password"
+   jdbcUrl: "jdbc:postgresql://localhost:5432/test_jdbc"
+   tableName: "test_jdbc"
+```
+
+## Get available connectors
+
+Before you start using connectors, you can perform the following operations:
+
+* [Reload connectors](#reload)
+
+* [Get a list of available connectors](#get-available-connectors)
+
+### `reload`
+
+If you add or delete a NAR file in a connector folder, reload the available builtin connectors before using them.
+
+#### Source
+
+Use the `reload` subcommand.
+
+```shell
+$ pulsar-admin sources reload
+```
+
+For more information, see [`here`](io-cli.md#reload).
+
+#### Sink
+
+Use the `reload` subcommand.
+
+```shell
+$ pulsar-admin sinks reload
+```
+
+For more information, see [`here`](io-cli.md#reload-1).
+
+### `available`
+
+After reloading connectors (optional), you can get a list of available connectors.
+
+#### Source
+
+Use the `available-sources` subcommand.
+
+```shell
+$ pulsar-admin sources available-sources
+```
+
+#### Sink
+
+Use the `available-sinks` subcommand.
+
+```shell
+$ pulsar-admin sinks available-sinks
+```
+
+## Run a connector
+
+To run a connector, you can perform the following operations:
+
+* [Create a connector](#create)
+
+* [Start a connector](#start)
+
+* [Run a connector locally](#localrun)
+
+### `create`
+
+You can create a connector using **Admin CLI**, **REST API** or **JAVA admin API**.
+
+#### Source
+
+Create a source connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `create` subcommand.
+
+```
+$ pulsar-admin sources create options
+```
+
+For more information, see [here](io-cli.md#create).
+
+<!--REST API-->
+
+Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName|operation/registerSource}
+
+<!--Java Admin API-->
+
+* Create a source connector with a **local file**.
+
+  ```java
+  void createSource(SourceConfig sourceConfig,
+                    String fileName)
+             throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  |Name|Description
+  |---|---
+  `sourceConfig` | The source configuration object
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`createSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#createSource-SourceConfig-java.lang.String-).
+
+* Create a source connector using a **remote file**, with a URL from which the connector package can be downloaded.
+
+  ```java
+  void createSourceWithUrl(SourceConfig sourceConfig,
+                           String pkgUrl)
+                    throws PulsarAdminException
+  ```
+
+  Supported URLs are `http` and `file`.
+
+  **Example**
+
+  * HTTP: http://www.repo.com/fileName.jar
+
+  * File: file:///dir/fileName.jar
+
+  **Parameter**
+
+  Parameter| Description
+  |---|---
+  `sourceConfig` | The source configuration object
+  `pkgUrl` | URL from which pkg can be downloaded
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`createSourceWithUrl`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#createSourceWithUrl-SourceConfig-java.lang.String-).
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+#### Sink
+
+Create a sink connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `create` subcommand.
+
+```
+$ pulsar-admin sinks create options
+```
+
+For more information, see [here](io-cli.md#create-1).
+
+<!--REST API-->
+
+Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName|operation/registerSink}
+
+<!--Java Admin API-->
+
+* Create a sink connector with a **local file**.
+
+  ```java
+  void createSink(SinkConfig sinkConfig,
+                  String fileName)
+           throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  |Name|Description
+  |---|---
+  `sinkConfig` | The sink configuration object
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`createSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#createSink-SinkConfig-java.lang.String-).
+
+* Create a sink connector using a **remote file**, with a URL from which the connector package can be downloaded.
+
+  ```java
+  void createSinkWithUrl(SinkConfig sinkConfig,
+                         String pkgUrl)
+                  throws PulsarAdminException
+  ```
+
+  Supported URLs are `http` and `file`.
+
+  **Example**
+
+  * HTTP: http://www.repo.com/fileName.jar
+
+  * File: file:///dir/fileName.jar
+
+  **Parameter**
+
+  Parameter| Description
+  |---|---
+  `sinkConfig` | The sink configuration object
+  `pkgUrl` | URL from which pkg can be downloaded
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`createSinkWithUrl`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#createSinkWithUrl-SinkConfig-java.lang.String-).
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+### `start`
+
+You can start a connector using **Admin CLI** or **REST API**.
+
+#### Source
+
+Start a source connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `start` subcommand.
+
+```
+$ pulsar-admin sources start options
+```
+
+For more information, see [here](io-cli.md#start).
+
+<!--REST API-->
+
+* Start **all** source connectors.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName/start|operation/startSource}
+
+* Start a **specified** source connector.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName/:instanceId/start|operation/startSource}
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+#### Sink
+
+Start a sink connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `start` subcommand.
+
+```
+$ pulsar-admin sinks start options
+```
+
+For more information, see [here](io-cli.md#start-1).
+
+<!--REST API-->
+
+* Start **all** sink connectors.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName/start|operation/startSink}
+
+* Start a **specified** sink connector.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName/:instanceId/start|operation/startSink}
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+### `localrun`
+
+You can run a connector locally, rather than deploying it on a Pulsar cluster, using **Admin CLI**; a sketch follows below.
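+
+For example, a source connector might be run locally like this (a sketch; the archive path, config file, topic name, and broker URL are assumptions):
+
+```shell
+$ bin/pulsar-admin sources localrun \
+  --archive connectors/pulsar-io-kafka-2.7.0.nar \
+  --source-config-file kafka-source.yml \
+  --destination-topic-name my-topic \
+  --broker-service-url pulsar://localhost:6650
+```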
+ +#### Source + +Run a source connector locally. + + + + + +Use the `localrun` subcommand. + +``` +$ pulsar-admin sources localrun options +``` + +For more information, see [here](io-cli.md#localrun). + + + +#### Sink + +Run a sink connector locally. + + + + + +Use the `localrun` subcommand. + +``` +$ pulsar-admin sinks localrun options +``` + +For more information, see [here](io-cli.md#localrun-1). + + + +## Monitor a connector + +To monitor a connector, you can perform the following operations: + +* [Get the information of a connector](#get) + +* [Get the list of all running connectors](#list) + +* [Get the current status of a connector](#status) + +### `get` + +You can get the information of a connector using **Admin CLI**, **REST API** or **JAVA admin API**. + +#### Source + +Get the information of a source connector. + + + + + +Use the `get` subcommand. + +``` +$ pulsar-admin sources get options +``` + +For more information, see [here](io-cli.md#get). + + + +Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sources/:tenant/:namespace/:sourceName|operation/getSourceInfo} + + + +```java +SourceConfig getSource(String tenant, + String namespace, + String source) + throws PulsarAdminException +``` + +**Example** + +This is a sourceConfig. + +```java +{ + "tenant": "tenantName", + "namespace": "namespaceName", + "name": "sourceName", + "className": "className", + "topicName": "topicName", + "configs": {}, + "parallelism": 1, + "processingGuarantees": "ATLEAST_ONCE", + "resources": { + "cpu": 1.0, + "ram": 1073741824, + "disk": 10737418240 + } +} +``` + +This is a sourceConfig example. + +``` +{ + "tenant": "public", + "namespace": "default", + "name": "debezium-mysql-source", + "className": "org.apache.pulsar.io.debezium.mysql.DebeziumMysqlSource", + "topicName": "debezium-mysql-topic", + "configs": { + "database.user": "debezium", + "database.server.id": "184054", + "database.server.name": "dbserver1", + "database.port": "3306", + "database.hostname": "localhost", + "database.password": "dbz", + "database.history.pulsar.service.url": "pulsar://127.0.0.1:6650", + "value.converter": "org.apache.kafka.connect.json.JsonConverter", + "database.whitelist": "inventory", + "key.converter": "org.apache.kafka.connect.json.JsonConverter", + "database.history": "org.apache.pulsar.io.debezium.PulsarDatabaseHistory", + "pulsar.service.url": "pulsar://127.0.0.1:6650", + "database.history.pulsar.topic": "history-topic2" + }, + "parallelism": 1, + "processingGuarantees": "ATLEAST_ONCE", + "resources": { + "cpu": 1.0, + "ram": 1073741824, + "disk": 10737418240 + } +} +``` + +**Exception** + +Exception name | Description +|---|--- +`PulsarAdminException.NotAuthorizedException` | You don't have the admin permission +`PulsarAdminException.NotFoundException` | Cluster doesn't exist +`PulsarAdminException` | Unexpected error + +For more information, see [`getSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#getSource-java.lang.String-java.lang.String-java.lang.String-). + + + +#### Sink + +Get the information of a sink connector. + + + + + +Use the `get` subcommand. + +``` +$ pulsar-admin sinks get options +``` + +For more information, see [here](io-cli.md#get-1). 
+ + + +Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sinks/:tenant/:namespace/:sinkName|operation/getSinkInfo} + + + +```java +SinkConfig getSink(String tenant, + String namespace, + String sink) + throws PulsarAdminException +``` + +**Example** + +This is a sinkConfig. + +```json +{ +"tenant": "tenantName", +"namespace": "namespaceName", +"name": "sinkName", +"className": "className", +"inputSpecs": { +"topicName": { + "isRegexPattern": false +} +}, +"configs": {}, +"parallelism": 1, +"processingGuarantees": "ATLEAST_ONCE", +"retainOrdering": false, +"autoAck": true +} +``` + +This is a sinkConfig example. + +```json +{ + "tenant": "public", + "namespace": "default", + "name": "pulsar-postgres-jdbc-sink", + "className": "org.apache.pulsar.io.jdbc.PostgresJdbcAutoSchemaSink", + "inputSpecs": { + "pulsar-postgres-jdbc-sink-topic": { + "isRegexPattern": false + } + }, + "configs": { + "password": "password", + "jdbcUrl": "jdbc:postgresql://localhost:5432/pulsar_postgres_jdbc_sink", + "userName": "postgres", + "tableName": "pulsar_postgres_jdbc_sink" + }, + "parallelism": 1, + "processingGuarantees": "ATLEAST_ONCE", + "retainOrdering": false, + "autoAck": true +} +``` + +**Parameter description** + +Name| Description +|---|--- +`tenant` | Tenant name +`namespace` | Namespace name +`sink` | Sink name + +For more information, see [`getSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#getSink-java.lang.String-java.lang.String-java.lang.String-). + + + +### `list` + +You can get the list of all running connectors using **Admin CLI**, **REST API** or **JAVA admin API**. + +#### Source + +Get the list of all running source connectors. + + + + + +Use the `list` subcommand. + +``` +$ pulsar-admin sources list options +``` + +For more information, see [here](io-cli.md#list). + + + +Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sources/:tenant/:namespace/|operation/listSources} + + + +```java +List listSources(String tenant, + String namespace) + throws PulsarAdminException +``` + +**Response example** + +```java +["f1", "f2", "f3"] +``` + +**Exception** + +Exception name | Description +|---|--- +`PulsarAdminException.NotAuthorizedException` | You don't have the admin permission +`PulsarAdminException` | Unexpected error + +For more information, see [`listSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#listSources-java.lang.String-java.lang.String-). + + + +#### Sink + +Get the list of all running sink connectors. + + + + + +Use the `list` subcommand. + +``` +$ pulsar-admin sinks list options +``` + +For more information, see [here](io-cli.md#list-1). + + + +Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sinks/:tenant/:namespace/|operation/listSinks} + + + +```java +List listSinks(String tenant, + String namespace) + throws PulsarAdminException +``` + +**Response example** + +```java +["f1", "f2", "f3"] +``` + +**Exception** + +Exception name | Description +|---|--- +`PulsarAdminException.NotAuthorizedException` | You don't have the admin permission +`PulsarAdminException` | Unexpected error + +For more information, see [`listSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#listSinks-java.lang.String-java.lang.String-). + + + +### `status` + +You can get the current status of a connector using **Admin CLI**, **REST API** or **JAVA admin API**. + +#### Source + +Get the current status of a source connector. 
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `status` subcommand.
+
+```
+$ pulsar-admin sources status options
+```
+
+For more information, see [here](io-cli.md#status).
+
+<!--REST API-->
+
+* Get the current status of **all** source connectors.
+
+  Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sources/:tenant/:namespace/:sourceName/status|operation/getSourceStatus}
+
+* Get the current status of a **specified** source connector.
+
+  Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sources/:tenant/:namespace/:sourceName/:instanceId/status|operation/getSourceStatus}
+
+<!--Java Admin API-->
+
+* Get the current status of **all** source connectors.
+
+  ```java
+  SourceStatus getSourceStatus(String tenant,
+                               String namespace,
+                               String source)
+                        throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  Parameter| Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `source` | Source name
+
+  **Exception**
+
+  Name | Description
+  |---|---
+  `PulsarAdminException` | Unexpected error
+
+  For more information, see [`getSourceStatus`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#getSourceStatus-java.lang.String-java.lang.String-java.lang.String-).
+
+* Get the current status of a **specified** source connector.
+
+  ```java
+  SourceStatus.SourceInstanceStatus.SourceInstanceStatusData getSourceStatus(String tenant,
+                                                                             String namespace,
+                                                                             String source,
+                                                                             int id)
+                                                                      throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  Parameter| Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `source` | Source name
+  `id` | Source instanceID
+
+  **Exception**
+
+  Exception name | Description
+  |---|---
+  `PulsarAdminException` | Unexpected error
+
+  For more information, see [`getSourceStatus`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#getSourceStatus-java.lang.String-java.lang.String-java.lang.String-int-).
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+#### Sink
+
+Get the current status of a Pulsar sink connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `status` subcommand.
+
+```
+$ pulsar-admin sinks status options
+```
+
+For more information, see [here](io-cli.md#status-1).
+
+<!--REST API-->
+
+* Get the current status of **all** sink connectors.
+
+  Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sinks/:tenant/:namespace/:sinkName/status|operation/getSinkStatus}
+
+* Get the current status of a **specified** sink connector.
+
+  Send a `GET` request to this endpoint: {@inject: endpoint|GET|/admin/v3/sinks/:tenant/:namespace/:sinkName/:instanceId/status|operation/getSinkInstanceStatus}
+
+<!--Java Admin API-->
+
+* Get the current status of **all** sink connectors.
+
+  ```java
+  SinkStatus getSinkStatus(String tenant,
+                           String namespace,
+                           String sink)
+                    throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  Parameter| Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `sink` | Sink name
+
+  **Exception**
+
+  Exception name | Description
+  |---|---
+  `PulsarAdminException` | Unexpected error
+
+  For more information, see [`getSinkStatus`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#getSinkStatus-java.lang.String-java.lang.String-java.lang.String-).
+
+* Get the current status of a **specified** sink connector.
+ + ```java + SinkStatus.SinkInstanceStatus.SinkInstanceStatusData getSinkStatus(String tenant, + String namespace, + String sink, + int id) + throws PulsarAdminException + ``` + + **Parameter** + + Parameter| Description + |---|--- + `tenant` | Tenant name + `namespace` | Namespace name + `sink` | Source name + `id` | Sink instanceID + + **Exception** + + Exception name | Description + |---|--- + `PulsarAdminException` | Unexpected error + + For more information, see [`getSinkStatusWithInstanceID`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#getSinkStatus-java.lang.String-java.lang.String-java.lang.String-int-). + + + +## Update a connector + +### `update` + +You can update a running connector using **Admin CLI**, **REST API** or **JAVA admin API**. + +#### Source + +Update a running Pulsar source connector. + + + + + +Use the `update` subcommand. + +``` +$ pulsar-admin sources update options +``` + +For more information, see [here](io-cli.md#update). + + + +Send a `PUT` request to this endpoint: {@inject: endpoint|PUT|/admin/v3/sources/:tenant/:namespace/:sourceName|operation/updateSource} + + + +* Update a running source connector with a **local file**. + + ```java + void updateSource(SourceConfig sourceConfig, + String fileName) + throws PulsarAdminException + ``` + + **Parameter** + + | Name | Description + |---|--- + |`sourceConfig` | The source configuration object + + **Exception** + + |Name|Description| + |---|--- + |`PulsarAdminException.NotAuthorizedException`| You don't have the admin permission + | `PulsarAdminException.NotFoundException` | Cluster doesn't exist + | `PulsarAdminException` | Unexpected error + + For more information, see [`updateSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#updateSource-SourceConfig-java.lang.String-). + +* Update a source connector using a **remote file** with a URL from which fun-pkg can be downloaded. + + ```java + void updateSourceWithUrl(SourceConfig sourceConfig, + String pkgUrl) + throws PulsarAdminException + ``` + + Supported URLs are `http` and `file`. + + **Example** + + * HTTP: http://www.repo.com/fileName.jar + + * File: file:///dir/fileName.jar + + **Parameter** + + | Name | Description + |---|--- + | `sourceConfig` | The source configuration object + | `pkgUrl` | URL from which pkg can be downloaded + + **Exception** + + |Name|Description| + |---|--- + |`PulsarAdminException.NotAuthorizedException`| You don't have the admin permission + | `PulsarAdminException.NotFoundException` | Cluster doesn't exist + | `PulsarAdminException` | Unexpected error + +For more information, see [`createSourceWithUrl`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#updateSourceWithUrl-SourceConfig-java.lang.String-). + + + +#### Sink + +Update a running Pulsar sink connector. + + + + + +Use the `update` subcommand. + +``` +$ pulsar-admin sinks update options +``` + +For more information, see [here](io-cli.md#update-1). + + + +Send a `PUT` request to this endpoint: {@inject: endpoint|PUT|/admin/v3/sinks/:tenant/:namespace/:sinkName|operation/updateSink} + + + +* Update a running sink connector with a **local file**. 
+ + ```java + void updateSink(SinkConfig sinkConfig, + String fileName) + throws PulsarAdminException + ``` + + **Parameter** + + | Name | Description + |---|--- + |`sinkConfig` | The sink configuration object + + **Exception** + + |Name|Description| + |---|--- + |`PulsarAdminException.NotAuthorizedException`| You don't have the admin permission + | `PulsarAdminException.NotFoundException` | Cluster doesn't exist + | `PulsarAdminException` | Unexpected error + + For more information, see [`updateSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#updateSink-SinkConfig-java.lang.String-). + +* Update a sink connector using a **remote file** with a URL from which fun-pkg can be downloaded. + + ```java + void updateSinkWithUrl(SinkConfig sinkConfig, + String pkgUrl) + throws PulsarAdminException + ``` + + Supported URLs are `http` and `file`. + + **Example** + + * HTTP: http://www.repo.com/fileName.jar + + * File: file:///dir/fileName.jar + + **Parameter** + + | Name | Description + |---|--- + | `sinkConfig` | The sink configuration object + | `pkgUrl` | URL from which pkg can be downloaded + + **Exception** + + |Name|Description| + |---|--- + |`PulsarAdminException.NotAuthorizedException`| You don't have the admin permission + |`PulsarAdminException.NotFoundException` | Cluster doesn't exist + |`PulsarAdminException` | Unexpected error + +For more information, see [`updateSinkWithUrl`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#updateSinkWithUrl-SinkConfig-java.lang.String-). + + + +## Stop a connector + +### `stop` + +You can stop a connector using **Admin CLI**, **REST API** or **JAVA admin API**. + +#### Source + +Stop a source connector. + + + + + +Use the `stop` subcommand. + +``` +$ pulsar-admin sources stop options +``` + +For more information, see [here](io-cli.md#stop). + + + +* Stop **all** source connectors. + + Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName|operation/stopSource} + +* Stop a **specified** source connector. + + Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName/:instanceId|operation/stopSource} + + + +* Stop **all** source connectors. + + ```java + void stopSource(String tenant, + String namespace, + String source) + throws PulsarAdminException + ``` + + **Parameter** + + | Name | Description + |---|--- + `tenant` | Tenant name + `namespace` | Namespace name + `source` | Source name + + **Exception** + + |Name|Description| + |---|--- + | `PulsarAdminException` | Unexpected error + + For more information, see [`stopSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#stopSource-java.lang.String-java.lang.String-java.lang.String-). + +* Stop a **specified** source connector. + + ```java + void stopSource(String tenant, + String namespace, + String source, + int instanceId) + throws PulsarAdminException + ``` + + **Parameter** + + | Name | Description + |---|--- + `tenant` | Tenant name + `namespace` | Namespace name + `source` | Source name + `instanceId` | Source instanceID + + **Exception** + + |Name|Description| + |---|--- + | `PulsarAdminException` | Unexpected error + + For more information, see [`stopSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#stopSource-java.lang.String-java.lang.String-java.lang.String-int-). + + + +#### Sink + +Stop a sink connector. + + + + + +Use the `stop` subcommand. 
+
+```
+$ pulsar-admin sinks stop options
+```
+
+For more information, see [here](io-cli.md#stop-1).
+
+<!--REST API-->
+
+* Stop **all** sink connectors.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName/stop|operation/stopSink}
+
+* Stop a **specified** sink connector.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName/:instanceId/stop|operation/stopSink}
+
+<!--Java Admin API-->
+
+* Stop **all** sink connectors.
+
+  ```java
+  void stopSink(String tenant,
+                String namespace,
+                String sink)
+         throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  | Name | Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `sink` | Sink name
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`stopSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#stopSink-java.lang.String-java.lang.String-java.lang.String-).
+
+* Stop a **specified** sink connector.
+
+  ```java
+  void stopSink(String tenant,
+                String namespace,
+                String sink,
+                int instanceId)
+         throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  | Name | Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `sink` | Sink name
+  `instanceId` | Sink instanceID
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`stopSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#stopSink-java.lang.String-java.lang.String-java.lang.String-int-).
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+## Restart a connector
+
+### `restart`
+
+You can restart a connector using **Admin CLI**, **REST API** or **JAVA admin API**.
+
+#### Source
+
+Restart a source connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `restart` subcommand.
+
+```
+$ pulsar-admin sources restart options
+```
+
+For more information, see [here](io-cli.md#restart).
+
+<!--REST API-->
+
+* Restart **all** source connectors.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName/restart|operation/restartSource}
+
+* Restart a **specified** source connector.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sources/:tenant/:namespace/:sourceName/:instanceId/restart|operation/restartSource}
+
+<!--Java Admin API-->
+
+* Restart **all** source connectors.
+
+  ```java
+  void restartSource(String tenant,
+                     String namespace,
+                     String source)
+              throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  | Name | Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `source` | Source name
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`restartSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#restartSource-java.lang.String-java.lang.String-java.lang.String-).
+
+* Restart a **specified** source connector.
+
+  ```java
+  void restartSource(String tenant,
+                     String namespace,
+                     String source,
+                     int instanceId)
+              throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  | Name | Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `source` | Source name
+  `instanceId` | Source instanceID
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`restartSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#restartSource-java.lang.String-java.lang.String-java.lang.String-int-).
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+#### Sink
+
+Restart a sink connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `restart` subcommand.
+
+```
+$ pulsar-admin sinks restart options
+```
+
+For more information, see [here](io-cli.md#restart-1).
+
+<!--REST API-->
+
+* Restart **all** sink connectors.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName/restart|operation/restartSink}
+
+* Restart a **specified** sink connector.
+
+  Send a `POST` request to this endpoint: {@inject: endpoint|POST|/admin/v3/sinks/:tenant/:namespace/:sinkName/:instanceId/restart|operation/restartSink}
+
+<!--Java Admin API-->
+
+* Restart **all** sink connectors.
+
+  ```java
+  void restartSink(String tenant,
+                   String namespace,
+                   String sink)
+            throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  | Name | Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `sink` | Sink name
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`restartSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#restartSink-java.lang.String-java.lang.String-java.lang.String-).
+
+* Restart a **specified** sink connector.
+
+  ```java
+  void restartSink(String tenant,
+                   String namespace,
+                   String sink,
+                   int instanceId)
+            throws PulsarAdminException
+  ```
+
+  **Parameter**
+
+  | Name | Description
+  |---|---
+  `tenant` | Tenant name
+  `namespace` | Namespace name
+  `sink` | Sink name
+  `instanceId` | Sink instanceID
+
+  **Exception**
+
+  |Name|Description|
+  |---|---
+  | `PulsarAdminException` | Unexpected error
+
+  For more information, see [`restartSink`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#restartSink-java.lang.String-java.lang.String-java.lang.String-int-).
+
+<!--END_DOCUSAURUS_CODE_TABS-->
+
+## Delete a connector
+
+### `delete`
+
+You can delete a connector using **Admin CLI**, **REST API** or **JAVA admin API**.
+
+#### Source
+
+Delete a source connector.
+
+<!--DOCUSAURUS_CODE_TABS-->
+<!--Admin CLI-->
+
+Use the `delete` subcommand.
+
+```
+$ pulsar-admin sources delete options
+```
+
+For more information, see [here](io-cli.md#delete).
+
+<!--REST API-->
+
+Delete a Pulsar source connector.
+
+Send a `DELETE` request to this endpoint: {@inject: endpoint|DELETE|/admin/v3/sources/:tenant/:namespace/:sourceName|operation/deregisterSource}
+
+<!--Java Admin API-->
+
+Delete a source connector.
+ +```java +void deleteSource(String tenant, + String namespace, + String source) + throws PulsarAdminException +``` + +**Parameter** + +| Name | Description +|---|--- +`tenant` | Tenant name +`namespace` | Namespace name +`source` | Source name + +**Exception** + +|Name|Description| +|---|--- +|`PulsarAdminException.NotAuthorizedException`| You don't have the admin permission +| `PulsarAdminException.NotFoundException` | Cluster doesn't exist +| `PulsarAdminException.PreconditionFailedException` | Cluster is not empty +| `PulsarAdminException` | Unexpected error + +For more information, see [`deleteSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Source.html#deleteSource-java.lang.String-java.lang.String-java.lang.String-). + + + +#### Sink + +Delete a sink connector. + + + + + +Use the `delete` subcommand. + +``` +$ pulsar-admin sinks delete options +``` + +For more information, see [here](io-cli.md#delete-1). + + + +Delete a sink connector. + +Send a `DELETE` request to this endpoint: {@inject: endpoint|DELETE|/admin/v3/sinks/:tenant/:namespace/:sinkName|operation/deregisterSink} + + + +Delete a Pulsar sink connector. + +```java +void deleteSink(String tenant, + String namespace, + String source) + throws PulsarAdminException +``` + +**Parameter** + +| Name | Description +|---|--- +`tenant` | Tenant name +`namespace` | Namespace name +`sink` | Sink name + +**Exception** + +|Name|Description| +|---|--- +|`PulsarAdminException.NotAuthorizedException`| You don't have the admin permission +| `PulsarAdminException.NotFoundException` | Cluster doesn't exist +| `PulsarAdminException.PreconditionFailedException` | Cluster is not empty +| `PulsarAdminException` | Unexpected error + +For more information, see [`deleteSource`](https://pulsar.apache.org/api/admin/org/apache/pulsar/client/admin/Sink.html#deleteSink-java.lang.String-java.lang.String-java.lang.String-). + + diff --git a/site2/website/versioned_docs/version-2.7.0/reference-cli-tools.md b/site2/website/versioned_docs/version-2.7.0/reference-cli-tools.md new file mode 100644 index 00000000000000..01f7ce59dad1b9 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/reference-cli-tools.md @@ -0,0 +1,745 @@ +--- +id: version-2.7.0-reference-cli-tools +title: Pulsar command-line tools +sidebar_label: Pulsar CLI tools +original_id: reference-cli-tools +--- + +Pulsar offers several command-line tools that you can use for managing Pulsar installations, performance testing, using command-line producers and consumers, and more. + +All Pulsar command-line tools can be run from the `bin` directory of your [installed Pulsar package](getting-started-standalone.md). The following tools are currently documented: + +* [`pulsar`](#pulsar) +* [`pulsar-client`](#pulsar-client) +* [`pulsar-daemon`](#pulsar-daemon) +* [`pulsar-perf`](#pulsar-perf) +* [`bookkeeper`](#bookkeeper) +* [`broker-tool`](#broker-tool) + +> ### Getting help +> You can get help for any CLI tool, command, or subcommand using the `--help` flag, or `-h` for short. Here's an example: +> ```shell +> $ bin/pulsar broker --help +> ``` + +## `pulsar` + +The pulsar tool is used to start Pulsar components, such as bookies and ZooKeeper, in the foreground. + +These processes can also be started in the background, using nohup, using the pulsar-daemon tool, which has the same command interface as pulsar. 
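+
+For example, both of the following start a broker; the first runs in the foreground, the second in the background via nohup (a sketch, assuming you run from the distribution root with a valid `conf/broker.conf`):
+
+```shell
+$ bin/pulsar broker
+$ bin/pulsar-daemon start broker
+```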
+ +Usage: +```bash +$ pulsar command +``` +Commands: +* `bookie` +* `broker` +* `compact-topic` +* `discovery` +* `configuration-store` +* `initialize-cluster-metadata` +* `proxy` +* `standalone` +* `websocket` +* `zookeeper` +* `zookeeper-shell` + +Example: +```bash +$ PULSAR_BROKER_CONF=/path/to/broker.conf pulsar broker +``` + +The table below lists the environment variables that you can use to configure the `pulsar` tool. + +|Variable|Description|Default| +|---|---|---| +|`PULSAR_LOG_CONF`|Log4j configuration file|`conf/log4j2.yaml`| +|`PULSAR_BROKER_CONF`|Configuration file for broker|`conf/broker.conf`| +|`PULSAR_BOOKKEEPER_CONF`|description: Configuration file for bookie|`conf/bookkeeper.conf`| +|`PULSAR_ZK_CONF`|Configuration file for zookeeper|`conf/zookeeper.conf`| +|`PULSAR_CONFIGURATION_STORE_CONF`|Configuration file for the configuration store|`conf/global_zookeeper.conf`| +|`PULSAR_DISCOVERY_CONF`|Configuration file for discovery service|`conf/discovery.conf`| +|`PULSAR_WEBSOCKET_CONF`|Configuration file for websocket proxy|`conf/websocket.conf`| +|`PULSAR_STANDALONE_CONF`|Configuration file for standalone|`conf/standalone.conf`| +|`PULSAR_EXTRA_OPTS`|Extra options to be passed to the jvm|| +|`PULSAR_EXTRA_CLASSPATH`|Extra paths for Pulsar's classpath|| +|`PULSAR_PID_DIR`|Folder where the pulsar server PID file should be stored|| +|`PULSAR_STOP_TIMEOUT`|Wait time before forcefully killing the Bookie server instance if attempts to stop it are not successful|| + + + +### `bookie` + +Starts up a bookie server + +Usage: +```bash +$ pulsar bookie options +``` + +Options + +|Option|Description|Default| +|---|---|---| +|`-readOnly`|Force start a read-only bookie server|false| +|`-withAutoRecovery`|Start auto-recover service bookie server|false| + + +Example +```bash +$ PULSAR_BOOKKEEPER_CONF=/path/to/bookkeeper.conf pulsar bookie \ + -readOnly \ + -withAutoRecovery +``` + +### `broker` + +Starts up a Pulsar broker + +Usage +```bash +$ pulsar broker options +``` + +Options +|Option|Description|Default| +|---|---|---| +|`-bc` , `--bookie-conf`|Configuration file for BookKeeper|| +|`-rb` , `--run-bookie`|Run a BookKeeper bookie on the same host as the Pulsar broker|false| +|`-ra` , `--run-bookie-autorecovery`|Run a BookKeeper autorecovery daemon on the same host as the Pulsar broker|false| + +Example +```bash +$ PULSAR_BROKER_CONF=/path/to/broker.conf pulsar broker +``` + +### `compact-topic` + +Run compaction against a Pulsar topic (in a new process) + +Usage +```bash +$ pulsar compact-topic options +``` +Options +|Flag|Description|Default| +|---|---|---| +|`-t` , `--topic`|The Pulsar topic that you would like to compact|| + +Example +```bash +$ pulsar compact-topic --topic topic-to-compact +``` + +### `discovery` + +Run a discovery server + +Usage +```bash +$ pulsar discovery +``` + +Example +```bash +$ PULSAR_DISCOVERY_CONF=/path/to/discovery.conf pulsar discovery +``` + +### `configuration-store` + +Starts up the Pulsar configuration store + +Usage +```bash +$ pulsar configuration-store +``` + +Example +```bash +$ PULSAR_CONFIGURATION_STORE_CONF=/path/to/configuration_store.conf pulsar configuration-store +``` + +### `initialize-cluster-metadata` + +One-time cluster metadata initialization + +Usage +```bash +$ pulsar initialize-cluster-metadata options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`-ub` , `--broker-service-url`|The broker service URL for the new cluster|| +|`-tb` , `--broker-service-url-tls`|The broker service URL for the new cluster with TLS 
encryption||
+|`-c` , `--cluster`|Cluster name||
+|`-cs` , `--configuration-store`|The configuration store quorum connection string||
+|`--existing-bk-metadata-service-uri`|The metadata service URI of the existing BookKeeper cluster that you want to use||
+|`-h` , `--help`|Show this help message|false|
+|`--initial-num-stream-storage-containers`|The number of storage containers of BookKeeper stream storage|16|
+|`--initial-num-transaction-coordinators`|The number of transaction coordinators assigned in a cluster|16|
+|`-uw` , `--web-service-url`|The web service URL for the new cluster||
+|`-tw` , `--web-service-url-tls`|The web service URL for the new cluster with TLS encryption||
+|`-zk` , `--zookeeper`|The local ZooKeeper quorum connection string||
+|`--zookeeper-session-timeout-ms`|The local ZooKeeper session timeout, in milliseconds (ms)|30000|
+
+
+### `proxy`
+
+Starts up the Pulsar proxy
+
+Usage
+```bash
+$ pulsar proxy options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--configuration-store`|Configuration store connection string||
+|`-zk` , `--zookeeper-servers`|Local ZooKeeper connection string||
+
+Example
+```bash
+$ PULSAR_PROXY_CONF=/path/to/proxy.conf pulsar proxy \
+  --zookeeper-servers zk-0,zk-1,zk-2 \
+  --configuration-store zk-0,zk-1,zk-2
+```
+
+### `standalone`
+
+Run a broker service with local bookies and local ZooKeeper
+
+Usage
+```bash
+$ pulsar standalone options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-a` , `--advertised-address`|The standalone broker advertised address||
+|`--bookkeeper-dir`|Local bookies’ base data directory|data/standalone/bookkeeper|
+|`--bookkeeper-port`|Local bookies’ base port|3181|
+|`--no-broker`|Only start ZooKeeper and BookKeeper services, not the broker|false|
+|`--num-bookies`|The number of local bookies|1|
+|`--only-broker`|Only start the Pulsar broker service (not ZooKeeper or BookKeeper)||
+|`--wipe-data`|Clean up previous ZooKeeper/BookKeeper data||
+|`--zookeeper-dir`|Local ZooKeeper’s data directory|data/standalone/zookeeper|
+|`--zookeeper-port` |Local ZooKeeper’s port|2181|
+
+Example
+```bash
+$ PULSAR_STANDALONE_CONF=/path/to/standalone.conf pulsar standalone
+```
+
+### `websocket`
+
+Starts up the Pulsar WebSocket proxy
+
+Usage
+```bash
+$ pulsar websocket
+```
+
+Example
+```bash
+$ PULSAR_WEBSOCKET_CONF=/path/to/websocket.conf pulsar websocket
+```
+
+### `zookeeper`
+
+Starts up a ZooKeeper cluster
+
+Usage
+```bash
+$ pulsar zookeeper
+```
+
+Example
+```bash
+$ PULSAR_ZK_CONF=/path/to/zookeeper.conf pulsar zookeeper
+```
+
+
+### `zookeeper-shell`
+
+Connects to a running ZooKeeper cluster using the ZooKeeper shell
+
+Usage
+```bash
+$ pulsar zookeeper-shell options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-c`, `--conf`|Configuration file for ZooKeeper||
+
+
+
+## `pulsar-client`
+
+The pulsar-client tool is a command-line client for producing and consuming messages.
+
+Usage
+```bash
+$ pulsar-client command
+```
+
+Commands
+* `produce`
+* `consume`
+
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--auth-params`|Authentication parameters, whose format is determined by the implementation of method `configure` in authentication plugin class, for example "key1:val1,key2:val2" or "{\"key1\":\"val1\",\"key2\":\"val2\"}"|{"saslJaasClientSectionName":"PulsarClient", "serverType":"broker"}|
+|`--auth-plugin`|Authentication plugin class name|org.apache.pulsar.client.impl.auth.AuthenticationSasl|
+|`--listener-name`|Listener name for the broker||
+|`--url`|Broker URL to which to connect|pulsar://localhost:6650/
    ws://localhost:8080 | + + +### `produce` +Send a message or messages to a specific broker and topic + +Usage +```bash +$ pulsar-client produce topic options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`-f`, `--files`|Comma-separated file paths to send; either -m or -f must be specified|[]| +|`-m`, `--messages`|Comma-separated string of messages to send; either -m or -f must be specified|[]| +|`-n`, `--num-produce`|The number of times to send the message(s); the count of messages/files * num-produce should be below 1000|1| +|`-r`, `--rate`|Rate (in messages per second) at which to produce; a value 0 means to produce messages as fast as possible|0.0| + + +### `consume` +Consume messages from a specific broker and topic + +Usage +```bash +$ pulsar-client consume topic options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`--hex`|Display binary messages in hexadecimal format.|false| +|`-n`, `--num-messages`|Number of messages to consume, 0 means to consume forever.|1| +|`-r`, `--rate`|Rate (in messages per second) at which to consume; a value 0 means to consume messages as fast as possible|0.0| +|`--regex`|Indicate the topic name is a regex pattern|false| +|`-s`, `--subscription-name`|Subscription name|| +|`-t`, `--subscription-type`|The type of the subscription. Possible values: Exclusive, Shared, Failover, Key_Shared.|Exclusive| +|`-p`, `--subscription-position`|The position of the subscription. Possible values: Latest, Earliest.|Latest| + + + +## `pulsar-daemon` +A wrapper around the pulsar tool that’s used to start and stop processes, such as ZooKeeper, bookies, and Pulsar brokers, in the background using nohup. + +pulsar-daemon has a similar interface to the pulsar command but adds start and stop commands for various services. For a listing of those services, run pulsar-daemon to see the help output or see the documentation for the pulsar command. + +Usage +```bash +$ pulsar-daemon command +``` + +Commands +* `start` +* `stop` + + +### `start` +Start a service in the background using nohup. + +Usage +```bash +$ pulsar-daemon start service +``` + +### `stop` +Stop a service that’s already been started using start. + +Usage +```bash +$ pulsar-daemon stop service options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|-force|Stop the service forcefully if not stopped by normal shutdown.|false| + + + +## `pulsar-perf` +A tool for performance testing a Pulsar broker. + +Usage +```bash +$ pulsar-perf command +``` + +Commands +* `consume` +* `produce` +* `read` +* `websocket-producer` +* `managed-ledger` +* `monitor-brokers` +* `simulation-client` +* `simulation-controller` +* `help` + +Environment variables + +The table below lists the environment variables that you can use to configure the pulsar-perf tool. 
+
+|Variable|Description|Default|
+|---|---|---|
+|`PULSAR_LOG_CONF`|Log4j configuration file|conf/log4j2.yaml|
+|`PULSAR_CLIENT_CONF`|Configuration file for the client|conf/client.conf|
+|`PULSAR_EXTRA_OPTS`|Extra options to be passed to the JVM||
+|`PULSAR_EXTRA_CLASSPATH`|Extra paths for Pulsar's classpath||
+
+
+### `consume`
+Run a consumer
+
+Usage
+```
+$ pulsar-perf consume options
+```
+
+Options
+
+|Flag|Description|Default|
+|---|---|---|
+|`--auth_params`|Authentication parameters, whose format is determined by the implementation of method `configure` in authentication plugin class, for example "key1:val1,key2:val2" or "{"key1":"val1","key2":"val2"}.||
+|`--auth_plugin`|Authentication plugin class name||
+|`--listener-name`|Listener name for the broker||
+|`--acks-delay-millis`|Acknowledgments grouping delay in millis|100|
+|`-k`, `--encryption-key-name`|The private key name to decrypt payload||
+|`-v`, `--encryption-key-value-file`|The file which contains the private key to decrypt payload||
+|`-h`, `--help`|Help message|false|
+|`--conf-file`|Configuration file||
+|`-c`, `--max-connections`|Max number of TCP connections to a single broker|100|
+|`-n`, `--num-consumers`|Number of consumers (per topic)|1|
+|`-t`, `--num-topic`|The number of topics|1|
+|`-r`, `--rate`|Simulate a slow message consumer (rate in msg/s)|0|
+|`-q`, `--receiver-queue-size`|Size of the receiver queue|1000|
+|`-u`, `--service-url`|Pulsar service URL||
+|`-i`, `--stats-interval-seconds`|Statistics interval in seconds. If 0, statistics will be disabled|0|
+|`-s`, `--subscriber-name`|Subscriber name prefix|sub|
+|`-st`, `--subscription-type`|Subscriber type. Possible values are Exclusive, Shared, Failover, Key_Shared.|Exclusive|
+|`-sp`, `--subscription-position`|Subscriber position. Possible values are Latest, Earliest.|Latest|
+|`--trust-cert-file`|Path for the trusted TLS certificate file||
+|`--tls-allow-insecure`|Allow insecure TLS connection||
+
+
+### `produce`
+Run a producer
+
+Usage
+```bash
+$ pulsar-perf produce options
+```
+
+Options
+
+|Flag|Description|Default|
+|---|---|---|
+|`--auth_params`|Authentication parameters, whose format is determined by the implementation of method `configure` in authentication plugin class, for example "key1:val1,key2:val2" or "{"key1":"val1","key2":"val2"}.||
+|`--auth_plugin`|Authentication plugin class name||
+|`--listener-name`|Listener name for the broker||
+|`-b`, `--batch-time-window`|Batch messages in a window of the specified number of milliseconds|1|
+|`-z`, `--compression`|Compress messages’ payload. Possible values are NONE, LZ4, ZLIB, ZSTD or SNAPPY.||
+|`--conf-file`|Configuration file||
+|`-k`, `--encryption-key-name`|The public key name to encrypt payload||
+|`-v`, `--encryption-key-value-file`|The file which contains the public key to encrypt payload||
+|`-h`, `--help`|Help message|false|
+|`-c`, `--max-connections`|Max number of TCP connections to a single broker|100|
+|`-o`, `--max-outstanding`|Max number of outstanding messages|1000|
+|`-p`, `--max-outstanding-across-partitions`|Max number of outstanding messages across partitions|50000|
+|`-m`, `--num-messages`|Number of messages to publish in total. 
If set to 0, it will keep publishing.|0| +|`-n`, `--num-producers`|The number of producers (per topic)|1| +|`-t`, `--num-topic`|The number of topics|1| +|`-f`, `--payload-file`|Use payload from an UTF-8 encoded text file and a payload will be randomly selected when publishing messages|| +|`-e`, `--payload-delimiter`|The delimiter used to split lines when using payload from a file|\n| +|`-r`, `--rate`|Publish rate msg/s across topics|100| +|`-u`, `--service-url`|Pulsar service URL|| +|`-s`, `--size`|Message size (in bytes)|1024| +|`-i`, `--stats-interval-seconds`|Statistics interval seconds. If 0, statistics will be disabled.|0| +|`-time`, `--test-duration`|Test duration in secs. If set to 0, it will keep publishing.|0| +|`--trust-cert-file`|Path for the trusted TLS certificate file|| +|`--warmup-time`|Warm-up time in seconds|1| +|`--tls-allow-insecure`|Allow insecure TLS connection|| + + +### `read` +Run a topic reader + +Usage +```bash +$ pulsar-perf read options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`--auth_params`|Authentication parameters, whose format is determined by the implementation of method `configure` in authentication plugin class, for example "key1:val1,key2:val2" or "{"key1":"val1","key2":"val2"}.|| +|`--auth_plugin`|Authentication plugin class name|| +|`--listener-name`|Listener name for the broker|| +|`--conf-file`|Configuration file|| +|`-h`, `--help`|Help message|false| +|`-c`, `--max-connections`|Max number of TCP connections to a single broker|100| +|`-t`, `--num-topic`|The number of topics|1| +|`-r`, `--rate`|Simulate a slow message reader (rate in msg/s)|0| +|`-q`, `--receiver-queue-size`|Size of the receiver queue|1000| +|`-u`, `--service-url`|Pulsar service URL|| +|`-m`, `--start-message-id`|Start message id. This can be either 'earliest', 'latest' or a specific message id by using 'lid:eid'|earliest| +|`-i`, `--stats-interval-seconds`|Statistics interval seconds. If 0, statistics will be disabled.|0| +|`--trust-cert-file`|Path for the trusted TLS certificate file|| +|`--use-tls`|Use TLS encryption on the connection|false| +|`--tls-allow-insecure`|Allow insecure TLS connection|| + +### `websocket-producer` +Run a websocket producer + +Usage +```bash +$ pulsar-perf websocket-producer options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`--auth_params`|Authentication parameters, whose format is determined by the implementation of method `configure` in authentication plugin class, for example "key1:val1,key2:val2" or "{"key1":"val1","key2":"val2"}.|| +|`--auth_plugin`|Authentication plugin class name|| +|`--conf-file`|Configuration file|| +|`-h`, `--help`|Help message|false| +|`-m`, `--num-messages`|Number of messages to publish in total. If 0, it will keep publishing|0| +|`-t`, `--num-topic`|The number of topics|1| +|`-f`, `--payload-file`|Use payload from a file instead of empty buffer|| +|`-u`, `--proxy-url`|Pulsar Proxy URL, e.g., "ws://localhost:8080/"|| +|`-r`, `--rate`|Publish rate msg/s across topics|100| +|`-s`, `--size`|Message size in byte|1024| +|`-time`, `--test-duration`|Test duration in secs. If 0, it will keep publishing|0| + + +### `managed-ledger` +Write directly on managed-ledgers + +Usage +```bash +$ pulsar-perf managed-ledger options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`-a`, `--ack-quorum`|Ledger ack quorum|1| +|`-dt`, `--digest-type`|BookKeeper digest type. 
Possible Values: [CRC32, MAC, CRC32C, DUMMY]|CRC32C| +|`-e`, `--ensemble-size`|Ledger ensemble size|1| +|`-h`, `--help`|Help message|false| +|`-c`, `--max-connections`|Max number of TCP connections to a single bookie|1| +|`-o`, `--max-outstanding`|Max number of outstanding requests|1000| +|`-m`, `--num-messages`|Number of messages to publish in total. If 0, it will keep publishing|0| +|`-t`, `--num-topic`|Number of managed ledgers|1| +|`-r`, `--rate`|Write rate msg/s across managed ledgers|100| +|`-s`, `--size`|Message size in byte|1024| +|`-time`, `--test-duration`|Test duration in secs. If 0, it will keep publishing|0| +|`--threads`|Number of threads writing|1| +|`-w`, `--write-quorum`|Ledger write quorum|1| +|`-zk`, `--zookeeperServers`|ZooKeeper connection string|| + + +### `monitor-brokers` +Continuously receive broker data and/or load reports + +Usage +```bash +$ pulsar-perf monitor-brokers options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`--connect-string`|A connection string for one or more ZooKeeper servers|| +|`-h`, `--help`|Help message|false| + + +### `simulation-client` +Run a simulation server acting as a Pulsar client. Uses the client configuration specified in `conf/client.conf`. + +Usage +```bash +$ pulsar-perf simulation-client options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`--port`|Port to listen on for controller|0| +|`--service-url`|Pulsar Service URL|| +|`-h`, `--help`|Help message|false| + +### `simulation-controller` +Run a simulation controller to give commands to servers + +Usage +```bash +$ pulsar-perf simulation-controller options +``` + +Options +|Flag|Description|Default| +|---|---|---| +|`--client-port`|The port that the clients are listening on|0| +|`--clients`|Comma-separated list of client hostnames|| +|`--cluster`|The cluster to test on|| +|`-h`, `--help`|Help message|false| + + +### `help` +This help message + +Usage +```bash +$ pulsar-perf help +``` + + +## `bookkeeper` +A tool for managing BookKeeper. + +Usage +```bash +$ bookkeeper command +``` + +Commands +* `auto-recovery` +* `bookie` +* `localbookie` +* `upgrade` +* `shell` + + +Environment variables + +The table below lists the environment variables that you can use to configure the bookkeeper tool. 
+
+|Variable|Description|Default|
+|---|---|---|
+|BOOKIE_LOG_CONF|Log4j configuration file|conf/log4j2.yaml|
+|BOOKIE_CONF|BookKeeper configuration file|conf/bk_server.conf|
+|BOOKIE_EXTRA_OPTS|Extra options to be passed to the JVM||
+|BOOKIE_EXTRA_CLASSPATH|Extra paths for BookKeeper's classpath||
+|ENTRY_FORMATTER_CLASS|The Java class used to format entries||
+|BOOKIE_PID_DIR|Folder where the BookKeeper server PID file should be stored||
+|BOOKIE_STOP_TIMEOUT|Wait time before forcefully killing the Bookie server instance if attempts to stop it are not successful||
+
+
+### `auto-recovery`
+Runs an auto-recovery service daemon
+
+Usage
+```bash
+$ bookkeeper auto-recovery options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-c`, `--conf`|Configuration for the auto-recovery daemon||
+
+
+### `bookie`
+Starts up a BookKeeper server (aka bookie)
+
+Usage
+```bash
+$ bookkeeper bookie options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-c`, `--conf`|Configuration file for the bookie server||
+|-readOnly|Force start a read-only bookie server|false|
+|-withAutoRecovery|Start the auto-recovery service with the bookie server|false|
+
+
+### `localbookie`
+Runs a test ensemble of N bookies locally
+
+Usage
+```bash
+$ bookkeeper localbookie N
+```
+
+### `upgrade`
+Upgrade the bookie’s filesystem
+
+Usage
+```bash
+$ bookkeeper upgrade options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-c`, `--conf`|Configuration file for the bookie server||
+|`-u`, `--upgrade`|Upgrade the bookie’s directories||
+
+
+### `shell`
+Run shell for admin commands. To see a full listing of those commands, run `bookkeeper shell` without an argument.
+
+Usage
+```bash
+$ bookkeeper shell
+```
+
+Example
+```bash
+$ bookkeeper shell bookiesanity
+```
+
+## `broker-tool`
+
+The `broker-tool` is used for operations on a specific broker.
+
+Usage
+```bash
+$ broker-tool command
+```
+Commands
+* `load-report`
+* `help`
+
+Example
+There are two ways to get more information about a command:
+
+```bash
+$ broker-tool help command
+$ broker-tool command --help
+```
+
+### `load-report`
+
+Collect the load report of a specific broker.
+The command runs on a broker and helps troubleshoot why a broker cannot collect the correct load report, as the sketch below shows.
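+
+For example, the following might collect a load report every second (a sketch; the interval value is arbitrary and given in milliseconds):
+
+```shell
+$ broker-tool load-report --interval 1000
+```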
+ +Options +|Flag|Description|Default| +|---|---|---| +|`-i`, `--interval`| Interval to collect load report, in milliseconds || +|`-h`, `--help`| Display help information || + diff --git a/site2/website/versioned_docs/version-2.7.0/reference-configuration.md b/site2/website/versioned_docs/version-2.7.0/reference-configuration.md new file mode 100644 index 00000000000000..25266f0f59bf9e --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/reference-configuration.md @@ -0,0 +1,781 @@ +--- +id: version-2.7.0-reference-configuration +title: Pulsar configuration +sidebar_label: Pulsar configuration +original_id: reference-configuration +--- + + + + +Pulsar configuration can be managed via a series of configuration files contained in the [`conf`](https://github.com/apache/pulsar/tree/master/conf) directory of a Pulsar [installation](getting-started-standalone.md) + +- [BookKeeper](#bookkeeper) +- [Broker](#broker) +- [Client](#client) +- [Service discovery](#service-discovery) +- [Log4j](#log4j) +- [Log4j shell](#log4j-shell) +- [Standalone](#standalone) +- [WebSocket](#websocket) +- [Pulsar proxy](#pulsar-proxy) +- [ZooKeeper](#zookeeper) + +## BookKeeper + +BookKeeper is a replicated log storage system that Pulsar uses for durable storage of all messages. + + +|Name|Description|Default| +|---|---|---| +|bookiePort|The port on which the bookie server listens.|3181| +|allowLoopback|Whether the bookie is allowed to use a loopback interface as its primary interface (i.e. the interface used to establish its identity). By default, loopback interfaces are not allowed as the primary interface. Using a loopback interface as the primary interface usually indicates a configuration error. For example, it’s fairly common in some VPS setups to not configure a hostname or to have the hostname resolve to `127.0.0.1`. If this is the case, then all bookies in the cluster will establish their identities as `127.0.0.1:3181` and only one will be able to join the cluster. For VPSs configured like this, you should explicitly set the listening interface.|false| +|listeningInterface|The network interface on which the bookie listens. If not set, the bookie will listen on all interfaces.|eth0| +|advertisedAddress|Configure a specific hostname or IP address that the bookie should use to advertise itself to clients. If not set, bookie will advertised its own IP address or hostname, depending on the `listeningInterface` and `useHostNameAsBookieID` settings.|N/A| +|allowMultipleDirsUnderSameDiskPartition|Configure the bookie to allow/disallow multiple ledger/index/journal directories in the same filesystem disk partition|false| +|minUsableSizeForIndexFileCreation|The minimum safe usable size available in index directory for bookie to create index files while replaying journal at the time of bookie starts in Readonly Mode (in bytes).|1073741824| +|journalDirectory|The directory where Bookkeeper outputs its write-ahead log (WAL)|data/bookkeeper/journal| +|journalDirectories|Directories that BookKeeper outputs its write ahead log. Multi directories are available, being separated by `,`. For example: `journalDirectories=/tmp/bk-journal1,/tmp/bk-journal2`. If `journalDirectories` is set, bookies will skip `journalDirectory` and use this setting directory.|/tmp/bk-journal| +|ledgerDirectories|The directory where Bookkeeper outputs ledger snapshots. This could define multiple directories to store snapshots separated by comma, for example `ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data`. 
+
+|Name|Description|Default|
+|---|---|---|
+|bookiePort|The port on which the bookie server listens.|3181|
+|allowLoopback|Whether the bookie is allowed to use a loopback interface as its primary interface (i.e. the interface used to establish its identity). By default, loopback interfaces are not allowed as the primary interface. Using a loopback interface as the primary interface usually indicates a configuration error. For example, it’s fairly common in some VPS setups to not configure a hostname or to have the hostname resolve to `127.0.0.1`. If this is the case, then all bookies in the cluster will establish their identities as `127.0.0.1:3181` and only one will be able to join the cluster. For VPSs configured like this, you should explicitly set the listening interface.|false|
+|listeningInterface|The network interface on which the bookie listens. If not set, the bookie will listen on all interfaces.|eth0|
+|advertisedAddress|Configure a specific hostname or IP address that the bookie should use to advertise itself to clients. If not set, the bookie will advertise its own IP address or hostname, depending on the `listeningInterface` and `useHostNameAsBookieID` settings.|N/A|
+|allowMultipleDirsUnderSameDiskPartition|Configure the bookie to allow/disallow multiple ledger/index/journal directories in the same filesystem disk partition|false|
+|minUsableSizeForIndexFileCreation|The minimum safe usable size, in bytes, that must be available in the index directory for the bookie to create index files while replaying the journal when the bookie starts in read-only mode.|1073741824|
+|journalDirectory|The directory where BookKeeper outputs its write-ahead log (WAL)|data/bookkeeper/journal|
+|journalDirectories|Directories where BookKeeper outputs its write-ahead log. Multiple directories are allowed, separated by `,`. For example: `journalDirectories=/tmp/bk-journal1,/tmp/bk-journal2`. If `journalDirectories` is set, bookies skip `journalDirectory` and use the directories in this setting.|/tmp/bk-journal|
+|ledgerDirectories|The directory where BookKeeper outputs ledger snapshots. Multiple directories can be defined to store snapshots, separated by commas, for example `ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data`. Ideally, ledger dirs and the journal dir are each on a different device, which reduces the contention between random I/O and sequential writes. It is possible to run with a single disk, but performance will be significantly lower.|data/bookkeeper/ledgers|
+|ledgerManagerType|The type of ledger manager used to manage how ledgers are stored, managed, and garbage collected. See [BookKeeper Internals](http://bookkeeper.apache.org/docs/latest/getting-started/concepts) for more info.|hierarchical|
+|zkLedgersRootPath|The root ZooKeeper path used to store ledger metadata. This parameter is used by the ZooKeeper-based ledger manager as a root znode to store all ledgers.|/ledgers|
+|ledgerStorageClass|Ledger storage implementation class|org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage|
+|entryLogFilePreallocationEnabled|Enable or disable entry logger preallocation|true|
+|logSizeLimit|Max file size of the entry logger, in bytes. A new entry log file will be created when the old one reaches the file size limitation.|2147483648|
+|minorCompactionThreshold|Threshold of minor compaction. Entry log files whose remaining size percentage reaches below this threshold will be compacted in a minor compaction. If set to less than zero, the minor compaction is disabled.|0.2|
+|minorCompactionInterval|Time interval to run minor compaction, in seconds. If set to less than zero, the minor compaction is disabled. Note: should be greater than gcWaitTime. |3600|
+|majorCompactionThreshold|The threshold of major compaction. Entry log files whose remaining size percentage reaches below this threshold will be compacted in a major compaction. Those entry log files whose remaining size percentage is still higher than the threshold will never be compacted. If set to less than zero, the major compaction is disabled.|0.5|
+|majorCompactionInterval|The time interval to run major compaction, in seconds. If set to less than zero, the major compaction is disabled. Note: should be greater than gcWaitTime. |86400|
+|readOnlyModeEnabled|If `readOnlyModeEnabled=true`, then when all ledger disks are full, the bookie will be converted to read-only mode and serve only read requests. Otherwise the bookie will be shut down.|true|
+|forceReadOnlyBookie|Whether the bookie is force started in read-only mode.|false|
+|persistBookieStatusEnabled|Persist the bookie status locally on the disks, so that bookies can keep their status upon restarts.|false|
+|compactionMaxOutstandingRequests|Sets the maximum number of entries that can be compacted without flushing. When compacting, the entries are written to the entrylog and the new offsets are cached in memory. Once the entrylog is flushed the index is updated with the new offsets. This parameter controls the number of entries added to the entrylog before a flush is forced. A higher value for this parameter means more memory will be used for offsets. Each offset consists of 3 longs. This parameter should not be modified unless you’re fully aware of the consequences.|100000|
+|compactionRate|The rate at which compaction will read entries, in adds per second.|1000|
+|isThrottleByBytes|Throttle compaction by bytes or by entries.|false|
+|compactionRateByEntries|The rate at which compaction will read entries. The unit is entries added per second.|1000|
+|compactionRateByBytes|Set the rate at which compaction will read entries. The unit is bytes added per second.|1000000|
+|journalMaxSizeMB|Max file size of the journal file, in megabytes. 
A new journal file will be created when the old one reaches the file size limitation.|2048|
+|journalMaxBackups|The max number of old journal files to keep. Keeping a number of old journal files would help data recovery in special cases.|5|
+|journalPreAllocSizeMB|How much space to pre-allocate at a time in the journal, in megabytes.|16|
+|journalWriteBufferSizeKB|The size of the write buffers used for the journal, in kilobytes.|64|
+|journalRemoveFromPageCache|Whether pages should be removed from the page cache after force write.|true|
+|journalAdaptiveGroupWrites|Whether to group journal force writes, which optimizes group commit for higher throughput.|true|
+|journalMaxGroupWaitMSec|The maximum latency to impose on a journal write to achieve grouping.|1|
+|journalAlignmentSize|All journal writes and commits should be aligned to the given size|4096|
+|journalBufferedWritesThreshold|Maximum writes to buffer to achieve grouping|524288|
+|journalFlushWhenQueueEmpty|Whether to flush the journal when the journal queue is empty|false|
+|numJournalCallbackThreads|The number of threads that should handle journal callbacks|8|
+|openLedgerRereplicationGracePeriod | The grace period, in milliseconds, that the replication worker waits before fencing and replicating a ledger fragment that's still being written to upon bookie failure. | 30000 |
+|rereplicationEntryBatchSize|The maximum number of entries to keep in a fragment for re-replication|100|
+|autoRecoveryDaemonEnabled|Whether the bookie itself can start the auto-recovery service.|true|
+|lostBookieRecoveryDelay|How long to wait, in seconds, before starting auto recovery of a lost bookie.|0|
+|gcWaitTime|The interval to trigger the next garbage collection, in milliseconds. Since garbage collection runs in the background, too frequent gc hurts performance. It is better to use a higher gc interval if there is enough disk capacity.|900000|
+|gcOverreplicatedLedgerWaitTime|The interval to trigger the next garbage collection of overreplicated ledgers, in milliseconds. This should not be run very frequently since we read the metadata for all the ledgers on the bookie from zk.|86400000|
+|flushInterval|The interval to flush ledger index pages to disk, in milliseconds. Flushing index files introduces much random disk I/O. If the journal dir and ledger dirs are each on different devices, flushing does not affect performance. But if the journal dir and ledger dirs are on the same device, performance degrades significantly with too frequent flushing. You can consider increasing the flush interval to get better performance, but you need to pay more time on bookie server restart after failure.|60000|
+|bookieDeathWatchInterval|Interval to watch whether a bookie is dead or not, in milliseconds|1000|
+|allowStorageExpansion|Allow the bookie storage to expand. Newly added ledger and index dirs must be empty.|false|
+|zkServers|A list of one or more servers on which ZooKeeper is running. The server list can be comma-separated values, for example: zkServers=zk1:2181,zk2:2181,zk3:2181.|localhost:2181|
+|zkTimeout|ZooKeeper client session timeout in milliseconds. The bookie server will exit if it receives SESSION_EXPIRED because it was partitioned off from ZooKeeper for more than the session timeout; JVM garbage collection or disk I/O can cause SESSION_EXPIRED. 
Increasing this value could help avoid this issue.|30000|
+|zkRetryBackoffStartMs|The starting backoff time, in milliseconds, for ZooKeeper client retries.|1000|
+|zkRetryBackoffMaxMs|The maximum backoff time, in milliseconds, for ZooKeeper client retries.|10000|
+|zkEnableSecurity|Set ACLs on every node written on ZooKeeper, allowing users to read and write BookKeeper metadata stored on ZooKeeper. In order to make ACLs work you need to set up ZooKeeper JAAS authentication. All the bookies and clients need to share the same user, and this is usually done using Kerberos authentication. See ZooKeeper documentation.|false|
+|httpServerEnabled|The flag enables/disables starting the admin http server.|false|
+|httpServerPort|The http server port to listen on. Use `8000` as the port to keep it consistent with the prometheus stats provider.|8000|
+|httpServerClass|The http server class.|org.apache.bookkeeper.http.vertx.VertxHttpServer|
+|serverTcpNoDelay|This setting is used to enable/disable Nagle’s algorithm, which is a means of improving the efficiency of TCP/IP networks by reducing the number of packets that need to be sent over the network. If you are sending many small messages, such that more than one can fit in a single IP packet, setting server.tcpnodelay to false to enable Nagle’s algorithm can provide better performance.|true|
+|serverSockKeepalive|This setting is used to send keep-alive messages on connection-oriented sockets.|true|
+|serverTcpLinger|The socket linger timeout on close. When enabled, a close or shutdown will not return until all queued messages for the socket have been successfully sent or the linger timeout has been reached. Otherwise, the call returns immediately and the closing is done in the background.|0|
+|byteBufAllocatorSizeMax|The maximum buf size of the received ByteBuf allocator.|1048576|
+|nettyMaxFrameSizeBytes|The maximum netty frame size in bytes. Any message received larger than this will be rejected.|5253120|
+|openFileLimit|Max number of ledger index files that could be opened in the bookie server. If the number of ledger index files reaches this limitation, the bookie server starts to swap some ledgers from memory to disk. Too frequent swapping affects performance. You can tune this number to gain performance according to your requirements.|0|
+|pageSize|Size of an index page in the ledger cache, in bytes. A larger index page can improve the performance of writing pages to disk, which is efficient when you have a small number of ledgers and these ledgers have a similar number of entries. If you have a large number of ledgers and each ledger has fewer entries, a smaller index page would improve memory usage.|8192|
+|pageLimit|How many index pages are provided in the ledger cache. If the number of index pages reaches this limitation, the bookie server starts to swap some ledgers from memory to disk. You can increase this value when you find that swapping becomes more frequent. But make sure pageLimit*pageSize is not more than the JVM max memory limitation, otherwise you would get an OutOfMemoryException. In general, increasing pageLimit and using a smaller index page would gain better performance in the case of a large number of ledgers with fewer entries. If pageLimit is -1, the bookie server will use 1/3 of the JVM memory to compute the limitation of the number of index pages.|0|
+|readOnlyModeEnabled|If all configured ledger directories are full, then support only read requests for clients. 
If “readOnlyModeEnabled=true”, then when all ledger disks are full, the bookie will be converted to read-only mode and serve only read requests. Otherwise the bookie will be shut down.|true|
+|diskUsageThreshold|For each ledger dir, the maximum disk space which can be used. Default is 0.95f, i.e. 95% of the disk can be used at most, after which nothing will be written to that partition. If all ledger dir partitions are full, then the bookie turns to read-only mode if ‘readOnlyModeEnabled=true’ is set, else it shuts down. Valid values should be in between 0 and 1 (exclusive).|0.95|
+|diskCheckInterval|Disk check interval in milliseconds, the interval to check the ledger dirs usage.|10000|
+|auditorPeriodicCheckInterval|Interval at which the auditor will do a check of all ledgers in the cluster. By default this runs once a week. The interval is set in seconds. To disable the periodic check completely, set this to 0. Note that periodic checking will put extra load on the cluster, so it should not be run more frequently than once a day.|604800|
+|sortedLedgerStorageEnabled|Whether sorted-ledger storage is enabled.|true|
+|auditorPeriodicBookieCheckInterval|The interval between auditor bookie checks. The auditor bookie check checks ledger metadata to see which bookies should contain entries for each ledger. If a bookie that should contain entries is unavailable, then the ledger containing that entry is marked for recovery. Setting this to 0 disables the periodic check. Bookie checks will still run when a bookie fails. The interval is specified in seconds.|86400|
+|numAddWorkerThreads|The number of threads that should handle write requests. If zero, writes are handled by netty threads directly.|0|
+|numReadWorkerThreads|The number of threads that should handle read requests. If zero, reads are handled by netty threads directly.|8|
+|numHighPriorityWorkerThreads|The number of threads that should be used for high priority requests (i.e. recovery reads and adds, and fencing).|8|
+|maxPendingReadRequestsPerThread|If read worker threads are enabled, limit the number of pending requests, to prevent the executor queue from growing indefinitely.|2500|
+|maxPendingAddRequestsPerThread|The limit on the number of pending requests, which is used to prevent the executor queue from growing indefinitely when add worker threads are enabled.|10000|
+|isForceGCAllowWhenNoSpace|Whether force compaction is allowed when the disk is full or almost full. Forcing GC could get some space back, but could also fill up the disk space more quickly. This is because new log files are created before GC, while old garbage log files are deleted after GC.|false|
+|verifyMetadataOnGC|True if the bookie should double check `readMetadata` prior to GC.|false|
+|flushEntrylogBytes|Entry log flush interval in bytes. Flushing in smaller chunks but more frequently reduces spikes in disk I/O. Flushing too frequently may also affect performance negatively.|268435456|
+|readBufferSizeBytes|The number of bytes we should use as capacity for BufferedReadChannel.|4096|
+|writeBufferSizeBytes|The number of bytes used as capacity for the write buffer|65536|
+|useHostNameAsBookieID|Whether the bookie should use its hostname to register with the coordination service (e.g.: zookeeper service). When false, the bookie uses its IP address for the registration.|false|
+|allowEphemeralPorts|Whether the bookie is allowed to use an ephemeral port (port 0) as its server port. By default, an ephemeral port is not allowed. 
Using an ephemeral port as the service port usually indicates a configuration error. However, in unit tests, using an ephemeral port will address port conflict problems and allow running tests in parallel.|false|
+|enableLocalTransport|Whether the bookie is allowed to listen for BookKeeper clients executed on the local JVM.|false|
+|disableServerSocketBind|Whether the bookie is allowed to disable binding on network interfaces. This bookie will be available only to BookKeeper clients executed on the local JVM.|false|
+|skipListArenaChunkSize|The number of bytes that we should use as chunk allocation for `org.apache.bookkeeper.bookie.SkipListArena`.|4194304|
+|skipListArenaMaxAllocSize|The maximum size that we should allocate from the skiplist arena. Allocations larger than this should be allocated directly by the VM to avoid fragmentation.|131072|
+|bookieAuthProviderFactoryClass|The factory class name of the bookie authentication provider. If this is null, then there is no authentication.|null|
+|statsProviderClass||org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider|
+|prometheusStatsHttpPort||8000|
+|dbStorage_writeCacheMaxSizeMb|Size of the write cache. Memory is allocated from JVM direct memory. The write cache is used to buffer entries before flushing into the entry log. For good performance, it should be big enough to hold a substantial amount of entries in the flush interval. By default, it is allocated to 25% of the available direct memory.|N/A|
+|dbStorage_readAheadCacheMaxSizeMb|Size of the read cache. Memory is allocated from JVM direct memory. This read cache is pre-filled doing read-ahead whenever a cache miss happens. By default, it is allocated to 25% of the available direct memory.|N/A|
+|dbStorage_readAheadCacheBatchSize|How many entries to pre-fill in the cache after a read cache miss|1000|
+|dbStorage_rocksDB_blockCacheSize|Size of the RocksDB block-cache. For best performance, this cache should be big enough to hold a significant portion of the index database, which can reach ~2GB in some cases. By default, it uses 10% of direct memory.|N/A|
+|dbStorage_rocksDB_writeBufferSizeMB||64|
+|dbStorage_rocksDB_sstSizeInMB||64|
+|dbStorage_rocksDB_blockSize||65536|
+|dbStorage_rocksDB_bloomFilterBitsPerKey||10|
+|dbStorage_rocksDB_numLevels||-1|
+|dbStorage_rocksDB_numFilesInLevel0||4|
+|dbStorage_rocksDB_maxSizeInLevel1MB||256|
+
+
+## Broker
+
+Pulsar brokers are responsible for handling incoming messages from producers, dispatching messages to consumers, replicating data between clusters, and more.
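+
+As an example of the listener settings described in the table below, a broker that is reachable both inside and outside a private network might advertise two listeners and name one of them as the internal listener (a hypothetical `conf/broker.conf` excerpt; the addresses are placeholders):
+
+```bash
+# Hypothetical excerpt from conf/broker.conf (placeholder addresses)
+advertisedListeners=internal:pulsar://10.0.0.11:6650,external:pulsar://broker1.example.com:6650
+internalListenerName=internal
+```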

    The format is `<listener_name>:pulsar://<host>:<port>`.&#13;

    If there are multiple listeners, separate them with commas.

    **Note**: do not use this configuration with `advertisedAddress` and `brokerServicePort`. If the value of this configuration is empty, the broker uses `advertisedAddress` and `brokerServicePort`.|/|
+|internalListenerName|Specify the internal listener name for the broker.&#13;

    **Note**: the listener name must be contained in `advertisedListeners`.

    If the value of this configuration is empty, the broker uses the first listener as the internal listener.|/|
+|authenticateOriginalAuthData| If this flag is set to `true`, the broker authenticates the original Auth data; else it just accepts the originalPrincipal and authorizes it (if required). |false|
+|enablePersistentTopics| Whether persistent topics are enabled on the broker |true|
+|enableNonPersistentTopics| Whether non-persistent topics are enabled on the broker |true|
+|functionsWorkerEnabled| Whether the Pulsar Functions worker service is enabled in the broker |false|
+|exposePublisherStats|Whether to enable topic level metrics.|true|
+|statsUpdateFrequencyInSecs||60|
+|statsUpdateInitialDelayInSecs||60|
+|zookeeperServers| Zookeeper quorum connection string ||
+|zooKeeperCacheExpirySeconds|ZooKeeper cache expiry time in seconds|300|
+|configurationStoreServers| Configuration store connection string (as a comma-separated list) ||
+|brokerServicePort| Broker data port |6650|
+|brokerServicePortTls| Broker data port for TLS |6651|
+|webServicePort| Port to use to serve HTTP requests |8080|
+|webServicePortTls| Port to use to serve HTTPS requests |8443|
+|webSocketServiceEnabled| Enable the WebSocket API service in the broker |false|
+|webSocketNumIoThreads|The number of IO threads in the Pulsar Client used in the WebSocket proxy.|8|
+|webSocketConnectionsPerBroker|The number of connections per broker in the Pulsar Client used in the WebSocket proxy.|8|
+|webSocketSessionIdleTimeoutMillis|Time in milliseconds after which an idle WebSocket session times out.|300000|
+|webSocketMaxTextFrameSize|The maximum size of a text message during parsing in the WebSocket proxy.|1048576|
+|exposeTopicLevelMetricsInPrometheus|Whether to enable topic level metrics.|true|
+|exposeConsumerLevelMetricsInPrometheus|Whether to enable consumer level metrics.|false|
+|jvmGCMetricsLoggerClassName|Class name of the pluggable JVM GC metrics logger that can log GC-specific metrics.|N/A|
+|bindAddress| Hostname or IP address the service binds on, default is 0.0.0.0. |0.0.0.0|
+|advertisedAddress| Hostname or IP address the service advertises to the outside world. If not set, the value of `InetAddress.getLocalHost().getHostName()` is used. ||
+|clusterName| Name of the cluster to which this broker belongs ||
+|brokerDeduplicationEnabled| Sets the default behavior for message deduplication in the broker. If enabled, the broker will reject messages that were already stored in the topic. This setting can be overridden on a per-namespace basis. |false|
+|brokerDeduplicationMaxNumberOfProducers| The maximum number of producers for which information will be stored for deduplication purposes. |10000|
+|brokerDeduplicationEntriesInterval| The number of entries after which a deduplication informational snapshot is taken. A larger interval will lead to fewer snapshots being taken, though this would also lengthen the topic recovery time (the time required for entries published after the snapshot to be replayed). |1000|
+|brokerDeduplicationProducerInactivityTimeoutMinutes| The time of inactivity (in minutes) after which the broker will discard deduplication information related to a disconnected producer. |360|
+|dispatchThrottlingRatePerReplicatorInMsg| The default messages-per-second dispatch throttling limit for every replicator in replication. 
The value of `0` disables replication message dispatch throttling.| 0 |
+|dispatchThrottlingRatePerReplicatorInByte| The default bytes-per-second dispatch throttling limit for every replicator in replication. The value of `0` disables replication message-byte dispatch throttling.| 0 |
+|zooKeeperSessionTimeoutMillis| Zookeeper session timeout in milliseconds |30000|
+|brokerShutdownTimeoutMs| Time to wait for broker graceful shutdown. After this time elapses, the process will be killed |60000|
+|skipBrokerShutdownOnOOM| Flag to skip broker shutdown when the broker handles an out-of-memory error. |false|
+|backlogQuotaCheckEnabled| Enable backlog quota check. Enforces an action on the topic when the quota is reached |true|
+|backlogQuotaCheckIntervalInSeconds| How often to check for topics that have reached the quota |60|
+|backlogQuotaDefaultLimitGB| The default per-topic backlog quota limit. Being less than 0 means no limitation. By default, it is -1. | -1 |
+|backlogQuotaDefaultRetentionPolicy|The default backlog quota retention policy. By default, it is `producer_request_hold`.&#13;
  • 'producer_request_hold' Policy which holds producer's send request until the resource becomes available (or holding times out)
  • 'producer_exception' Policy which throws `javax.jms.ResourceAllocationException` to the producer
  • 'consumer_backlog_eviction' Policy which evicts the oldest message from the slowest consumer's backlog
  |producer_request_hold|
+|allowAutoTopicCreation| Enable topic auto creation if a new producer or consumer connects |true|
+|allowAutoTopicCreationType| The type of topic that is allowed to be automatically created (partitioned/non-partitioned). |non-partitioned|
+|allowAutoSubscriptionCreation| Enable subscription auto creation if a new consumer connects |true|
+|defaultNumPartitions| The default number of partitions when a partitioned topic is automatically created, if `allowAutoTopicCreationType` is partitioned |1|
+|brokerDeleteInactiveTopicsEnabled| Enable the deletion of inactive topics |true|
+|brokerDeleteInactiveTopicsFrequencySeconds| How often to check for inactive topics |60|
+| brokerDeleteInactiveTopicsMode | Set the mode to delete inactive topics.&#13;
  • `delete_when_no_subscriptions`: delete the topic which has no subscriptions or active producers.
  • `delete_when_subscriptions_caught_up`: delete the topic whose subscriptions have no backlogs and which has no active producers or consumers. | `delete_when_no_subscriptions` |
+| brokerDeleteInactiveTopicsMaxInactiveDurationSeconds | Set the maximum duration for inactive topics. If it is not specified, the `brokerDeleteInactiveTopicsFrequencySeconds` parameter is adopted. | N/A |
+|messageExpiryCheckIntervalInMinutes| How frequently to proactively check and purge expired messages |5|
+|brokerServiceCompactionMonitorIntervalInSeconds| Interval between checks to see if topics with compaction policies need to be compacted |60|
+|delayedDeliveryEnabled|Whether to enable delayed delivery for messages. If disabled, messages will be immediately delivered and there will be no tracking overhead.|true|
+|delayedDeliveryTickTimeMillis|Control the tick time for retrying on delayed delivery, which affects the accuracy of the delivery time compared to the scheduled time. By default, it is 1 second.|1000|
+|activeConsumerFailoverDelayTimeMillis| How long to delay rewinding the cursor and dispatching messages when the active consumer is changed. |1000|
+|clientLibraryVersionCheckEnabled| Enable check for minimum allowed client library version |false|
+|clientLibraryVersionCheckAllowUnversioned| Allow client libraries with no version information |true|
+|statusFilePath| Path for the file used to determine the rotation status for the broker when responding to service discovery health checks ||
+|preferLaterVersions| If true (and ModularLoadManagerImpl is being used), the load manager will attempt to use only brokers running the latest software version (to minimize impact to bundles) |false|
+|maxNumPartitionsPerPartitionedTopic|Max number of partitions per partitioned topic. Use 0 or a negative number to disable the check|0|
+|tlsEnabled|Deprecated - Use `webServicePortTls` and `brokerServicePortTls` instead. |false|
+|tlsCertificateFilePath| Path for the TLS certificate file ||
+|tlsKeyFilePath| Path for the TLS private key file ||
+|tlsTrustCertsFilePath| Path for the trusted TLS certificate file. This cert is used to verify that any certs presented by connecting clients are signed by a certificate authority. If this verification fails, then the certs are untrusted and the connections are dropped. ||
+|tlsAllowInsecureConnection| Accept untrusted TLS certificate from client. If it is set to `true`, a client with a cert which cannot be verified with the 'tlsTrustCertsFilePath' cert will be allowed to connect to the server, though the cert will not be used for client authentication. |false|
+|tlsProtocols|Specify the tls protocols the broker will use to negotiate during TLS Handshake. Multiple values can be specified, separated by commas. Example:- ```TLSv1.2```, ```TLSv1.1```, ```TLSv1``` ||
+|tlsCiphers|Specify the tls cipher the broker will use to negotiate during TLS Handshake. Multiple values can be specified, separated by commas. 
Example:- ```TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256```||
+|tlsEnabledWithKeyStore| Enable TLS with KeyStore type configuration in broker |false|
+|tlsProvider| TLS Provider for KeyStore type ||
+|tlsKeyStoreType| TLS KeyStore type configuration in broker: JKS, PKCS12 |JKS|
+|tlsKeyStore| TLS KeyStore path in broker ||
+|tlsKeyStorePassword| TLS KeyStore password for broker ||
+|brokerClientTlsEnabledWithKeyStore| Whether the internal client uses the KeyStore type to authenticate with Pulsar brokers |false|
+|brokerClientSslProvider| The TLS Provider used by the internal client to authenticate with other Pulsar brokers ||
+|brokerClientTlsTrustStoreType| TLS TrustStore type configuration for the internal client: JKS, PKCS12, used by the internal client to authenticate with Pulsar brokers |JKS|
+|brokerClientTlsTrustStore| TLS TrustStore path for the internal client, used by the internal client to authenticate with Pulsar brokers ||
+|brokerClientTlsTrustStorePassword| TLS TrustStore password for the internal client, used by the internal client to authenticate with Pulsar brokers ||
+|brokerClientTlsCiphers| Specify the tls cipher the internal client will use to negotiate during TLS Handshake. (a comma-separated list of ciphers) e.g. [TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256]||
+|brokerClientTlsProtocols|Specify the tls protocols the broker will use to negotiate during TLS handshake. (a comma-separated list of protocol names). e.g. [TLSv1.2, TLSv1.1, TLSv1] ||
+|ttlDurationDefaultInSeconds| The default ttl for namespaces if ttl is not configured in namespace policies. |0|
+|tokenSecretKey| Configure the secret key to be used to validate auth tokens. The key can be specified like: `tokenSecretKey=data:;base64,xxxxxxxxx` or `tokenSecretKey=file:///my/secret.key`||
+|tokenPublicKey| Configure the public key to be used to validate auth tokens. The key can be specified like: `tokenPublicKey=data:;base64,xxxxxxxxx` or `tokenPublicKey=file:///my/secret.key`||
+|tokenPublicAlg| Configure the algorithm to be used to validate auth tokens. This can be any of the asymmetric algorithms supported by Java JWT (https://github.com/jwtk/jjwt#signature-algorithms-keys) |RS256|
+|tokenAuthClaim| Specify which of the token's claims will be used as the authentication "principal" or "role". The default "sub" claim will be used if this is left blank ||
+|tokenAudienceClaim| The token audience "claim" name, e.g. "aud", that will be used to get the audience from the token. If not set, the audience will not be verified. ||
+|tokenAudience| The token audience stands for this broker. The `tokenAudienceClaim` field of a valid token must contain this value. ||
+|maxUnackedMessagesPerConsumer| Max number of unacknowledged messages a consumer on a shared subscription is allowed to receive. The broker will stop sending messages to a consumer once this limit is reached, until the consumer starts acknowledging messages back. Using a value of 0 disables the unacked-message limit check, and the consumer can receive messages without any restriction |50000|
+|maxUnackedMessagesPerSubscription| Max number of unacknowledged messages allowed per shared subscription. The broker will stop dispatching messages to all consumers of the subscription once this limit is reached, until consumers start acknowledging messages back and the unacked count falls to limit/2. 
Using a value of 0 disables the unacked-message limit check, and the dispatcher can dispatch messages without any restriction |200000|
+|subscriptionRedeliveryTrackerEnabled| Enable subscription message redelivery tracker |true|
+| subscriptionExpirationTimeMinutes | How long to wait before deleting inactive subscriptions, measured from the last consumption.&#13;

    Setting this configuration to a value **greater than 0** deletes inactive subscriptions automatically.
    Setting this configuration to **0** does not delete inactive subscriptions automatically.

    Since this configuration takes effect on all topics, if there is even one topic whose subscriptions should not be deleted automatically, you need to set it to 0.
    Instead, you can set a subscription expiration time for each **namespace** using the [`pulsar-admin namespaces set-subscription-expiration-time options` command](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-set-subscription-expiration-time-em-). | 0 |
+|maxConcurrentLookupRequest| Max number of concurrent lookup requests the broker allows, to throttle heavy incoming lookup traffic |50000|
+|maxConcurrentTopicLoadRequest| Max number of concurrent topic loading requests the broker allows, to control the number of zk-operations |5000|
+|authenticationEnabled| Enable authentication |false|
+|authenticationProviders| Authentication provider name list, which is a comma-separated list of class names ||
+| authenticationRefreshCheckSeconds | Interval of time for checking for expired authentication credentials | 60 |
+|authorizationEnabled| Enforce authorization |false|
+|superUserRoles| Role names that are treated as “super-user”, meaning they will be able to do all admin operations and publish/consume from all topics ||
+|brokerClientAuthenticationPlugin| Authentication settings of the broker itself. Used when the broker connects to other brokers, either in the same or other clusters ||
+|brokerClientAuthenticationParameters|||
+|athenzDomainNames| Supported Athenz provider domain names (comma-separated) for authentication ||
+|exposePreciseBacklogInPrometheus| Enable exposing the precise backlog stats; set to false to use the published counter and consumed counter to calculate, which is more efficient but may be inaccurate. |false|
+|schemaRegistryStorageClassName|The schema storage implementation used by this broker.|org.apache.pulsar.broker.service.schema.BookkeeperSchemaStorageFactory|
+|isSchemaValidationEnforced|Enforce schema validation in the following case: if a producer without a schema attempts to produce to a topic with a schema, the producer fails to connect. Use this with caution, since non-Java clients do not support schemas; if this setting is enabled, non-Java clients fail to produce.|false|
+|offloadersDirectory|The directory for all the offloader implementations.|./offloaders|
+|bookkeeperMetadataServiceUri| Metadata service uri that bookkeeper uses for loading the corresponding metadata driver and resolving its metadata service location. This value can be fetched using the `bookkeeper shell whatisinstanceid` command in the BookKeeper cluster. For example: zk+hierarchical://localhost:2181/ledgers. The metadata service uri list can also be semicolon-separated values like: zk+hierarchical://zk1:2181;zk2:2181;zk3:2181/ledgers ||
+|bookkeeperClientAuthenticationPlugin| Authentication plugin to use when connecting to bookies ||
+|bookkeeperClientAuthenticationParametersName| BookKeeper auth plugin implementation-specific parameter names and values ||
+|bookkeeperClientAuthenticationParameters|||
+|bookkeeperClientTimeoutInSeconds| Timeout for BK add / read operations |30|
+|bookkeeperClientSpeculativeReadTimeoutInMillis| Speculative reads are initiated if a read request does not complete within a certain time. Using a value of 0 disables speculative reads |0|
+|bookkeeperNumberOfChannelsPerBookie| Number of channels per bookie |16|
+|bookkeeperClientHealthCheckEnabled| Enable bookies health check. Bookies that have more than the configured number of failures within the interval will be quarantined for some time. 
During this period, new ledgers won’t be created on these bookies |true|
+|bookkeeperClientHealthCheckIntervalSeconds||60|
+|bookkeeperClientHealthCheckErrorThresholdPerInterval||5|
+|bookkeeperClientHealthCheckQuarantineTimeInSeconds ||1800|
+|bookkeeperClientRackawarePolicyEnabled| Enable the rack-aware bookie selection policy. BK will choose bookies from different racks when forming a new bookie ensemble |true|
+|bookkeeperClientRegionawarePolicyEnabled| Enable the region-aware bookie selection policy. BK will choose bookies from different regions and racks when forming a new bookie ensemble. If enabled, the value of bookkeeperClientRackawarePolicyEnabled is ignored |false|
+|bookkeeperClientMinNumRacksPerWriteQuorum| Minimum number of racks per write quorum. The BK rack-aware bookie selection policy will try to get bookies from at least 'bookkeeperClientMinNumRacksPerWriteQuorum' racks for a write quorum. |2|
+|bookkeeperClientEnforceMinNumRacksPerWriteQuorum| Enforces the rack-aware bookie selection policy to pick bookies from 'bookkeeperClientMinNumRacksPerWriteQuorum' racks for a write quorum. If BK cannot find such bookies, it throws BKNotEnoughBookiesException instead of picking a random one. |false|
+|bookkeeperClientReorderReadSequenceEnabled| Enable/disable reordering the read sequence on reading entries. |false|
+|bookkeeperClientIsolationGroups| Enable bookie isolation by specifying a list of bookie groups to choose from. Any bookie outside the specified groups will not be used by the broker ||
+|bookkeeperClientSecondaryIsolationGroups| Enable the bookie secondary-isolation group if bookkeeperClientIsolationGroups does not have enough bookies available. ||
+|bookkeeperClientMinAvailableBookiesInIsolationGroups| The minimum number of bookies that should be available as part of bookkeeperClientIsolationGroups; otherwise the broker includes bookkeeperClientSecondaryIsolationGroups bookies in the isolated list. ||
+|bookkeeperClientGetBookieInfoIntervalSeconds| Set the interval to periodically check bookie info |86400|
+|bookkeeperClientGetBookieInfoRetryIntervalSeconds| Set the interval to retry a failed bookie info lookup |60|
+|bookkeeperEnableStickyReads | Enable/disable having read operations for a ledger be sticky to a single bookie. If this flag is enabled, the client will use one single bookie (by preference) to read all entries for a ledger. | true |
+|managedLedgerDefaultEnsembleSize| Number of bookies to use when creating a ledger |2|
+|managedLedgerDefaultWriteQuorum| Number of copies to store for each message |2|
+|managedLedgerDefaultAckQuorum| Number of guaranteed copies (acks to wait for before a write is complete) |2|
+|managedLedgerCacheSizeMB| Amount of memory to use for caching data payloads in the managed ledger. This memory is allocated from JVM direct memory and it’s shared across all the topics running in the same broker. 
By default, it uses 1/5 of the available direct memory ||
+|managedLedgerCacheCopyEntries| Whether we should make a copy of the entry payloads when inserting into the cache| false|
+|managedLedgerCacheEvictionWatermark| Threshold to bring the cache level down to when eviction is triggered |0.9|
+|managedLedgerCacheEvictionFrequency| Configure the cache eviction frequency for the managed ledger cache (evictions/sec) | 100.0 |
+|managedLedgerCacheEvictionTimeThresholdMillis| All entries that have stayed in the cache for more than the configured time will be evicted | 1000 |
+|managedLedgerCursorBackloggedThreshold| Configure the threshold (in number of entries) from which a cursor should be considered 'backlogged' and thus should be set as inactive. | 1000|
+|managedLedgerDefaultMarkDeleteRateLimit| Rate limit the amount of writes per second generated by consumers acknowledging messages |1.0|
+|managedLedgerMaxEntriesPerLedger| Max number of entries to append to a ledger before triggering a rollover. A ledger rollover is triggered on these conditions:&#13;
    • Either the max rollover time has been reached
    • or max entries have been written to the ledger and at least min-time has passed&#13;
    |50000|
+|managedLedgerMinLedgerRolloverTimeMinutes| Minimum time between ledger rollovers for a topic |10|
+|managedLedgerMaxLedgerRolloverTimeMinutes| Maximum time before forcing a ledger rollover for a topic |240|
+|managedLedgerCursorMaxEntriesPerLedger| Max number of entries to append to a cursor ledger |50000|
+|managedLedgerCursorRolloverTimeInSeconds| Max time before triggering a rollover on a cursor ledger |14400|
+|managedLedgerMaxUnackedRangesToPersist| Max number of “acknowledgment holes” that are going to be persistently stored. When acknowledging out of order, a consumer will leave holes that are supposed to be quickly filled by acking all the messages. The information of which messages are acknowledged is persisted by compressing into “ranges” of messages that were acknowledged. After the max number of ranges is reached, the information will only be tracked in memory and messages will be redelivered in case of crashes. |1000|
+|autoSkipNonRecoverableData| Skip reading non-recoverable/unreadable data ledgers in the managed ledger's list. It helps when data ledgers get corrupted in BookKeeper and the managed cursor is stuck at that ledger. |false|
+|loadBalancerEnabled| Enable load balancer |true|
+|loadBalancerPlacementStrategy| Strategy to assign a new bundle |weightedRandomSelection|
+|loadBalancerReportUpdateThresholdPercentage| Percentage of change to trigger a load report update |10|
+|loadBalancerReportUpdateMaxIntervalMinutes| Maximum interval to update the load report |15|
+|loadBalancerHostUsageCheckIntervalMinutes| Frequency of report to collect |1|
+|loadBalancerSheddingIntervalMinutes| Load shedding interval. The broker periodically checks whether some traffic should be offloaded from over-loaded brokers to under-loaded brokers |30|
+|loadBalancerSheddingGracePeriodMinutes| Prevent the same topics from being shed and moved to other brokers more than once within this timeframe |30|
+|loadBalancerBrokerMaxTopics| Usage threshold to allocate the max number of topics to a broker |50000|
+|loadBalancerBrokerUnderloadedThresholdPercentage| Usage threshold to determine a broker as under-loaded |1|
+|loadBalancerBrokerOverloadedThresholdPercentage| Usage threshold to determine a broker as over-loaded |85|
+|loadBalancerResourceQuotaUpdateIntervalMinutes| Interval to update namespace bundle resource quota |15|
+|loadBalancerBrokerComfortLoadLevelPercentage| Usage threshold to determine a broker is having just the right level of load |65|
+|loadBalancerAutoBundleSplitEnabled| Enable/disable namespace bundle auto split |false|
+|loadBalancerNamespaceBundleMaxTopics| Maximum topics in a bundle, otherwise bundle split will be triggered |1000|
+|loadBalancerNamespaceBundleMaxSessions| Maximum sessions (producers + consumers) in a bundle, otherwise bundle split will be triggered |1000|
+|loadBalancerNamespaceBundleMaxMsgRate| Maximum msgRate (in + out) in a bundle, otherwise bundle split will be triggered |1000|
+|loadBalancerNamespaceBundleMaxBandwidthMbytes| Maximum bandwidth (in + out) in a bundle, otherwise bundle split will be triggered |100|
+|loadBalancerNamespaceMaximumBundles| Maximum number of bundles in a namespace |128|
+|replicationMetricsEnabled| Enable replication metrics |true|
+|replicationConnectionsPerBroker| Max number of connections to open for each broker in a remote cluster. More connections host-to-host lead to better throughput over high-latency links. 
|16|
+|replicationProducerQueueSize| Replicator producer queue size |1000|
+|replicatorPrefix| Replicator prefix used for the replicator producer name and cursor name |pulsar.repl|
+|replicationTlsEnabled| Enable TLS when talking with other clusters to replicate messages |false|
+|brokerServicePurgeInactiveFrequencyInSeconds|Deprecated. Use `brokerDeleteInactiveTopicsFrequencySeconds`.|60|
+|transactionCoordinatorEnabled|Whether to enable the transaction coordinator in the broker.|true|
+|transactionMetadataStoreProviderClassName| |org.apache.pulsar.transaction.coordinator.impl.InMemTransactionMetadataStoreProvider|
+|defaultRetentionTimeInMinutes| Default message retention time |0|
+|defaultRetentionSizeInMB| Default retention size |0|
+|keepAliveIntervalSeconds| How often to check whether the connections are still alive |30|
+|bootstrapNamespaces| The bootstrap name. | N/A |
+|loadManagerClassName| Name of the load manager to use |org.apache.pulsar.broker.loadbalance.impl.SimpleLoadManagerImpl|
+|supportedNamespaceBundleSplitAlgorithms| Supported algorithm names for namespace bundle split |[range_equally_divide,topic_count_equally_divide]|
+|defaultNamespaceBundleSplitAlgorithm| Default algorithm name for namespace bundle split |range_equally_divide|
+|managedLedgerOffloadDriver| Driver to use to offload old data to long term storage (Possible values: S3, aws-s3, google-cloud-storage). When using google-cloud-storage, make sure both Google Cloud Storage and Google Cloud Storage JSON API are enabled for the project (check from Developers Console -> Api&auth -> APIs). ||
+|managedLedgerOffloadMaxThreads| Maximum number of thread pool threads for ledger offloading |2|
+|managedLedgerOffloadPrefetchRounds|The maximum prefetch rounds for ledger reading for offloading.|1|
+|managedLedgerUnackedRangesOpenCacheSetEnabled| Use Open Range-Set to cache unacknowledged messages |true|
+|managedLedgerOffloadDeletionLagMs|Delay between a ledger being successfully offloaded to long term storage and the ledger being deleted from bookkeeper | 14400000|
+|managedLedgerOffloadAutoTriggerSizeThresholdBytes|The number of bytes before triggering automatic offload to long term storage |-1 (disabled)|
+|s3ManagedLedgerOffloadRegion| For Amazon S3 ledger offload, AWS region ||
+|s3ManagedLedgerOffloadBucket| For Amazon S3 ledger offload, bucket to place offloaded ledgers into ||
+|s3ManagedLedgerOffloadServiceEndpoint| For Amazon S3 ledger offload, alternative endpoint to connect to (useful for testing) ||
+|s3ManagedLedgerOffloadMaxBlockSizeInBytes| For Amazon S3 ledger offload, max block size in bytes. (64MB by default, 5MB minimum) |67108864|
+|s3ManagedLedgerOffloadReadBufferSizeInBytes| For Amazon S3 ledger offload, read buffer size in bytes (1MB by default) |1048576|
+|gcsManagedLedgerOffloadRegion|For Google Cloud Storage ledger offload, region where the offload bucket is located. Go to this page for more details: https://cloud.google.com/storage/docs/bucket-locations .|N/A|
+|gcsManagedLedgerOffloadBucket|For Google Cloud Storage ledger offload, bucket to place offloaded ledgers into.|N/A|
+|gcsManagedLedgerOffloadMaxBlockSizeInBytes|For Google Cloud Storage ledger offload, the maximum block size in bytes. (64MB by default, 5MB minimum)|67108864|
+|gcsManagedLedgerOffloadReadBufferSizeInBytes|For Google Cloud Storage ledger offload, read buffer size in bytes. 
(1MB by default)|1048576|
+|gcsManagedLedgerOffloadServiceAccountKeyFile|For Google Cloud Storage, path to the json file containing service account credentials. For more details, see the "Service Accounts" section of https://support.google.com/googleapi/answer/6158849 .|N/A|
+|fileSystemProfilePath|For File System Storage, file system profile path.|../conf/filesystem_offload_core_site.xml|
+|fileSystemURI|For File System Storage, file system uri.|N/A|
+|s3ManagedLedgerOffloadRole| For Amazon S3 ledger offload, provide a role to assume before writing to s3 ||
+|s3ManagedLedgerOffloadRoleSessionName| For Amazon S3 ledger offload, provide a role session name when using a role |pulsar-s3-offload|
+| acknowledgmentAtBatchIndexLevelEnabled | Enable or disable the batch index acknowledgement. | false |
+|enableReplicatedSubscriptions|Whether to enable tracking of replicated subscriptions state across clusters.|true|
+|replicatedSubscriptionsSnapshotFrequencyMillis|The frequency of snapshots for replicated subscriptions tracking.|1000|
+|replicatedSubscriptionsSnapshotTimeoutSeconds|The timeout for building a consistent snapshot for tracking replicated subscriptions state.|30|
+|replicatedSubscriptionsSnapshotMaxCachedPerSubscription|The maximum number of snapshots to be cached per subscription.|10|
+|maxMessagePublishBufferSizeInMB|The maximum memory size for the broker handling messages sent from producers. If the processing message size exceeds this value, the broker stops reading data from the connection. Processing messages are messages that have been sent to the broker but for which the broker has not yet sent a response to the client; usually these messages are waiting to be written to bookies. It's shared across all the topics running in the same broker. The value `-1` disables the memory limitation. By default, it is 50% of direct memory.|N/A|
+|messagePublishBufferCheckIntervalInMillis|Interval between checks to see if the message publish buffer size exceeds the maximum. Use `0` or a negative number to disable the max publish buffer limiting.|100|
+|retentionCheckIntervalInSeconds|Interval between checks to see if consumed ledgers need to be trimmed. Use 0 or a negative number to disable the check.|120|
+| maxMessageSize | Set the maximum size of a message. | 5242880 |
+| preciseTopicPublishRateLimiterEnable | Enable precise topic publish rate limiting. | false |
+| lazyCursorRecovery | Whether to recover cursors lazily when trying to recover a managed ledger backing a persistent topic. It can improve the write availability of topics. The caveat is that when the recovered ledger is ready to write, it is not certain whether all old consumers' last mark delete positions (ack positions) can be recovered. Users can make the trade-off, or have custom logic in the application to checkpoint consumer state.| false |
+
+
+
+## Client
+
+The [`pulsar-client`](reference-cli-tools.md#pulsar-client) CLI tool can be used to publish messages to Pulsar and consume messages from Pulsar topics. This tool can be used in lieu of a client library.
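+
+For example, once the defaults below are configured in `conf/client.conf`, you can publish and consume a test message as follows (the topic and subscription names are placeholders):
+
+```bash
+$ pulsar-client produce my-topic --messages "hello-pulsar"
+$ pulsar-client consume my-topic -s "my-subscription" -n 1
+```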
+
+|Name|Description|Default|
+|---|---|---|
+|webServiceUrl| The web URL for the cluster. |http://localhost:8080/|
+|brokerServiceUrl| The Pulsar protocol URL for the cluster. |pulsar://localhost:6650/|
+|authPlugin| The authentication plugin. ||
+|authParams| The authentication parameters for the cluster, as a comma-separated string. ||
+|useTls| Whether or not TLS authentication will be enforced in the cluster. &#13;
|false|
+| tlsAllowInsecureConnection | Allow TLS connections to servers whose certificate cannot be verified to have been signed by a trusted certificate authority. | false |
+| tlsEnableHostnameVerification | Whether the server hostname must match the common name of the certificate that is used by the server. | false |
+|tlsTrustCertsFilePath|||
+| useKeyStoreTls | Enable TLS with KeyStore type configuration in the broker. | false |
+| tlsTrustStoreType | TLS TrustStore type configuration.&#13;
  • JKS
PKCS12 |JKS|
+| tlsTrustStore | TLS TrustStore path. | |
+| tlsTrustStorePassword | TLS TrustStore password. | |
+
+
+## Service discovery
+
+|Name|Description|Default|
+|---|---|---|
+|zookeeperServers| Zookeeper quorum connection string (comma-separated) ||
+|zooKeeperCacheExpirySeconds|ZooKeeper cache expiry time in seconds|300|
+|configurationStoreServers| Configuration store connection string (as a comma-separated list) ||
+|zookeeperSessionTimeoutMs| ZooKeeper session timeout |30000|
+|servicePort| Port to use to serve binary-proto requests |6650|
+|servicePortTls| Port to use to serve binary-proto-tls requests |6651|
+|webServicePort| Port that the discovery service listens on |8080|
+|webServicePortTls| Port to use to serve HTTPS requests |8443|
+|bindOnLocalhost| Control whether to bind directly on localhost rather than on the normal hostname |false|
+|authenticationEnabled| Enable authentication |false|
+|authenticationProviders| Authentication provider name list, which is a comma-separated list of class names ||
+|authorizationEnabled| Enforce authorization |false|
+|superUserRoles| Role names that are treated as “super-user”, meaning they will be able to do all admin operations and publish/consume from all topics (comma-separated) ||
+|tlsEnabled| Enable TLS |false|
+|tlsCertificateFilePath| Path for the TLS certificate file ||
+|tlsKeyFilePath| Path for the TLS private key file ||
+
+
+
+## Log4j
+
+|Name|Default|
+|---|---|
+|pulsar.root.logger| WARN,CONSOLE|
+|pulsar.log.dir| logs|
+|pulsar.log.file| pulsar.log|
+|log4j.rootLogger| ${pulsar.root.logger}|
+|log4j.appender.CONSOLE| org.apache.log4j.ConsoleAppender|
+|log4j.appender.CONSOLE.Threshold| DEBUG|
+|log4j.appender.CONSOLE.layout| org.apache.log4j.PatternLayout|
+|log4j.appender.CONSOLE.layout.ConversionPattern| %d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n|
+|log4j.appender.ROLLINGFILE| org.apache.log4j.DailyRollingFileAppender|
+|log4j.appender.ROLLINGFILE.Threshold| DEBUG|
+|log4j.appender.ROLLINGFILE.File| ${pulsar.log.dir}/${pulsar.log.file}|
+|log4j.appender.ROLLINGFILE.layout| org.apache.log4j.PatternLayout|
+|log4j.appender.ROLLINGFILE.layout.ConversionPattern| %d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n|
+|log4j.appender.TRACEFILE| org.apache.log4j.FileAppender|
+|log4j.appender.TRACEFILE.Threshold| TRACE|
+|log4j.appender.TRACEFILE.File| pulsar-trace.log|
+|log4j.appender.TRACEFILE.layout| org.apache.log4j.PatternLayout|
+|log4j.appender.TRACEFILE.layout.ConversionPattern| %d{ISO8601} - %-5p [%t:%C{1}@%L][%x] - %m%n|
+
+> Note: 'topic' in log4j2.appender is configurable.
+> - If you want to append all logs to a single topic, set the same topic name.
+> - If you want to append logs to different topics, you can set different topic names.
+
+## Log4j shell
+
+|Name|Default|
+|---|---|
+|bookkeeper.root.logger| ERROR,CONSOLE|
+|log4j.rootLogger| ${bookkeeper.root.logger}|
+|log4j.appender.CONSOLE| org.apache.log4j.ConsoleAppender|
+|log4j.appender.CONSOLE.Threshold| DEBUG|
+|log4j.appender.CONSOLE.layout| org.apache.log4j.PatternLayout|
+|log4j.appender.CONSOLE.layout.ConversionPattern| %d{ABSOLUTE} %-5p %m%n|
+|log4j.logger.org.apache.zookeeper| ERROR|
+|log4j.logger.org.apache.bookkeeper| ERROR|
+|log4j.logger.org.apache.bookkeeper.bookie.BookieShell| INFO|
+
+
+## Standalone
+
+|Name|Description|Default|
+|---|---|---|
+|authenticateOriginalAuthData| If this flag is set to `true`, the broker authenticates the original Auth data; else it just accepts the originalPrincipal and authorizes it (if required). 
|false|
+|zookeeperServers| The quorum connection string for local ZooKeeper ||
+|zooKeeperCacheExpirySeconds|ZooKeeper cache expiry time in seconds|300|
+|configurationStoreServers| Configuration store connection string (as a comma-separated list) ||
+|brokerServicePort| The port on which the standalone broker listens for connections |6650|
+|webServicePort| The port used by the standalone broker for HTTP requests |8080|
+|bindAddress| The hostname or IP address on which the standalone service binds |0.0.0.0|
+|advertisedAddress| The hostname or IP address that the standalone service advertises to the outside world. If not set, the value of `InetAddress.getLocalHost().getHostName()` is used. ||
+| numIOThreads | Number of threads to use for Netty IO | 2 * Runtime.getRuntime().availableProcessors() |
+| numHttpServerThreads | Number of threads to use for HTTP request processing | 2 * Runtime.getRuntime().availableProcessors()|
+|isRunningStandalone|This flag controls features that are meant to be used when running in standalone mode.|N/A|
+|clusterName| The name of the cluster that this broker belongs to. |standalone|
+| failureDomainsEnabled | Enable the cluster's failure-domain, which can distribute brokers into logical regions. | false |
+|zooKeeperSessionTimeoutMillis| The ZooKeeper session timeout, in milliseconds. |30000|
+|zooKeeperOperationTimeoutSeconds|ZooKeeper operation timeout in seconds.|30|
+|brokerShutdownTimeoutMs| The time to wait for graceful broker shutdown. After this time elapses, the process will be killed. |60000|
+|skipBrokerShutdownOnOOM| Flag to skip broker shutdown when the broker handles an out-of-memory error. |false|
+|backlogQuotaCheckEnabled| Enable the backlog quota check, which enforces a specified action when the quota is reached. |true|
+|backlogQuotaCheckIntervalInSeconds| How often to check for topics that have reached the backlog quota. |60|
+|backlogQuotaDefaultLimitGB| The default per-topic backlog quota limit. Being less than 0 means no limitation. By default, it is -1. |-1|
+|ttlDurationDefaultInSeconds| The default ttl for namespaces if ttl is not configured in namespace policies. |0|
+|brokerDeleteInactiveTopicsEnabled| Enable the deletion of inactive topics. |true|
+|brokerDeleteInactiveTopicsFrequencySeconds| How often to check for inactive topics, in seconds. |60|
+| maxPendingPublishdRequestsPerConnection | Maximum pending publish requests per connection, to avoid keeping a large number of pending requests in memory | 1000|
+|messageExpiryCheckIntervalInMinutes| How often to proactively check and purge expired messages. |5|
+|activeConsumerFailoverDelayTimeMillis| How long to delay rewinding the cursor and dispatching messages when the active consumer is changed. |1000|
+| subscriptionExpirationTimeMinutes | How long to wait before deleting inactive subscriptions, measured from the last consumption. When it is set to 0, inactive subscriptions are not deleted automatically | 0 |
+| subscriptionRedeliveryTrackerEnabled | Enable the subscription message redelivery tracker to send the redelivery count to the consumer. | true |
+|subscriptionKeySharedEnable|Whether to enable the Key_Shared subscription.|true|
+| subscriptionKeySharedUseConsistentHashing | In the Key_Shared subscription mode, with the default AUTO_SPLIT mode, use splitting ranges or consistent hashing to reassign keys to new consumers. | false |
+| subscriptionKeySharedConsistentHashingReplicaPoints | In the Key_Shared subscription mode, the number of points in the consistent-hashing ring. 
The greater the number, the more equal the assignment of keys to consumers. | 100 |
+| subscriptionExpiryCheckIntervalInMinutes | How frequently to proactively check and purge expired subscriptions |5 |
+| brokerDeduplicationEnabled | Set the default behavior for message deduplication in the broker. This can be overridden per-namespace. If it is enabled, the broker rejects messages that are already stored in the topic. | false |
+| brokerDeduplicationMaxNumberOfProducers | Maximum number of producers for which information is persisted for deduplication purposes | 10000 |
+| brokerDeduplicationEntriesInterval | Number of entries after which a deduplication information snapshot is taken. A greater interval leads to fewer snapshots being taken, though it would increase the topic recovery time, when the entries published after the snapshot need to be replayed. | 1000 |
+| brokerDeduplicationProducerInactivityTimeoutMinutes | The time of inactivity (in minutes) after which the broker discards deduplication information related to a disconnected producer. | 360 |
+| defaultNumberOfNamespaceBundles | When a namespace is created without specifying the number of bundles, this value is used as the default setting.| 4 |
+|clientLibraryVersionCheckEnabled| Enable checks for minimum allowed client library version. |false|
+|clientLibraryVersionCheckAllowUnversioned| Allow client libraries with no version information |true|
+|statusFilePath| The path for the file used to determine the rotation status for the broker when responding to service discovery health checks |/usr/local/apache/htdocs|
+|maxUnackedMessagesPerConsumer| The maximum number of unacknowledged messages allowed to be received by consumers on a shared subscription. The broker will stop sending messages to a consumer once this limit is reached or until the consumer begins acknowledging messages. A value of 0 disables the unacked message limit check and thus allows consumers to receive messages without any restrictions. |50000|
+|maxUnackedMessagesPerSubscription| The same as above, except per subscription rather than per consumer. |200000|
+| maxUnackedMessagesPerBroker | Maximum number of unacknowledged messages allowed per broker. Once this limit is reached, the broker stops dispatching messages to all shared subscriptions which have a higher number of unacknowledged messages, until subscriptions start acknowledging messages back and the unacknowledged message count reaches limit/2. When the value is set to 0, the unacknowledged message limit check is disabled and the broker does not block dispatchers. | 0 |
+| maxUnackedMessagesPerSubscriptionOnBrokerBlocked | Once the broker reaches the maxUnackedMessagesPerBroker limit, it blocks subscriptions which have more unacknowledged messages than this percentage limit, and those subscriptions do not receive any new messages until they acknowledge messages back. | 0.16 |
+|maxNumPartitionsPerPartitionedTopic|Max number of partitions per partitioned topic. Use 0 or a negative number to disable the check|0|
+|zookeeperSessionExpiredPolicy|There are two policies for handling ZooKeeper session expiry: "shutdown" and "reconnect". If it is set to the "shutdown" policy, the broker is shut down when the ZooKeeper session expires. If it is set to the "reconnect" policy, the broker tries to reconnect to the ZooKeeper server and re-register its metadata with ZooKeeper. 
Note: the "reconnect" policy is an experiment feature.|shutdown| +| topicPublisherThrottlingTickTimeMillis | Tick time to schedule task that checks topic publish rate limiting across all topics. A lower value can improve accuracy while throttling publish but it uses more CPU to perform frequent check. (Disable publish throttling with value 0) | 10| +| brokerPublisherThrottlingTickTimeMillis | Tick time to schedule task that checks broker publish rate limiting across all topics. A lower value can improve accuracy while throttling publish but it uses more CPU to perform frequent check. When the value is set to 0, publish throttling is disabled. |50 | +| brokerPublisherThrottlingMaxMessageRate | Maximum rate (in 1 second) of messages allowed to publish for a broker if the message rate limiting is enabled. When the value is set to 0, message rate limiting is disabled. | 0| +| brokerPublisherThrottlingMaxByteRate | Maximum rate (in 1 second) of bytes allowed to publish for a broker if the byte rate limiting is enabled. When the value is set to 0, the byte rate limiting is disabled. | 0 | +|subscribeThrottlingRatePerConsumer|Too many subscribe requests from a consumer can cause broker rewinding consumer cursors and loading data from bookies, hence causing high network bandwidth usage. When the positive value is set, broker will throttle the subscribe requests for one consumer. Otherwise, the throttling will be disabled. By default, throttling is disabled.|0| +|subscribeRatePeriodPerConsumerInSecond|Rate period for {subscribeThrottlingRatePerConsumer}. By default, it is 30s.|30| +| dispatchThrottlingRatePerTopicInMsg | Default messages (per second) dispatch throttling-limit for every topic. When the value is set to 0, default message dispatch throttling-limit is disabled. |0 | +| dispatchThrottlingRatePerTopicInByte | Default byte (per second) dispatch throttling-limit for every topic. When the value is set to 0, default byte dispatch throttling-limit is disabled. | 0| +| dispatchThrottlingRateRelativeToPublishRate | Enable dispatch rate-limiting relative to publish rate. | false | +|dispatchThrottlingRatePerSubscriptionInMsg|The defaulted number of message dispatching throttling-limit for a subscription. The value of 0 disables message dispatch-throttling.|0| +|dispatchThrottlingRatePerSubscriptionInByte|The default number of message-bytes dispatching throttling-limit for a subscription. +The value of 0 disables message-byte dispatch-throttling.|0| +| dispatchThrottlingOnNonBacklogConsumerEnabled | Enable dispatch-throttling for both caught up consumers as well as consumers who have backlogs. | true | +|dispatcherMaxReadBatchSize|The maximum number of entries to read from BookKeeper. By default, it is 100 entries.|100| +|dispatcherMaxReadSizeBytes|The maximum size in bytes of entries to read from BookKeeper. By default, it is 5MB.|5242880| +|dispatcherMinReadBatchSize|The minimum number of entries to read from BookKeeper. By default, it is 1 entry. When there is an error occurred on reading entries from bookkeeper, the broker will backoff the batch size to this minimum number.|1| +|dispatcherMaxRoundRobinBatchSize|The maximum number of entries to dispatch for a shared subscription. By default, it is 20 entries.|20| +| preciseDispatcherFlowControl | Precise dispathcer flow control according to history message number of each entry. | false | +| maxConcurrentLookupRequest | Maximum number of concurrent lookup request that the broker allows to throttle heavy incoming lookup traffic. 
| 50000 | +| maxConcurrentTopicLoadRequest | Maximum number of concurrent topic loading requests that the broker allows, to control the number of zk-operations. | 5000 | +| maxConcurrentNonPersistentMessagePerConnection | Maximum number of concurrent non-persistent messages that can be processed per connection. | 1000 | +| numWorkerThreadsForNonPersistentTopic | Number of worker threads to serve non-persistent topics. | 8 | +| enablePersistentTopics | Enable the broker to load persistent topics. | true | +| enableNonPersistentTopics | Enable the broker to load non-persistent topics. | true | +| maxProducersPerTopic | Maximum number of producers allowed to connect to a topic. Once this limit is reached, the broker rejects new producers until the number of connected producers decreases. When the value is set to 0, the maxProducersPerTopic limit check is disabled. | 0 | +| maxConsumersPerTopic | Maximum number of consumers allowed to connect to a topic. Once this limit is reached, the broker rejects new consumers until the number of connected consumers decreases. When the value is set to 0, the maxConsumersPerTopic limit check is disabled. | 0 | +| maxConsumersPerSubscription | Maximum number of consumers allowed to connect to a subscription. Once this limit is reached, the broker rejects new consumers until the number of connected consumers decreases. When the value is set to 0, the maxConsumersPerSubscription limit check is disabled. | 0 | +| maxNumPartitionsPerPartitionedTopic | Maximum number of partitions per partitioned topic. When the value is set to a negative number or is set to 0, the check is disabled. | 0 | +| tlsCertRefreshCheckDurationSec | TLS certificate refresh duration in seconds. When the value is set to 0, the TLS certificate is checked on every new connection. | 300 | +| tlsCertificateFilePath | Path for the TLS certificate file. | | +| tlsKeyFilePath | Path for the TLS private key file. | | +| tlsTrustCertsFilePath | Path for the trusted TLS certificate file.| | +| tlsAllowInsecureConnection | Accept untrusted TLS certificates from clients. If it is set to true, a client with a certificate which cannot be verified with the 'tlsTrustCertsFilePath' certificate is allowed to connect to the server, though the certificate is not used for client authentication. | false | +| tlsProtocols | Specify the TLS protocols the broker uses to negotiate during the TLS handshake. | | +| tlsCiphers | Specify the TLS ciphers the broker uses to negotiate during the TLS handshake. | | +| tlsRequireTrustedClientCertOnConnect | Trusted client certificates are required to connect with TLS. The connection is rejected if the client certificate is not trusted. In effect, this requires that all connecting clients perform TLS client authentication. | false | +| tlsEnabledWithKeyStore | Enable TLS with KeyStore type configuration in the broker. | false | +| tlsProvider | TLS Provider for KeyStore type. | | +| tlsKeyStoreType | TLS KeyStore type configuration in the broker.
  • JKS
  • PKCS12 |JKS| +| tlsKeyStore | TLS KeyStore path in the broker. | | +| tlsKeyStorePassword | TLS KeyStore password for the broker. | | +| tlsTrustStoreType | TLS TrustStore type configuration in the broker.
  • JKS
  • PKCS12 |JKS| +| tlsTrustStore | TLS TrustStore path in the broker. | | +| tlsTrustStorePassword | TLS TrustStore password for the broker. | | +| brokerClientTlsEnabledWithKeyStore | Configure whether the internal client uses the KeyStore type to authenticate with Pulsar brokers. | false | +| brokerClientSslProvider | The TLS Provider used by the internal client to authenticate with other Pulsar brokers. | | +| brokerClientTlsTrustStoreType | TLS TrustStore type configuration for the internal client to authenticate with Pulsar brokers.
  • JKS
  • PKCS12 | JKS | +| brokerClientTlsTrustStore | TLS TrustStore path for the internal client to authenticate with Pulsar brokers. | | +| brokerClientTlsTrustStorePassword | TLS TrustStore password for the internal client to authenticate with Pulsar brokers. | | +| brokerClientTlsCiphers | Specify the TLS cipher that the internal client uses to negotiate during TLS Handshake. | | +| brokerClientTlsProtocols | Specify the TLS protocols that the internal client uses to negotiate during the TLS handshake. | | +| systemTopicEnabled | Enable/Disable system topics. | false | +| topicLevelPoliciesEnabled | Enable or disable topic level policies. Topic level policies depend on the system topic. Please enable the system topic first. | false | +| proxyRoles | Role names that are treated as "proxy roles". If the broker sees a request with role as proxyRoles, it demands to see a valid original principal. | | +| authenticateOriginalAuthData | If this flag is set, the broker authenticates the original Auth data. Otherwise, it just accepts the originalPrincipal and authorizes it (if required). | false | +|authenticationEnabled| Enable authentication for the broker. |false| +|authenticationProviders| A comma-separated list of class names for authentication providers. |false| +|authorizationEnabled| Enforce authorization in brokers. |false| +| authorizationProvider | Authorization provider fully qualified class-name. | org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider | +| authorizationAllowWildcardsMatching | Allow wildcard matching in authorization. Wildcard matching is applicable only when the wildcard-character (*) presents at the **first** or **last** position. | false | +|superUserRoles| Role names that are treated as “superusers.” Superusers are authorized to perform all admin tasks. | | +|brokerClientAuthenticationPlugin| The authentication settings of the broker itself. Used when the broker connects to other brokers either in the same cluster or from other clusters. | | +|brokerClientAuthenticationParameters| The parameters that go along with the plugin specified using brokerClientAuthenticationPlugin. | | +|athenzDomainNames| Supported Athenz authentication provider domain names as a comma-separated list. | | +| anonymousUserRole | When this parameter is not empty, unauthenticated users act as the anonymousUserRole. | | +|tokenAuthClaim| Specify the token claim that will be used as the authentication "principal" or "role". The "subject" field will be used if this is left blank || +|tokenAudienceClaim| The token audience "claim" name, e.g. "aud". It is used to get the audience from the token. If it is not set, the audience is not verified. || +| tokenAudience | The token audience stands for this broker. The field `tokenAudienceClaim` of a valid token needs to contain this parameter.| | +|saslJaasClientAllowedIds|This is a regexp, which limits the range of possible ids which can connect to the Broker using SASL. By default, it is set to `SaslConstants.JAAS_CLIENT_ALLOWED_IDS_DEFAULT`, which is ".*pulsar.*", so only clients whose id contains 'pulsar' are allowed to connect.|N/A| +|saslJaasBrokerSectionName|Service Principal, for login context name. 
By default, it is set to `SaslConstants.JAAS_DEFAULT_BROKER_SECTION_NAME`, which is "Broker".|N/A| +|httpMaxRequestSize|If the value is larger than 0, the broker rejects all HTTP requests with bodies larger than the configured limit.|-1| +|exposePreciseBacklogInPrometheus| Enable exposing precise backlog stats. Set to false to calculate the backlog from the published counter and consumed counter instead, which is more efficient but may be inaccurate. |false| +|bookkeeperMetadataServiceUri|The metadata service URI that BookKeeper uses for loading the corresponding metadata driver and resolving its metadata service location. This value can be fetched using the `bookkeeper shell whatisinstanceid` command in a BookKeeper cluster. For example: `zk+hierarchical://localhost:2181/ledgers`. The metadata service URI list can also be semicolon-separated values like: `zk+hierarchical://zk1:2181;zk2:2181;zk3:2181/ledgers`.|N/A| +|bookkeeperClientAuthenticationPlugin| Authentication plugin to be used when connecting to bookies (BookKeeper servers). || +|bookkeeperClientAuthenticationParametersName| BookKeeper authentication plugin implementation parameters and values. || +|bookkeeperClientAuthenticationParameters| Parameters associated with the bookkeeperClientAuthenticationParametersName || +|bookkeeperClientTimeoutInSeconds| Timeout for BookKeeper add and read operations. |30| +|bookkeeperClientSpeculativeReadTimeoutInMillis| Speculative reads are initiated if a read request doesn’t complete within a certain time. A value of 0 disables speculative reads. |0| +|bookkeeperUseV2WireProtocol|Use the older BookKeeper wire protocol with bookies.|true| +|bookkeeperClientHealthCheckEnabled| Enable bookie health checks. |true| +|bookkeeperClientHealthCheckIntervalSeconds| The time interval, in seconds, at which health checks are performed. New ledgers are not created during health checks. |60| +|bookkeeperClientHealthCheckErrorThresholdPerInterval| Error threshold for health checks. |5| +|bookkeeperClientHealthCheckQuarantineTimeInSeconds| The time, in seconds, for which a bookie is quarantined if it has more than the allowed number of failures within the time interval specified by bookkeeperClientHealthCheckIntervalSeconds |1800| +|bookkeeperGetBookieInfoIntervalSeconds|Specify options for the GetBookieInfo check. This setting helps keep the list of bookies up to date on the brokers.|86400| +|bookkeeperGetBookieInfoRetryIntervalSeconds|Specify options for the GetBookieInfo check. This setting helps keep the list of bookies up to date on the brokers.|60| +|bookkeeperClientRackawarePolicyEnabled| |true| +|bookkeeperClientRegionawarePolicyEnabled| |false| +|bookkeeperClientMinNumRacksPerWriteQuorum| |2| +|bookkeeperClientEnforceMinNumRacksPerWriteQuorum| |false| +|bookkeeperClientReorderReadSequenceEnabled| |false| +|bookkeeperClientIsolationGroups||| +|bookkeeperClientSecondaryIsolationGroups| Enable the bookie secondary-isolation group if bookkeeperClientIsolationGroups does not have enough bookies available. || +|bookkeeperClientMinAvailableBookiesInIsolationGroups| Minimum number of bookies that should be available as part of bookkeeperClientIsolationGroups; otherwise, the broker includes bookkeeperClientSecondaryIsolationGroups bookies in the isolated list. || +| bookkeeperTLSProviderFactoryClass | Set the client security provider factory class name. | org.apache.bookkeeper.tls.TLSContextFactory | +| bookkeeperTLSClientAuthentication | Enable TLS authentication with bookies. | false | +| bookkeeperTLSKeyFileType | Supported type: PEM, JKS, PKCS12. 
| PEM | +| bookkeeperTLSTrustCertTypes | Supported type: PEM, JKS, PKCS12. | PEM | +| bookkeeperTLSKeyStorePasswordPath | Path to the file containing the keystore password, if the client keystore is password protected. | | +| bookkeeperTLSTrustStorePasswordPath | Path to the file containing the truststore password, if the client truststore is password protected. | | +| bookkeeperTLSKeyFilePath | Path for the TLS private key file. | | +| bookkeeperTLSCertificateFilePath | Path for the TLS certificate file. | | +| bookkeeperTLSTrustCertsFilePath | Path for the trusted TLS certificate file. | | +| bookkeeperDiskWeightBasedPlacementEnabled | Enable/Disable disk weight based placement. | false | +| bookkeeperExplicitLacIntervalInMills | Set the interval to check the need for sending an explicit LAC. When the value is set to 0, no explicit LAC is sent. | 0 | +| bookkeeperClientExposeStatsToPrometheus | Expose BookKeeper client managed ledger stats to Prometheus. | false | +|managedLedgerDefaultEnsembleSize| |1| +|managedLedgerDefaultWriteQuorum| |1| +|managedLedgerDefaultAckQuorum| |1| +| managedLedgerDigestType | Default type of checksum to use when writing to BookKeeper. | CRC32C | +| managedLedgerNumWorkerThreads | Number of threads to be used for managed ledger tasks dispatching. | 8 | +| managedLedgerNumSchedulerThreads | Number of threads to be used for managed ledger scheduled tasks. | 8 | +|managedLedgerCacheSizeMB| |N/A| +|managedLedgerCacheCopyEntries| Whether to copy the entry payloads when inserting into the cache.| false| +|managedLedgerCacheEvictionWatermark| |0.9| +|managedLedgerCacheEvictionFrequency| Configure the cache eviction frequency for the managed ledger cache (evictions/sec) | 100.0 | +|managedLedgerCacheEvictionTimeThresholdMillis| All entries that have stayed in the cache for longer than the configured time are evicted | 1000 | +|managedLedgerCursorBackloggedThreshold| Configure the threshold (in number of entries) beyond which a cursor should be considered 'backlogged' and thus should be set as inactive. | 1000| +|managedLedgerUnackedRangesOpenCacheSetEnabled| Use Open Range-Set to cache unacknowledged messages |true| +|managedLedgerDefaultMarkDeleteRateLimit| |0.1| +|managedLedgerMaxEntriesPerLedger| |50000| +|managedLedgerMinLedgerRolloverTimeMinutes| |10| +|managedLedgerMaxLedgerRolloverTimeMinutes| |240| +|managedLedgerCursorMaxEntriesPerLedger| |50000| +|managedLedgerCursorRolloverTimeInSeconds| |14400| +| managedLedgerMaxSizePerLedgerMbytes | Maximum ledger size before triggering a rollover for a topic. | 2048 | +| managedLedgerMaxUnackedRangesToPersist | Maximum number of "acknowledgment holes" that are going to be persistently stored. When acknowledging out of order, a consumer leaves holes that are supposed to be quickly filled by acknowledging all the messages. The information of which messages are acknowledged is persisted by compressing it into "ranges" of acknowledged messages. After the max number of ranges is reached, the information is only tracked in memory and messages are redelivered in case of crashes. | 10000 | +| managedLedgerMaxUnackedRangesToPersistInZooKeeper | Maximum number of "acknowledgment holes" that can be stored in ZooKeeper. If the number of unacknowledged message ranges is higher than this limit, the broker persists unacknowledged ranges into BookKeeper to avoid additional data overhead in ZooKeeper. | 1000 | +|autoSkipNonRecoverableData| |false| +| managedLedgerMetadataOperationsTimeoutSeconds | Operation timeout while updating managed-ledger metadata. 
| 60 | +| managedLedgerReadEntryTimeoutSeconds | Read entries timeout when the broker tries to read messages from BookKeeper. | 0 | +| managedLedgerAddEntryTimeoutSeconds | Add entry timeout when the broker tries to publish a message to BookKeeper. | 0 | +| managedLedgerNewEntriesCheckDelayInMillis | New entries check delay for the cursor under the managed ledger. If there are no new messages in the topic, the cursor tries to check again after the delay time. For consumption-latency-sensitive scenarios, you can set the value to a smaller value or 0, though a smaller value may degrade consumption throughput. By default, it is 10ms. |10| +| managedLedgerPrometheusStatsLatencyRolloverSeconds | Managed ledger Prometheus stats latency rollover seconds. | 60 | +| managedLedgerTraceTaskExecution | Whether to trace managed ledger task execution time. | true | +|loadBalancerEnabled| |false| +|loadBalancerPlacementStrategy| |weightedRandomSelection| +|loadBalancerReportUpdateThresholdPercentage| |10| +|loadBalancerReportUpdateMaxIntervalMinutes| |15| +|loadBalancerHostUsageCheckIntervalMinutes| |1| +|loadBalancerSheddingIntervalMinutes| |30| +|loadBalancerSheddingGracePeriodMinutes| |30| +|loadBalancerBrokerMaxTopics| |50000| +|loadBalancerBrokerUnderloadedThresholdPercentage| |1| +|loadBalancerBrokerOverloadedThresholdPercentage| |85| +|loadBalancerResourceQuotaUpdateIntervalMinutes| |15| +|loadBalancerBrokerComfortLoadLevelPercentage| |65| +|loadBalancerAutoBundleSplitEnabled| |false| +| loadBalancerAutoUnloadSplitBundlesEnabled | Enable/Disable automatic unloading of split bundles. | true | +|loadBalancerNamespaceBundleMaxTopics| |1000| +|loadBalancerNamespaceBundleMaxSessions| |1000| +|loadBalancerNamespaceBundleMaxMsgRate| |1000| +|loadBalancerNamespaceBundleMaxBandwidthMbytes| |100| +|loadBalancerNamespaceMaximumBundles| |128| +| loadBalancerBrokerThresholdShedderPercentage | The broker resource usage threshold. When the broker resource usage is greater than the Pulsar cluster average resource usage, the threshold shedder is triggered to offload bundles from the broker. It only takes effect in the ThresholdShedder strategy. | 10 | +| loadBalancerHistoryResourcePercentage | The history usage when calculating new resource usage. It only takes effect in the ThresholdShedder strategy. | 0.9 | +| loadBalancerBandwithInResourceWeight | The inbound bandwidth usage weight when calculating new resource usage. It only takes effect in the ThresholdShedder strategy. | 1.0 | +| loadBalancerBandwithOutResourceWeight | The outbound bandwidth usage weight when calculating new resource usage. It only takes effect in the ThresholdShedder strategy. | 1.0 | +| loadBalancerCPUResourceWeight | The CPU usage weight when calculating new resource usage. It only takes effect in the ThresholdShedder strategy. | 1.0 | +| loadBalancerMemoryResourceWeight | The heap memory usage weight when calculating new resource usage. It only takes effect in the ThresholdShedder strategy. | 1.0 | +| loadBalancerDirectMemoryResourceWeight | The direct memory usage weight when calculating new resource usage. It only takes effect in the ThresholdShedder strategy. 
| 1.0 | +| loadBalancerBundleUnloadMinThroughputThreshold | Bundle unload minimum throughput threshold. This avoids frequent bundle unloading. It only takes effect in the ThresholdShedder strategy. | 10 | +|replicationMetricsEnabled| |true| +|replicationConnectionsPerBroker| |16| +|replicationProducerQueueSize| |1000| +| replicationPolicyCheckDurationSeconds | Duration to check the replication policy, to avoid replicator inconsistency due to a missing ZooKeeper watch. When the value is set to 0, checking the replication policy is disabled. | 600 | +|defaultRetentionTimeInMinutes| |0| +|defaultRetentionSizeInMB| |0| +|keepAliveIntervalSeconds| |30| + + + + + +## WebSocket + +|Name|Description|Default| +|---|---|---| +|configurationStoreServers ||| +|zooKeeperSessionTimeoutMillis| |30000| +|zooKeeperCacheExpirySeconds|ZooKeeper cache expiry time in seconds|300 +|serviceUrl||| +|serviceUrlTls||| +|brokerServiceUrl||| +|brokerServiceUrlTls||| +|webServicePort||8080| +|webServicePortTls||8443| +|bindAddress||0.0.0.0| +|clusterName ||| +|authenticationEnabled||false| +|authenticationProviders||| +|authorizationEnabled||false| +|superUserRoles ||| +|brokerClientAuthenticationPlugin||| +|brokerClientAuthenticationParameters||| +|tlsEnabled||false| +|tlsAllowInsecureConnection||false| +|tlsCertificateFilePath||| +|tlsKeyFilePath ||| +|tlsTrustCertsFilePath||| + + +## Pulsar proxy + +The [Pulsar proxy](concepts-architecture-overview.md#pulsar-proxy) can be configured in the `conf/proxy.conf` file. + + +|Name|Description|Default| +|---|---|---| +|forwardAuthorizationCredentials| Forward client authorization credentials to the broker for re-authorization; make sure authentication is enabled for this to take effect. |false| +|zookeeperServers| The ZooKeeper quorum connection string (as a comma-separated list) || +|configurationStoreServers| Configuration store connection string (as a comma-separated list) || +| brokerServiceURL | The service URL pointing to the broker cluster. If service discovery is disabled, this URL should point to the discovery service provider. | | +| brokerServiceURLTLS | The TLS service URL pointing to the broker cluster. If service discovery is disabled, this URL should point to the discovery service provider. | | +| brokerWebServiceURL | The Web service URL pointing to the broker cluster. This setting is unnecessary if `zookeeperServers` is specified. | | +| brokerWebServiceURLTLS | The TLS Web service URL pointing to the broker cluster. This setting is unnecessary if `zookeeperServers` is specified. | | +| functionWorkerWebServiceURL | The Web service URL pointing to the function worker cluster. It is only configured when you set up function workers in a separate cluster. | | +| functionWorkerWebServiceURLTLS | The TLS Web service URL pointing to the function worker cluster. It is only configured when you set up function workers in a separate cluster. 
| | +|zookeeperSessionTimeoutMs| ZooKeeper session timeout (in milliseconds) |30000| +|zooKeeperCacheExpirySeconds|ZooKeeper cache expiry time in seconds|300 +|advertisedAddress|The hostname or IP address the service advertises to the outside world. If not set, the value of `InetAddress.getLocalHost().getHostname()` is used.|N/A| +|servicePort| The port to use to serve binary Protobuf requests |6650| +|servicePortTls| The port to use to serve binary Protobuf TLS requests |6651| +|statusFilePath| Path for the file used to determine the rotation status for the proxy instance when responding to service discovery health checks || +|authenticationEnabled| Whether authentication is enabled for the Pulsar proxy |false| +|authenticateMetricsEndpoint| Whether the '/metrics' endpoint requires authentication. Defaults to true. 'authenticationEnabled' must also be set for this to take effect. |true| +|authenticationProviders| Authentication provider name list (a comma-separated list of class names) || +|authorizationEnabled| Whether authorization is enforced by the Pulsar proxy |false| +|authorizationProvider| Authorization provider as a fully qualified class name |org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider| +| anonymousUserRole | When this parameter is not empty, unauthenticated users act as the anonymousUserRole. | | +|brokerClientAuthenticationPlugin| The authentication plugin used by the Pulsar proxy to authenticate with Pulsar brokers || +|brokerClientAuthenticationParameters| The authentication parameters used by the Pulsar proxy to authenticate with Pulsar brokers || +|brokerClientTrustCertsFilePath| The path to trusted certificates used by the Pulsar proxy to authenticate with Pulsar brokers || +|superUserRoles| Role names that are treated as “super-users,” meaning that they are able to perform all admin tasks || +|forwardAuthorizationCredentials| Whether client authorization credentials are forwarded to the broker for re-authorization. Authentication must be enabled via authenticationEnabled=true for this to take effect. |false| +|maxConcurrentInboundConnections| Max concurrent inbound connections. The proxy rejects requests beyond that. |10000| +|maxConcurrentLookupRequests| Max concurrent outbound connections. The proxy errors out requests beyond that. |50000| +|tlsEnabledInProxy| Deprecated - use `servicePortTls` and `webServicePortTls` instead. |false| +|tlsEnabledWithBroker| Whether TLS is enabled when communicating with Pulsar brokers. |false| +| tlsCertRefreshCheckDurationSec | TLS certificate refresh duration in seconds. If the value is set to 0, the TLS certificate is checked on every new connection. | 300 | +|tlsCertificateFilePath| Path for the TLS certificate file || +|tlsKeyFilePath| Path for the TLS private key file || +|tlsTrustCertsFilePath| Path for the trusted TLS certificate pem file || +|tlsHostnameVerificationEnabled| Whether the hostname is validated when the proxy creates a TLS connection with brokers |false| +|tlsRequireTrustedClientCertOnConnect| Whether client certificates are required for TLS. Connections are rejected if the client certificate isn’t trusted. |false| +|tlsProtocols|Specify the TLS protocols the proxy uses to negotiate during the TLS handshake. Multiple values can be specified, separated by commas. Example: ```TLSv1.2```, ```TLSv1.1```, ```TLSv1``` || +|tlsCiphers|Specify the TLS ciphers the proxy uses to negotiate during the TLS handshake. Multiple values can be specified, separated by commas. Example: ```TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256```|| +| httpReverseProxyConfigs | HTTP paths to redirect to non-Pulsar services | | +| httpOutputBufferSize | HTTP output buffer size. The amount of data that is buffered for HTTP requests before it is flushed to the channel. A larger buffer size may result in higher HTTP throughput, though it may take longer for the client to see data. If using HTTP streaming via the reverse proxy, this should be set to the minimum value (1) so that clients see the data as soon as possible. 
| 32768 | +| httpNumThreads | Number of threads to use for HTTP request processing| 2 * Runtime.getRuntime().availableProcessors() | +|tokenSecretKey| Configure the secret key to be used to validate auth tokens. The key can be specified like: `tokenSecretKey=data:;base64,xxxxxxxxx` or `tokenSecretKey=file:///my/secret.key`|| +|tokenPublicKey| Configure the public key to be used to validate auth tokens. The key can be specified like: `tokenPublicKey=data:;base64,xxxxxxxxx` or `tokenPublicKey=file:///my/secret.key`|| +|tokenAuthClaim| Specify the token claim that will be used as the authentication "principal" or "role". The "subject" field will be used if this is left blank || +|tokenAudienceClaim| The token audience "claim" name, e.g. "aud". It is used to get the audience from the token. If it is not set, the audience is not verified. || +| tokenAudience | The token audience stands for this broker. The field `tokenAudienceClaim` of a valid token needs to contain this parameter.| | +| proxyLogLevel | Set the Pulsar Proxy log level.
  • If the value is set to 0, no TCP channel information is logged.
  • If the value is set to 1, only the TCP channel information and command information (without message body) are parsed and logged.
  • If the value is set to 2, all TCP channel information, command information, and message body are parsed and logged. | 0 | + +## ZooKeeper + +ZooKeeper handles a broad range of essential configuration- and coordination-related tasks for Pulsar. The default configuration file for ZooKeeper is in the `conf/zookeeper.conf` file in your Pulsar installation. The following parameters are available: + + +|Name|Description|Default| +|---|---|---| +|tickTime| The tick is the basic unit of time in ZooKeeper, measured in milliseconds and used to regulate things like heartbeats and timeouts. tickTime is the length of a single tick. |2000| +|initLimit| The maximum time, in ticks, that the leader ZooKeeper server allows follower ZooKeeper servers to successfully connect and sync. The tick time is set in milliseconds using the tickTime parameter. |10| +|syncLimit| The maximum time, in ticks, that a follower ZooKeeper server is allowed to sync with other ZooKeeper servers. The tick time is set in milliseconds using the tickTime parameter. |5| +|dataDir| The location where ZooKeeper stores in-memory database snapshots as well as the transaction log of updates to the database. |data/zookeeper| +|clientPort| The port on which the ZooKeeper server listens for connections. |2181| +|admin.enableServer|Whether the ZooKeeper admin server is enabled.|true| +|admin.serverPort|The port at which the admin server listens.|9990| +|autopurge.snapRetainCount| In ZooKeeper, auto purge determines how many recent snapshots of the database stored in dataDir to retain within the time interval specified by autopurge.purgeInterval (while deleting the rest). |3| +|autopurge.purgeInterval| The time interval, in hours, by which the ZooKeeper database purge task is triggered. Setting to a non-zero number will enable auto purge; setting to 0 will disable. Read this guide before enabling auto purge. |1| +|forceSync|Requires updates to be synced to media of the transaction log before finishing processing the update. If this option is set to 'no', ZooKeeper will not require updates to be synced to the media. WARNING: it's not recommended to run a production ZK cluster with `forceSync` disabled.|yes| +|maxClientCnxns| The maximum number of client connections. Increase this if you need to handle more ZooKeeper clients. |60| + + + + +In addition to the parameters in the table above, configuring ZooKeeper for Pulsar involves adding +a `server.N` line to the `conf/zookeeper.conf` file for each node in the ZooKeeper cluster, where `N` is the number of the ZooKeeper node. Here's an example for a three-node ZooKeeper cluster: + +```properties +server.1=zk1.us-west.example.com:2888:3888 +server.2=zk2.us-west.example.com:2888:3888 +server.3=zk3.us-west.example.com:2888:3888 +``` + +> We strongly recommend consulting the [ZooKeeper Administrator's Guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) for a more thorough and comprehensive introduction to ZooKeeper configuration. diff --git a/site2/website/versioned_docs/version-2.7.0/reference-metrics.md b/site2/website/versioned_docs/version-2.7.0/reference-metrics.md new file mode 100644 index 00000000000000..51b47733f666fe --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/reference-metrics.md @@ -0,0 +1,404 @@ +--- +id: version-2.7.0-reference-metrics +title: Pulsar Metrics +sidebar_label: Pulsar Metrics +original_id: reference-metrics +--- + + + +Pulsar exposes the following metrics in Prometheus format. You can monitor your clusters with these metrics.
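+
+As a quick sanity check, you can fetch the raw Prometheus-format output with `curl` (an illustrative sketch; the host names are hypothetical placeholders, and the ports are the defaults documented in the sections below):
+
+```shell
+# Hypothetical hosts; substitute your own. Default ports per this page:
+# ZooKeeper and BookKeeper expose metrics on port 8000, brokers on 8080.
+curl http://zk1.example.com:8000/metrics
+curl http://bookie1.example.com:8000/metrics
+curl http://broker1.example.com:8080/metrics
+```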
+ +* [ZooKeeper](#zookeeper) +* [BookKeeper](#bookkeeper) +* [Broker](#broker) +* [Pulsar Functions](#pulsar-functions) +* [Proxy](#proxy) +* [Pulsar SQL Worker](#pulsar-sql-worker) + +The following types of metrics are available: + +- [Counter](https://prometheus.io/docs/concepts/metric_types/#counter): a cumulative metric that represents a single monotonically increasing counter. The value can only increase; it is reset to zero when the process restarts. +- [Gauge](https://prometheus.io/docs/concepts/metric_types/#gauge): a metric that represents a single numerical value that can arbitrarily go up and down. +- [Histogram](https://prometheus.io/docs/concepts/metric_types/#histogram): a histogram samples observations (usually things like request durations or response sizes) and counts them in configurable buckets. +- [Summary](https://prometheus.io/docs/concepts/metric_types/#summary): similar to a histogram, a summary samples observations (usually things like request durations and response sizes). While it also provides a total count of observations and a sum of all observed values, it calculates configurable quantiles over a sliding time window. + +## ZooKeeper + +The ZooKeeper metrics are exposed under "/metrics" at port `8000`. You can use a different port by configuring the `stats_server_port` system property. + +### Server metrics + +| Name | Type | Description | +|---|---|---| +| zookeeper_server_znode_count | Gauge | The number of z-nodes stored. | +| zookeeper_server_data_size_bytes | Gauge | The total size of all z-nodes stored. | +| zookeeper_server_connections | Gauge | The number of currently opened connections. | +| zookeeper_server_watches_count | Gauge | The number of watchers registered. | +| zookeeper_server_ephemerals_count | Gauge | The number of ephemeral z-nodes. | + +### Request metrics + +| Name | Type | Description | +|---|---|---| +| zookeeper_server_requests | Counter | The total number of requests received by a particular server. | +| zookeeper_server_requests_latency_ms | Summary | The request latency, in milliseconds.
    Available labels: *type* (write, read).
    • *write*: the requests that write data to ZooKeeper.
    • *read*: the requests that read data from ZooKeeper.
    | + +## BookKeeper + +The BookKeeper metrics are exposed under "/metrics" at port `8000`. You can change the port by updating `prometheusStatsHttpPort` +in the `bookkeeper.conf` configuration file. + +### Server metrics + +| Name | Type | Description | +|---|---|---| +| bookie_SERVER_STATUS | Gauge | The server status of the bookie server.
    • 1: the bookie is running in writable mode.
    • 0: the bookie is running in readonly mode.
    | +| bookkeeper_server_ADD_ENTRY_count | Counter | The total number of ADD_ENTRY requests received at the bookie. The `success` label is used to distinguish successes and failures. | +| bookkeeper_server_READ_ENTRY_count | Counter | The total number of READ_ENTRY requests received at the bookie. The `success` label is used to distinguish successes and failures. | +| bookie_WRITE_BYTES | Counter | The total number of bytes written to the bookie. | +| bookie_READ_BYTES | Counter | The total number of bytes read from the bookie. | +| bookkeeper_server_ADD_ENTRY_REQUEST | Histogram | The histogram of request latency of ADD_ENTRY requests at the bookie. The `success` label is used to distinguish successes and failures. | +| bookkeeper_server_READ_ENTRY_REQUEST | Histogram | The histogram of request latency of READ_ENTRY requests at the bookie. The `success` label is used to distinguish successes and failures. | + +### Journal metrics + +| Name | Type | Description | +|---|---|---| +| bookie_journal_JOURNAL_SYNC_count | Counter | The total number of journal fsync operations happening at the bookie. The `success` label is used to distinguish successes and failures. | +| bookie_journal_JOURNAL_QUEUE_SIZE | Gauge | The total number of requests pending in the journal queue. | +| bookie_journal_JOURNAL_FORCE_WRITE_QUEUE_SIZE | Gauge | The total number of force write (fsync) requests pending in the force-write queue. | +| bookie_journal_JOURNAL_CB_QUEUE_SIZE | Gauge | The total number of callbacks pending in the callback queue. | +| bookie_journal_JOURNAL_ADD_ENTRY | Histogram | The histogram of request latency of adding entries to the journal. | +| bookie_journal_JOURNAL_SYNC | Histogram | The histogram of fsync latency of syncing data to the journal disk. | + +### Storage metrics + +| Name | Type | Description | +|---|---|---| +| bookie_ledgers_count | Gauge | The total number of ledgers stored in the bookie. | +| bookie_entries_count | Gauge | The total number of entries stored in the bookie. | +| bookie_write_cache_size | Gauge | The bookie write cache size (in bytes). | +| bookie_read_cache_size | Gauge | The bookie read cache size (in bytes). | +| bookie_DELETED_LEDGER_COUNT | Counter | The total number of ledgers deleted since the bookie has started. | +| bookie_ledger_writable_dirs | Gauge | The number of writable directories in the bookie. | + +## Broker + +The broker metrics are exposed under "/metrics" at port `8080`. You can change the port by updating `webServicePort` to a different port +in the `broker.conf` configuration file. + +All the metrics exposed by a broker are labelled with `cluster=${pulsar_cluster}`. The name of the Pulsar cluster is the value of `${pulsar_cluster}`, which you have configured in the `broker.conf` file.
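+
+For example (an illustrative sketch; the broker host is a hypothetical placeholder), you can confirm the label is applied by filtering the scraped output:
+
+```shell
+# Every broker metric carries the cluster label, for example:
+#   pulsar_topics_count{cluster="standalone", namespace="public/default"} 1.0
+curl -s http://broker1.example.com:8080/metrics | grep 'cluster="'
+```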
+ +The following metrics are available for the broker: + +* [Namespace metrics](#namespace-metrics) + * [Replication metrics](#replication-metrics) +* [Topic metrics](#topic-metrics) + * [Replication metrics](#replication-metrics-1) +* [ManagedLedgerCache metrics](#managedledgercache-metrics) +* [ManagedLedger metrics](#managedledger-metrics) +* [LoadBalancing metrics](#loadbalancing-metrics) + * [BundleUnloading metrics](#bundleunloading-metrics) + * [BundleSplit metrics](#bundlesplit-metrics) +* [Subscription metrics](#subscription-metrics) +* [Consumer metrics](#consumer-metrics) +* [ManagedLedger bookie client metrics](#managed-ledger-bookie-client-metrics) + +### Namespace metrics + +> Namespace metrics are only exposed when `exposeTopicLevelMetricsInPrometheus` is set to `false`. + +All the namespace metrics are labelled with the following labels: + +- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you configured in `broker.conf`. +- *namespace*: `namespace=${pulsar_namespace}`. `${pulsar_namespace}` is the namespace name. + +| Name | Type | Description | +|---|---|---| +| pulsar_topics_count | Gauge | The number of Pulsar topics of the namespace owned by this broker. | +| pulsar_subscriptions_count | Gauge | The number of Pulsar subscriptions of the namespace served by this broker. | +| pulsar_producers_count | Gauge | The number of active producers of the namespace connected to this broker. | +| pulsar_consumers_count | Gauge | The number of active consumers of the namespace connected to this broker. | +| pulsar_rate_in | Gauge | The total message rate of the namespace coming into this broker (messages/second). | +| pulsar_rate_out | Gauge | The total message rate of the namespace going out from this broker (messages/second). | +| pulsar_throughput_in | Gauge | The total throughput of the namespace coming into this broker (bytes/second). | +| pulsar_throughput_out | Gauge | The total throughput of the namespace going out from this broker (bytes/second). | +| pulsar_storage_size | Gauge | The total storage size of the topics in this namespace owned by this broker (bytes). | +| pulsar_storage_backlog_size | Gauge | The total backlog size of the topics of this namespace owned by this broker (messages). | +| pulsar_storage_offloaded_size | Gauge | The total amount of the data in this namespace offloaded to the tiered storage (bytes). | +| pulsar_storage_write_rate | Gauge | The total message batches (entries) written to the storage for this namespace (message batches / second). | +| pulsar_storage_read_rate | Gauge | The total message batches (entries) read from the storage for this namespace (message batches / second). | +| pulsar_subscription_delayed | Gauge | The total number of message batches (entries) delayed for dispatching. | +| pulsar_storage_write_latency_le_* | Histogram | The entry rate of a namespace where the storage write latency is smaller than a given threshold.
    Available thresholds:
    • pulsar_storage_write_latency_le_0_5: <= 0.5ms
    • pulsar_storage_write_latency_le_1: <= 1ms
    • pulsar_storage_write_latency_le_5: <= 5ms
    • pulsar_storage_write_latency_le_10: <= 10ms
    • pulsar_storage_write_latency_le_20: <= 20ms
    • pulsar_storage_write_latency_le_50: <= 50ms
    • pulsar_storage_write_latency_le_100: <= 100ms
    • pulsar_storage_write_latency_le_200: <= 200ms
    • pulsar_storage_write_latency_le_1000: <= 1s
    • pulsar_storage_write_latency_le_overflow: > 1s
    | +| pulsar_entry_size_le_* | Histogram | The entry rate of a namespace where the entry size is smaller than a given threshold.
    Available thresholds:
    • pulsar_entry_size_le_128: <= 128 bytes
    • pulsar_entry_size_le_512: <= 512 bytes
    • pulsar_entry_size_le_1_kb: <= 1 KB
    • pulsar_entry_size_le_2_kb: <= 2 KB
    • pulsar_entry_size_le_4_kb: <= 4 KB
    • pulsar_entry_size_le_16_kb: <= 16 KB
    • pulsar_entry_size_le_100_kb: <= 100 KB
    • pulsar_entry_size_le_1_mb: <= 1 MB
    • pulsar_entry_size_le_overflow: > 1 MB
    | + +#### Replication metrics + +If a namespace is configured to be replicated among multiple Pulsar clusters, the corresponding replication metrics are also exposed when `replicationMetricsEnabled` is enabled. + +All the replication metrics are also labelled with `remoteCluster=${pulsar_remote_cluster}`. + +| Name | Type | Description | +|---|---|---| +| pulsar_replication_rate_in | Gauge | The total message rate of the namespace replicating from the remote cluster (messages/second). | +| pulsar_replication_rate_out | Gauge | The total message rate of the namespace replicating to the remote cluster (messages/second). | +| pulsar_replication_throughput_in | Gauge | The total throughput of the namespace replicating from the remote cluster (bytes/second). | +| pulsar_replication_throughput_out | Gauge | The total throughput of the namespace replicating to the remote cluster (bytes/second). | +| pulsar_replication_backlog | Gauge | The total backlog of the namespace replicating to the remote cluster (messages). | + +### Topic metrics + +> Topic metrics are only exposed when `exposeTopicLevelMetricsInPrometheus` is set to `true`. + +All the topic metrics are labelled with the following labels: + +- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you configured in `broker.conf`. +- *namespace*: `namespace=${pulsar_namespace}`. `${pulsar_namespace}` is the namespace name. +- *topic*: `topic=${pulsar_topic}`. `${pulsar_topic}` is the topic name. + +| Name | Type | Description | +|---|---|---| +| pulsar_subscriptions_count | Gauge | The number of Pulsar subscriptions of the topic served by this broker. | +| pulsar_producers_count | Gauge | The number of active producers of the topic connected to this broker. | +| pulsar_consumers_count | Gauge | The number of active consumers of the topic connected to this broker. | +| pulsar_rate_in | Gauge | The total message rate of the topic coming into this broker (messages/second). | +| pulsar_rate_out | Gauge | The total message rate of the topic going out from this broker (messages/second). | +| pulsar_throughput_in | Gauge | The total throughput of the topic coming into this broker (bytes/second). | +| pulsar_throughput_out | Gauge | The total throughput of the topic going out from this broker (bytes/second). | +| pulsar_storage_size | Gauge | The total storage size of this topic owned by this broker (bytes). | +| pulsar_storage_backlog_size | Gauge | The total backlog size of this topic owned by this broker (messages). | +| pulsar_storage_offloaded_size | Gauge | The total amount of the data in this topic offloaded to the tiered storage (bytes). | +| pulsar_storage_backlog_quota_limit | Gauge | The backlog quota limit of this topic (bytes). | +| pulsar_storage_write_rate | Gauge | The total message batches (entries) written to the storage for this topic (message batches / second). | +| pulsar_storage_read_rate | Gauge | The total message batches (entries) read from the storage for this topic (message batches / second). | +| pulsar_subscription_delayed | Gauge | The total number of message batches (entries) delayed for dispatching. | +| pulsar_storage_write_latency_le_* | Histogram | The entry rate of a topic where the storage write latency is smaller than a given threshold.
    Available thresholds:
    • pulsar_storage_write_latency_le_0_5: <= 0.5ms
    • pulsar_storage_write_latency_le_1: <= 1ms
    • pulsar_storage_write_latency_le_5: <= 5ms
    • pulsar_storage_write_latency_le_10: <= 10ms
    • pulsar_storage_write_latency_le_20: <= 20ms
    • pulsar_storage_write_latency_le_50: <= 50ms
    • pulsar_storage_write_latency_le_100: <= 100ms
    • pulsar_storage_write_latency_le_200: <= 200ms
    • pulsar_storage_write_latency_le_1000: <= 1s
    • pulsar_storage_write_latency_le_overflow: > 1s
    | +| pulsar_entry_size_le_* | Histogram | The entry rate of a topic where the entry size is smaller than a given threshold.
    Available thresholds:
    • pulsar_entry_size_le_128: <= 128 bytes
    • pulsar_entry_size_le_512: <= 512 bytes
    • pulsar_entry_size_le_1_kb: <= 1 KB
    • pulsar_entry_size_le_2_kb: <= 2 KB
    • pulsar_entry_size_le_4_kb: <= 4 KB
    • pulsar_entry_size_le_16_kb: <= 16 KB
    • pulsar_entry_size_le_100_kb: <= 100 KB
    • pulsar_entry_size_le_1_mb: <= 1 MB
    • pulsar_entry_size_le_overflow: > 1 MB
    | +| pulsar_in_bytes_total | Counter | The total number of bytes received for this topic | +| pulsar_in_messages_total | Counter | The total number of messages received for this topic | +| pulsar_out_bytes_total | Counter | The total number of bytes read from this topic | +| pulsar_out_messages_total | Counter | The total number of messages read from this topic | + +#### Replication metrics + +If a namespace that a topic belongs to is configured to be replicated among multiple Pulsar clusters, the corresponding replication metrics are also exposed when `replicationMetricsEnabled` is enabled. + +All the replication metrics are labelled with `remoteCluster=${pulsar_remote_cluster}`. + +| Name | Type | Description | +|---|---|---| +| pulsar_replication_rate_in | Gauge | The total message rate of the topic replicating from the remote cluster (messages/second). | +| pulsar_replication_rate_out | Gauge | The total message rate of the topic replicating to the remote cluster (messages/second). | +| pulsar_replication_throughput_in | Gauge | The total throughput of the topic replicating from the remote cluster (bytes/second). | +| pulsar_replication_throughput_out | Gauge | The total throughput of the topic replicating to the remote cluster (bytes/second). | +| pulsar_replication_backlog | Gauge | The total backlog of the topic replicating to the remote cluster (messages). | + +### ManagedLedgerCache metrics +All the ManagedLedgerCache metrics are labelled with the following labels: +- cluster: cluster=${pulsar_cluster}. ${pulsar_cluster} is the cluster name that you have configured in the `broker.conf` file. + +| Name | Type | Description | +| --- | --- | --- | +| pulsar_ml_cache_evictions | Gauge | The number of cache evictions during the last minute. | +| pulsar_ml_cache_hits_rate | Gauge | The number of cache hits per second. | +| pulsar_ml_cache_hits_throughput | Gauge | The amount of data retrieved from the cache (byte/s) | +| pulsar_ml_cache_misses_rate | Gauge | The number of cache misses per second | +| pulsar_ml_cache_misses_throughput | Gauge | The amount of data that could not be retrieved from the cache (byte/s) | +| pulsar_ml_cache_pool_active_allocations | Gauge | The number of currently active allocations in the direct arena | +| pulsar_ml_cache_pool_active_allocations_huge | Gauge | The number of currently active huge allocations in the direct arena | +| pulsar_ml_cache_pool_active_allocations_normal | Gauge | The number of currently active normal allocations in the direct arena | +| pulsar_ml_cache_pool_active_allocations_small | Gauge | The number of currently active small allocations in the direct arena | +| pulsar_ml_cache_pool_active_allocations_tiny | Gauge | The number of currently active tiny allocations in the direct arena | +| pulsar_ml_cache_pool_allocated | Gauge | The total allocated memory of chunk lists in the direct arena | +| pulsar_ml_cache_pool_used | Gauge | The total used memory of chunk lists in the direct arena | +| pulsar_ml_cache_used_size | Gauge | The size in bytes used to store the entry payloads | +| pulsar_ml_count | Gauge | The number of currently opened managed ledgers | + +### ManagedLedger metrics +All the managedLedger metrics are labelled with the following labels: +- cluster: cluster=${pulsar_cluster}. ${pulsar_cluster} is the cluster name that you have configured in the `broker.conf` file. +- namespace: namespace=${pulsar_namespace}. ${pulsar_namespace} is the namespace name. +- quantile: quantile=${quantile}. 
The quantile label applies only to `Histogram` type metrics, and represents the threshold of a given bucket. + +| Name | Type | Description | +| --- | --- | --- | +| pulsar_ml_AddEntryBytesRate | Gauge | The bytes/s rate of messages added | +| pulsar_ml_AddEntryErrors | Gauge | The number of addEntry requests that failed | +| pulsar_ml_AddEntryLatencyBuckets | Histogram | The add entry latency of a ledger with a given quantile (threshold).
    Available quantile:
    • quantile="0.0_0.5" is AddEntryLatency between (0.0ms, 0.5ms]
    • quantile="0.5_1.0" is AddEntryLatency between (0.5ms, 1.0ms]
    • quantile="1.0_5.0" is AddEntryLatency between (1ms, 5ms]
    • quantile="5.0_10.0" is AddEntryLatency between (5ms, 10ms]
    • quantile="10.0_20.0" is AddEntryLatency between (10ms, 20ms]
    • quantile="20.0_50.0" is AddEntryLatency between (20ms, 50ms]
    • quantile="50.0_100.0" is AddEntryLatency between (50ms, 100ms]
    • quantile="100.0_200.0" is AddEntryLatency between (100ms, 200ms]
    • quantile="200.0_1000.0" is AddEntryLatency between (200ms, 1s]
    | +| pulsar_ml_AddEntryLatencyBuckets_OVERFLOW | Gauge | The add entry latency > 1s | +| pulsar_ml_AddEntryMessagesRate | Gauge | The msg/s rate of messages added | +| pulsar_ml_AddEntrySucceed | Gauge | The number of addEntry requests that succeeded | +| pulsar_ml_EntrySizeBuckets | Histogram | The add entry size of a ledger with a given quantile.
    Available quantile:
    • quantile="0.0_128.0" is EntrySize between (0byte, 128byte]
    • quantile="128.0_512.0" is EntrySize between (128byte, 512byte]
    • quantile="512.0_1024.0" is EntrySize between (512byte, 1KB]
    • quantile="1024.0_2048.0" is EntrySize between (1KB, 2KB]
    • quantile="2048.0_4096.0" is EntrySize between (2KB, 4KB]
    • quantile="4096.0_16384.0" is EntrySize between (4KB, 16KB]
    • quantile="16384.0_102400.0" is EntrySize between (16KB, 100KB]
    • quantile="102400.0_1232896.0" is EntrySize between (100KB, 1MB]
    | +| pulsar_ml_EntrySizeBuckets_OVERFLOW | Gauge | The add entry size > 1MB | +| pulsar_ml_LedgerSwitchLatencyBuckets | Histogram | The ledger switch latency with a given quantile.
    Available quantile:
    • quantile="0.0_0.5" is EntrySize between (0ms, 0.5ms]
    • quantile="0.5_1.0" is EntrySize between (0.5ms, 1ms]
    • quantile="1.0_5.0" is EntrySize between (1ms, 5ms]
    • quantile="5.0_10.0" is EntrySize between (5ms, 10ms]
    • quantile="10.0_20.0" is EntrySize between (10ms, 20ms]
    • quantile="20.0_50.0" is EntrySize between (20ms, 50ms]
    • quantile="50.0_100.0" is EntrySize between (50ms, 100ms]
    • quantile="100.0_200.0" is EntrySize between (100ms, 200ms]
    • quantile="200.0_1000.0" is EntrySize between (200ms, 1000ms]
    | +| pulsar_ml_LedgerSwitchLatencyBuckets_OVERFLOW | Gauge | The ledger switch latency > 1s | +| pulsar_ml_MarkDeleteRate | Gauge | The rate of mark-delete ops/s | +| pulsar_ml_NumberOfMessagesInBacklog | Gauge | The number of backlog messages for all the consumers | +| pulsar_ml_ReadEntriesBytesRate | Gauge | The bytes/s rate of messages read | +| pulsar_ml_ReadEntriesErrors | Gauge | The number of readEntries requests that failed | +| pulsar_ml_ReadEntriesRate | Gauge | The msg/s rate of messages read | +| pulsar_ml_ReadEntriesSucceeded | Gauge | The number of readEntries requests that succeeded | +| pulsar_ml_StoredMessagesSize | Gauge | The total size of the messages in active ledgers (accounting for the multiple copies stored) | + +### LoadBalancing metrics +All the loadbalancing metrics are labelled with the following labels: +- cluster: cluster=${pulsar_cluster}. ${pulsar_cluster} is the cluster name that you have configured in the `broker.conf` file. +- broker: broker=${broker}. ${broker} is the IP address of the broker +- metric: metric="loadBalancing". + +| Name | Type | Description | +| --- | --- | --- | +| pulsar_lb_bandwidth_in_usage | Gauge | The broker inbound bandwidth usage | +| pulsar_lb_bandwidth_out_usage | Gauge | The broker outbound bandwidth usage | +| pulsar_lb_cpu_usage | Gauge | The broker CPU usage | +| pulsar_lb_directMemory_usage | Gauge | The broker process direct memory usage | +| pulsar_lb_memory_usage | Gauge | The broker process memory usage | + +#### BundleUnloading metrics +All the bundleUnloading metrics are labelled with the following labels: +- cluster: cluster=${pulsar_cluster}. ${pulsar_cluster} is the cluster name that you have configured in the `broker.conf` file. +- metric: metric="bundleUnloading". + +| Name | Type | Description | +| --- | --- | --- | +| pulsar_lb_unload_broker_count | Counter | The number of brokers unloaded in this bundle unloading | +| pulsar_lb_unload_bundle_count | Counter | The number of bundles unloaded in this bundle unloading | + +#### BundleSplit metrics +All the bundleSplit metrics are labelled with the following labels: +- cluster: cluster=${pulsar_cluster}. ${pulsar_cluster} is the cluster name that you have configured in the `broker.conf` file. +- metric: metric="bundlesSplit". + +| Name | Type | Description | +| --- | --- | --- | +| pulsar_lb_bundles_split_count | Counter | The number of bundle splits in this bundle-splitting check interval | + +### Subscription metrics + +> Subscription metrics are only exposed when `exposeTopicLevelMetricsInPrometheus` is set to `true`. + +All the subscription metrics are labelled with the following labels: + +- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you have configured in the `broker.conf` file. +- *namespace*: `namespace=${pulsar_namespace}`. `${pulsar_namespace}` is the namespace name. +- *topic*: `topic=${pulsar_topic}`. `${pulsar_topic}` is the topic name. +- *subscription*: `subscription=${subscription}`. `${subscription}` is the topic subscription name. + +| Name | Type | Description | +|---|---|---| +| pulsar_subscription_back_log | Gauge | The total backlog of a subscription (messages). | +| pulsar_subscription_delayed | Gauge | The total number of messages delayed for dispatching for a subscription (messages). | +| pulsar_subscription_msg_rate_redeliver | Gauge | The total message rate for messages being redelivered (messages/second).
+
+All the subscription metrics are labelled with the following labels:
+
+- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you have configured in the `broker.conf` file.
+- *namespace*: `namespace=${pulsar_namespace}`. `${pulsar_namespace}` is the namespace name.
+- *topic*: `topic=${pulsar_topic}`. `${pulsar_topic}` is the topic name.
+- *subscription*: `subscription=${subscription}`. `${subscription}` is the topic subscription name.
+
+| Name | Type | Description |
+|---|---|---|
+| pulsar_subscription_back_log | Gauge | The total backlog of a subscription (messages). |
+| pulsar_subscription_delayed | Gauge | The total number of messages delayed to be dispatched for a subscription (messages). |
+| pulsar_subscription_msg_rate_redeliver | Gauge | The total message rate for messages being redelivered (messages/second). |
+| pulsar_subscription_unacked_messages | Gauge | The total number of unacknowledged messages of a subscription (messages). |
+| pulsar_subscription_blocked_on_unacked_messages | Gauge | Indicates whether a subscription is blocked on unacknowledged messages (1 means the subscription is blocked waiting for unacknowledged messages to be acknowledged; 0 means it is not). |
+| pulsar_subscription_msg_rate_out | Gauge | The total message dispatch rate for a subscription (messages/second). |
+| pulsar_subscription_msg_throughput_out | Gauge | The total message dispatch throughput for a subscription (bytes/second). |
+
+### Consumer metrics
+
+> Consumer metrics are only exposed when both `exposeTopicLevelMetricsInPrometheus` and `exposeConsumerLevelMetricsInPrometheus` are set to `true`.
+
+All the consumer metrics are labelled with the following labels:
+
+- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you have configured in the `broker.conf` file.
+- *namespace*: `namespace=${pulsar_namespace}`. `${pulsar_namespace}` is the namespace name.
+- *topic*: `topic=${pulsar_topic}`. `${pulsar_topic}` is the topic name.
+- *subscription*: `subscription=${subscription}`. `${subscription}` is the topic subscription name.
+- *consumer_name*: `consumer_name=${consumer_name}`. `${consumer_name}` is the topic consumer name.
+- *consumer_id*: `consumer_id=${consumer_id}`. `${consumer_id}` is the topic consumer id.
+
+| Name | Type | Description |
+|---|---|---|
+| pulsar_consumer_msg_rate_redeliver | Gauge | The total message rate for messages being redelivered (messages/second). |
+| pulsar_consumer_unacked_messages | Gauge | The total number of unacknowledged messages of a consumer (messages). |
+| pulsar_consumer_blocked_on_unacked_messages | Gauge | Indicates whether a consumer is blocked on unacknowledged messages (1 means the consumer is blocked waiting for unacknowledged messages to be acknowledged; 0 means it is not). |
+| pulsar_consumer_msg_rate_out | Gauge | The total message dispatch rate for a consumer (messages/second). |
+| pulsar_consumer_msg_throughput_out | Gauge | The total message dispatch throughput for a consumer (bytes/second). |
+| pulsar_consumer_available_permits | Gauge | The available permits for a consumer. |
+
+### Managed ledger bookie client metrics
+
+All the managed ledger bookie client metrics are labelled with the following labels:
+
+- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you have configured in the `broker.conf` file.
+
+| Name | Type | Description |
+| --- | --- | --- |
+| pulsar_managedLedger_client_bookkeeper_ml_scheduler_completed_tasks_* | Gauge | The number of tasks the scheduler executor has completed. The number of metrics is determined by the number of scheduler executor threads, which is configured by `managedLedgerNumSchedulerThreads` in `broker.conf`. |
+| pulsar_managedLedger_client_bookkeeper_ml_scheduler_queue_* | Gauge | The number of tasks queued in the scheduler executor's queue. The number of metrics is determined by the number of scheduler executor threads, which is configured by `managedLedgerNumSchedulerThreads` in `broker.conf`. |
+| pulsar_managedLedger_client_bookkeeper_ml_scheduler_total_tasks_* | Gauge | The total number of tasks the scheduler executor has received. The number of metrics is determined by the number of scheduler executor threads, which is configured by `managedLedgerNumSchedulerThreads` in `broker.conf`. |
+| pulsar_managedLedger_client_bookkeeper_ml_workers_completed_tasks_* | Gauge | The number of tasks the worker executor has completed. The number of metrics is determined by the number of worker threads, which is configured by `managedLedgerNumWorkerThreads` in `broker.conf`. |
+| pulsar_managedLedger_client_bookkeeper_ml_workers_queue_* | Gauge | The number of tasks queued in the worker executor's queue. The number of metrics is determined by the number of worker threads, which is configured by `managedLedgerNumWorkerThreads` in `broker.conf`. |
+| pulsar_managedLedger_client_bookkeeper_ml_workers_total_tasks_* | Gauge | The total number of tasks the worker executor has received. The number of metrics is determined by the number of worker threads, which is configured by `managedLedgerNumWorkerThreads` in `broker.conf`. |
+| pulsar_managedLedger_client_bookkeeper_ml_scheduler_task_execution | Summary | The scheduler task execution latency calculated in milliseconds. |
+| pulsar_managedLedger_client_bookkeeper_ml_scheduler_task_queued | Summary | The scheduler task queued latency calculated in milliseconds. |
+| pulsar_managedLedger_client_bookkeeper_ml_workers_task_execution | Summary | The worker task execution latency calculated in milliseconds. |
+| pulsar_managedLedger_client_bookkeeper_ml_workers_task_queued | Summary | The worker task queued latency calculated in milliseconds. |
+
+## Pulsar Functions
+
+All the Pulsar Functions metrics are labelled with the following labels:
+
+- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you have configured in the `broker.conf` file.
+- *namespace*: `namespace=${pulsar_namespace}`. `${pulsar_namespace}` is the namespace name.
+
+| Name | Type | Description |
+|---|---|---|
+| pulsar_function_processed_successfully_total | Counter | Total number of messages processed successfully. |
+| pulsar_function_processed_successfully_total_1min | Counter | Total number of messages processed successfully in the last 1 minute. |
+| pulsar_function_system_exceptions_total | Counter | Total number of system exceptions. |
+| pulsar_function_system_exceptions_total_1min | Counter | Total number of system exceptions in the last 1 minute. |
+| pulsar_function_user_exceptions_total | Counter | Total number of user exceptions. |
+| pulsar_function_user_exceptions_total_1min | Counter | Total number of user exceptions in the last 1 minute. |
+| pulsar_function_process_latency_ms | Summary | Process latency in milliseconds. |
+| pulsar_function_process_latency_ms_1min | Summary | Process latency in milliseconds in the last 1 minute. |
+| pulsar_function_last_invocation | Gauge | The timestamp of the last invocation of the function. |
+| pulsar_function_received_total | Counter | Total number of messages received from source. |
+| pulsar_function_received_total_1min | Counter | Total number of messages received from source in the last 1 minute. |
+
+## Proxy
+
+All the proxy metrics are labelled with the following labels:
+
+- *cluster*: `cluster=${pulsar_cluster}`. `${pulsar_cluster}` is the cluster name that you have configured in the `broker.conf` file.
+- *kubernetes_pod_name*: `kubernetes_pod_name=${kubernetes_pod_name}`. `${kubernetes_pod_name}` is the Kubernetes pod name.
+
+| Name | Type | Description |
+|---|---|---|
+| pulsar_proxy_active_connections | Gauge | Number of connections currently active in the proxy. |
+| pulsar_proxy_new_connections | Counter | Counter of connections being opened in the proxy. |
+| pulsar_proxy_rejected_connections | Counter | Counter for connections rejected due to throttling. |
+| pulsar_proxy_binary_ops | Counter | Counter of proxy operations. |
+| pulsar_proxy_binary_bytes | Counter | Counter of proxy bytes. |
+
+## Pulsar SQL Worker
+
+| Name | Type | Description |
+|---|---|---|
+| split_bytes_read | Counter | Number of bytes read from BookKeeper. |
+| split_num_messages_deserialized | Counter | Number of messages deserialized. |
+| split_num_record_deserialized | Counter | Number of records deserialized. |
+| split_bytes_read_per_query | Summary | Total number of bytes read per query. |
+| split_entry_deserialize_time | Summary | Time spent on deserializing entries. |
+| split_entry_deserialize_time_per_query | Summary | Time spent on deserializing entries per query. |
+| split_entry_queue_dequeue_wait_time | Summary | Time spent waiting to get an entry from the entry queue because it is empty. |
+| split_entry_queue_dequeue_wait_time_per_query | Summary | Total time spent waiting to get entries from the entry queue per query. |
+| split_message_queue_dequeue_wait_time_per_query | Summary | Time spent waiting to dequeue from the message queue because it is empty, per query. |
+| split_message_queue_enqueue_wait_time | Summary | Time spent waiting for message queue enqueue because the message queue is full. |
+| split_message_queue_enqueue_wait_time_per_query | Summary | Time spent waiting for message queue enqueue because the message queue is full, per query. |
+| split_num_entries_per_batch | Summary | Number of entries per batch. |
+| split_num_entries_per_query | Summary | Number of entries per query. |
+| split_num_messages_deserialized_per_entry | Summary | Number of messages deserialized per entry. |
+| split_num_messages_deserialized_per_query | Summary | Number of messages deserialized per query. |
+| split_read_attempts | Summary | Number of read attempts (fail if queues are full). |
+| split_read_attempts_per_query | Summary | Number of read attempts per query. |
+| split_read_latency_per_batch | Summary | Latency of reads per batch. |
+| split_read_latency_per_query | Summary | Total read latency per query. |
+| split_record_deserialize_time | Summary | Time spent on deserializing message to record. For example, Avro, JSON, and so on. |
+| split_record_deserialize_time_per_query | Summary | Time spent on deserializing message to record per query. |
+| split_total_execution_time | Summary | Total execution time. |
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/reference-pulsar-admin.md b/site2/website/versioned_docs/version-2.7.0/reference-pulsar-admin.md
new file mode 100644
index 00000000000000..86a1ac730eb472
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/reference-pulsar-admin.md
@@ -0,0 +1,2567 @@
+---
+id: version-2.7.0-pulsar-admin
+title: Pulsar admin CLI
+sidebar_label: Pulsar Admin CLI
+original_id: pulsar-admin
+---
+
+The `pulsar-admin` tool enables you to manage Pulsar installations, including clusters, brokers, namespaces, tenants, and more.
+
+Usage
+```bash
+$ pulsar-admin command
+```
+
+Commands
+* `broker-stats`
+* `brokers`
+* `clusters`
+* `functions`
+* `functions-worker`
+* `namespaces`
+* `ns-isolation-policy`
+* `sources`
+
+  For more information, see [here](io-cli.md#sources)
+* `sinks`
+
+  For more information, see [here](io-cli.md#sinks)
+* `topics`
+* `tenants`
+* `resource-quotas`
+* `schemas`
+
+## `broker-stats`
+
+Operations to collect broker statistics
+
+```bash
+$ pulsar-admin broker-stats subcommand
+```
+
+Subcommands
+* `allocator-stats`
+* `topics(destinations)`
+* `mbeans`
+* `monitoring-metrics`
+* `load-report`
+
+
+### `allocator-stats`
+
+Dump allocator stats
+
+Usage
+```bash
+$ pulsar-admin broker-stats allocator-stats allocator-name
+```
+
+### `topics(destinations)`
+
+Dump topic stats
+
+Usage
+```bash
+$ pulsar-admin broker-stats topics options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-i`, `--indent`|Indent JSON output|false|
+
+### `mbeans`
+
+Dump MBean stats
+
+Usage
+```bash
+$ pulsar-admin broker-stats mbeans options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-i`, `--indent`|Indent JSON output|false|
+
+
+### `monitoring-metrics`
+
+Dump metrics for monitoring
+
+Usage
+```bash
+$ pulsar-admin broker-stats monitoring-metrics options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-i`, `--indent`|Indent JSON output|false|
+
+
+### `load-report`
+
+Dump broker load-report
+
+Usage
+```bash
+$ pulsar-admin broker-stats load-report
+```
+
+
+## `brokers`
+
+Operations about brokers
+
+```bash
+$ pulsar-admin brokers subcommand
+```
+
+Subcommands
+* `list`
+* `namespaces`
+* `update-dynamic-config`
+* `list-dynamic-config`
+* `delete-dynamic-config`
+* `get-all-dynamic-config`
+* `get-internal-config`
+* `get-runtime-config`
+* `healthcheck`
+
+### `list`
+List active brokers of the cluster
+
+Usage
+```bash
+$ pulsar-admin brokers list cluster-name
+```
+
+### `namespaces`
+List namespaces owned by the broker
+
+Usage
+```bash
+$ pulsar-admin brokers namespaces cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--url`|The URL for the broker||
+
+
+### `update-dynamic-config`
+Update a broker's dynamic service configuration
+
+Usage
+```bash
+$ pulsar-admin brokers update-dynamic-config options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--config`|Service configuration parameter name||
+|`--value`|Value for the configuration parameter specified using the `--config` flag||
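+
+Example (a sketch using `brokerShutdownTimeoutMs` as an illustrative dynamic setting; any parameter name returned by `list-dynamic-config` works here)
+```bash
+$ pulsar-admin brokers update-dynamic-config \
+--config brokerShutdownTimeoutMs \
+--value 100
+```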
+
+### `list-dynamic-config`
+Get a list of updatable configuration names
+
+Usage
+```bash
+$ pulsar-admin brokers list-dynamic-config
+```
+
+### `delete-dynamic-config`
+Delete the dynamic service configuration of a broker
+
+Usage
+```bash
+$ pulsar-admin brokers delete-dynamic-config options
+```
+
+Options
+
+|Flag|Description|Default|
+|---|---|---|
+|`--config`|Service configuration parameter name||
+
+
+### `get-all-dynamic-config`
+Get all overridden dynamic-configuration values
+
+Usage
+```bash
+$ pulsar-admin brokers get-all-dynamic-config
+```
+
+### `get-internal-config`
+Get internal configuration information
+
+Usage
+```bash
+$ pulsar-admin brokers get-internal-config
+```
+
+### `get-runtime-config`
+Get runtime configuration values
+
+Usage
+```bash
+$ pulsar-admin brokers get-runtime-config
+```
+
+### `healthcheck`
+Run a health check against the broker
+
+Usage
+```bash
+$ pulsar-admin brokers healthcheck
+```
+
+
+## `clusters`
+Operations about clusters
+
+Usage
+```bash
+$ pulsar-admin clusters subcommand
+```
+
+Subcommands
+* `get`
+* `create`
+* `update`
+* `delete`
+* `list`
+* `update-peer-clusters`
+* `get-peer-clusters`
+* `get-failure-domain`
+* `create-failure-domain`
+* `update-failure-domain`
+* `delete-failure-domain`
+* `list-failure-domains`
+
+
+### `get`
+Get the configuration data for the specified cluster
+
+Usage
+```bash
+$ pulsar-admin clusters get cluster-name
+```
+
+### `create`
+Provisions a new cluster. This operation requires Pulsar super-user privileges.
+
+Usage
+```bash
+$ pulsar-admin clusters create cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--broker-url`|The URL for the broker service.||
+|`--broker-url-secure`|The broker service URL for a secure connection||
+|`--url`|The web service URL for the cluster||
+|`--url-secure`|The web service URL for a secure connection||
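+
+Example (the cluster name and URLs are illustrative; point them at your own brokers)
+```bash
+$ pulsar-admin clusters create cluster-us-west \
+--url http://pulsar.us-west.example.com:8080 \
+--broker-url pulsar://pulsar.us-west.example.com:6650
+```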
+
+### `update`
+Update the configuration for a cluster
+
+Usage
+```bash
+$ pulsar-admin clusters update cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--broker-url`|The URL for the broker service.||
+|`--broker-url-secure`|The broker service URL for a secure connection||
+|`--url`|The web service URL for the cluster||
+|`--url-secure`|The web service URL for a secure connection||
+
+
+### `delete`
+Deletes an existing cluster
+
+Usage
+```bash
+$ pulsar-admin clusters delete cluster-name
+```
+
+### `list`
+List the existing clusters
+
+Usage
+```bash
+$ pulsar-admin clusters list
+```
+
+### `update-peer-clusters`
+Update peer cluster names
+
+Usage
+```bash
+$ pulsar-admin clusters update-peer-clusters cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--peer-clusters`|Comma-separated peer cluster names (pass an empty string "" to delete the list)||
+
+### `get-peer-clusters`
+Get the list of peer clusters
+
+Usage
+```bash
+$ pulsar-admin clusters get-peer-clusters
+```
+
+### `get-failure-domain`
+Get the configured brokers of a failure domain
+
+Usage
+```bash
+$ pulsar-admin clusters get-failure-domain cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--domain-name`|The failure domain name, which is a logical domain under a Pulsar cluster||
+
+### `create-failure-domain`
+Create a new failure domain for a cluster (updates it if it already exists)
+
+Usage
+```bash
+$ pulsar-admin clusters create-failure-domain cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--broker-list`|Comma-separated broker list||
+|`--domain-name`|The failure domain name, which is a logical domain under a Pulsar cluster||
+
+### `update-failure-domain`
+Update a failure domain for a cluster (creates a new one if it does not exist)
+
+Usage
+```bash
+$ pulsar-admin clusters update-failure-domain cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--broker-list`|Comma-separated broker list||
+|`--domain-name`|The failure domain name, which is a logical domain under a Pulsar cluster||
+
+### `delete-failure-domain`
+Delete an existing failure domain
+
+Usage
+```bash
+$ pulsar-admin clusters delete-failure-domain cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--domain-name`|The failure domain name, which is a logical domain under a Pulsar cluster||
+
+### `list-failure-domains`
+List the existing failure domains for a cluster
+
+Usage
+```bash
+$ pulsar-admin clusters list-failure-domains cluster-name
+```
+
+
+## `functions`
+
+A command-line interface for Pulsar Functions
+
+Usage
+```bash
+$ pulsar-admin functions subcommand
+```
+
+Subcommands
+* `localrun`
+* `create`
+* `delete`
+* `update`
+* `get`
+* `restart`
+* `stop`
+* `start`
+* `status`
+* `stats`
+* `list`
+* `querystate`
+* `putstate`
+* `trigger`
+
+
+### `localrun`
+Run the Pulsar Function locally (rather than deploying it to the Pulsar cluster)
+
+
+Usage
+```bash
+$ pulsar-admin functions localrun options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--cpu`|The CPU in cores that needs to be allocated per function instance (applicable only to the docker runtime)||
+|`--ram`|The RAM in bytes that needs to be allocated per function instance (applicable only to the process/docker runtime)||
+|`--disk`|The disk space in bytes that needs to be allocated per function instance (applicable only to the docker runtime)||
+|`--auto-ack`|Whether or not the framework will automatically acknowledge messages||
+|`--subs-name`|Pulsar source subscription name if the user wants a specific subscription name for the input-topic consumer||
+|`--broker-service-url`|The URL of the Pulsar broker||
+|`--classname`|The function's class name||
+|`--custom-serde-inputs`|The map of input topics to SerDe class names (as a JSON string)||
+|`--custom-schema-inputs`|The map of input topics to Schema class names (as a JSON string)||
+|`--client-auth-params`|Client authentication parameters||
+|`--client-auth-plugin`|Client authentication plugin with which the function process can connect to the broker||
+|`--function-config-file`|The path to a YAML config file specifying the function's configuration||
+|`--hostname-verification-enabled`|Enable hostname verification|false|
+|`--instance-id-offset`|Start the instanceIds from this offset|0|
+|`--inputs`|The function's input topic or topics (multiple topics can be specified as a comma-separated list)||
+|`--log-topic`|The topic to which the function's logs are produced||
+|`--jar`|Path to the jar file for the function (if the function is written in Java). It also supports url-path [http/https/file (file protocol assumes that file already exists on worker host)] from which the worker can download the package.||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--output`|The function's output topic (if none is specified, no output is written)||
+|`--output-serde-classname`|The SerDe class to be used for messages output by the function||
+|`--parallelism`|The function's parallelism factor, i.e. the number of instances of the function to run|1|
+|`--processing-guarantees`|The processing guarantees (aka delivery semantics) applied to the function. Possible values: [ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE]|ATLEAST_ONCE|
+|`--py`|Path to the main Python file/Python wheel file for the function (if the function is written in Python)||
+|`--schema-type`|The builtin schema type or custom schema class name to be used for messages output by the function||
+|`--sliding-interval-count`|The number of messages after which the window slides||
+|`--sliding-interval-duration-ms`|The time duration after which the window slides||
+|`--state-storage-service-url`|The URL for the state storage service. By default, it is set to the service URL of Apache BookKeeper. This service URL must be added manually when the Pulsar Function runs locally.||
+|`--tenant`|The function's tenant||
+|`--topics-pattern`|The topic pattern to consume from a list of topics under a namespace that match the pattern. [--input] and [--topic-pattern] are mutually exclusive. Add a SerDe class name for a pattern in --custom-serde-inputs (supported for Java functions only)||
+|`--user-config`|User-defined config key/values||
+|`--window-length-count`|The number of messages per window||
+|`--window-length-duration-ms`|The time duration of the window in milliseconds||
+|`--dead-letter-topic`|The topic where all messages which could not be processed successfully are sent||
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--max-message-retries`|How many times to try to process a message before giving up||
+|`--retain-ordering`|Function consumes and processes messages in order||
+|`--timeout-ms`|The message timeout in milliseconds||
+|`--tls-allow-insecure`|Allow insecure TLS connection|false|
+|`--tls-trust-cert-path`|The TLS trust cert file path||
+|`--use-tls`|Use TLS connection|false|
+|`--producer-config`| The custom producer configuration (as a JSON string) | |
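+
+Example (a sketch; the jar path, class name, and topic names are illustrative)
+```bash
+$ pulsar-admin functions localrun \
+--jar my-function.jar \
+--classname org.example.MyFunction \
+--inputs persistent://public/default/input-topic \
+--output persistent://public/default/output-topic
+```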
+
+
+### `create`
+Create a Pulsar Function in cluster mode (i.e. deploy it on a Pulsar cluster)
+
+Usage
+```bash
+$ pulsar-admin functions create options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--cpu`|The CPU in cores that needs to be allocated per function instance (applicable only to the docker runtime)||
+|`--ram`|The RAM in bytes that needs to be allocated per function instance (applicable only to the process/docker runtime)||
+|`--disk`|The disk space in bytes that needs to be allocated per function instance (applicable only to the docker runtime)||
+|`--auto-ack`|Whether or not the framework will automatically acknowledge messages||
+|`--subs-name`|Pulsar source subscription name if the user wants a specific subscription name for the input-topic consumer||
+|`--classname`|The function's class name||
+|`--custom-serde-inputs`|The map of input topics to SerDe class names (as a JSON string)||
+|`--custom-schema-inputs`|The map of input topics to Schema class names (as a JSON string)||
+|`--function-config-file`|The path to a YAML config file specifying the function's configuration||
+|`--inputs`|The function's input topic or topics (multiple topics can be specified as a comma-separated list)||
+|`--log-topic`|The topic to which the function's logs are produced||
+|`--jar`|Path to the jar file for the function (if the function is written in Java). It also supports url-path [http/https/file (file protocol assumes that file already exists on worker host)] from which the worker can download the package.||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--output`|The function's output topic (if none is specified, no output is written)||
+|`--output-serde-classname`|The SerDe class to be used for messages output by the function||
+|`--parallelism`|The function's parallelism factor, i.e. the number of instances of the function to run|1|
+|`--processing-guarantees`|The processing guarantees (aka delivery semantics) applied to the function. Possible values: [ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE]|ATLEAST_ONCE|
+|`--py`|Path to the main Python file/Python wheel file for the function (if the function is written in Python)||
+|`--schema-type`|The builtin schema type or custom schema class name to be used for messages output by the function||
+|`--sliding-interval-count`|The number of messages after which the window slides||
+|`--sliding-interval-duration-ms`|The time duration after which the window slides||
+|`--tenant`|The function's tenant||
+|`--topics-pattern`|The topic pattern to consume from a list of topics under a namespace that match the pattern. [--input] and [--topic-pattern] are mutually exclusive. Add a SerDe class name for a pattern in --custom-serde-inputs (supported for Java functions only)||
+|`--user-config`|User-defined config key/values||
+|`--window-length-count`|The number of messages per window||
+|`--window-length-duration-ms`|The time duration of the window in milliseconds||
+|`--dead-letter-topic`|The topic where all messages which could not be processed successfully are sent||
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--max-message-retries`|How many times to try to process a message before giving up||
+|`--retain-ordering`|Function consumes and processes messages in order||
+|`--timeout-ms`|The message timeout in milliseconds||
+|`--producer-config`| The custom producer configuration (as a JSON string) | |
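+
+Example (a sketch; the jar path, class name, and topics are illustrative)
+```bash
+$ pulsar-admin functions create \
+--tenant public \
+--namespace default \
+--name my-function \
+--jar my-function.jar \
+--classname org.example.MyFunction \
+--inputs persistent://public/default/input-topic \
+--output persistent://public/default/output-topic
+```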
+
+
+### `delete`
+Delete a Pulsar Function that's running on a Pulsar cluster
+
+Usage
+```bash
+$ pulsar-admin functions delete options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `update`
+Update a Pulsar Function that's been deployed to a Pulsar cluster
+
+Usage
+```bash
+$ pulsar-admin functions update options
+```
+
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--cpu`|The CPU in cores that needs to be allocated per function instance (applicable only to the docker runtime)||
+|`--ram`|The RAM in bytes that needs to be allocated per function instance (applicable only to the process/docker runtime)||
+|`--disk`|The disk space in bytes that needs to be allocated per function instance (applicable only to the docker runtime)||
+|`--auto-ack`|Whether or not the framework will automatically acknowledge messages||
+|`--subs-name`|Pulsar source subscription name if the user wants a specific subscription name for the input-topic consumer||
+|`--classname`|The function's class name||
+|`--custom-serde-inputs`|The map of input topics to SerDe class names (as a JSON string)||
+|`--custom-schema-inputs`|The map of input topics to Schema class names (as a JSON string)||
+|`--function-config-file`|The path to a YAML config file specifying the function's configuration||
+|`--inputs`|The function's input topic or topics (multiple topics can be specified as a comma-separated list)||
+|`--log-topic`|The topic to which the function's logs are produced||
+|`--jar`|Path to the jar file for the function (if the function is written in Java). It also supports url-path [http/https/file (file protocol assumes that file already exists on worker host)] from which the worker can download the package.||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--output`|The function's output topic (if none is specified, no output is written)||
+|`--output-serde-classname`|The SerDe class to be used for messages output by the function||
+|`--parallelism`|The function's parallelism factor, i.e. the number of instances of the function to run|1|
+|`--processing-guarantees`|The processing guarantees (aka delivery semantics) applied to the function. Possible values: [ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE]|ATLEAST_ONCE|
+|`--py`|Path to the main Python file/Python wheel file for the function (if the function is written in Python)||
+|`--schema-type`|The builtin schema type or custom schema class name to be used for messages output by the function||
+|`--sliding-interval-count`|The number of messages after which the window slides||
+|`--sliding-interval-duration-ms`|The time duration after which the window slides||
+|`--tenant`|The function's tenant||
+|`--topics-pattern`|The topic pattern to consume from a list of topics under a namespace that match the pattern. [--input] and [--topic-pattern] are mutually exclusive. Add a SerDe class name for a pattern in --custom-serde-inputs (supported for Java functions only)||
+|`--user-config`|User-defined config key/values||
+|`--window-length-count`|The number of messages per window||
+|`--window-length-duration-ms`|The time duration of the window in milliseconds||
+|`--dead-letter-topic`|The topic where all messages which could not be processed successfully are sent||
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--max-message-retries`|How many times to try to process a message before giving up||
+|`--retain-ordering`|Function consumes and processes messages in order||
+|`--timeout-ms`|The message timeout in milliseconds||
+|`--producer-config`| The custom producer configuration (as a JSON string) | |
+
+
+### `get`
+Fetch information about a Pulsar Function
+
+Usage
+```bash
+$ pulsar-admin functions get options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `restart`
+Restart a function instance
+
+Usage
+```bash
+$ pulsar-admin functions restart options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--instance-id`|The function instanceId (restart all instances if instance-id is not provided)||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `stop`
+Stop a function instance
+
+Usage
+```bash
+$ pulsar-admin functions stop options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--instance-id`|The function instanceId (stop all instances if instance-id is not provided)||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `start`
+Start a stopped function instance
+
+Usage
+```bash
+$ pulsar-admin functions start options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--instance-id`|The function instanceId (start all instances if instance-id is not provided)||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `status`
+Check the current status of a Pulsar Function
+
+Usage
+```bash
+$ pulsar-admin functions status options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--instance-id`|The function instanceId (get the status of all instances if instance-id is not provided)||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `stats`
+Get the current stats of a Pulsar Function
+
+Usage
+```bash
+$ pulsar-admin functions stats options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--instance-id`|The function instanceId (get the stats of all instances if instance-id is not provided)||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+### `list`
+List all of the Pulsar Functions running under a specific tenant and namespace
+
+Usage
+```bash
+$ pulsar-admin functions list options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+
+
+### `querystate`
+Fetch the current state associated with a Pulsar Function running in cluster mode
+
+Usage
+```bash
+$ pulsar-admin functions querystate options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`-k`, `--key`|The key for the state you want to fetch||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+|`-w`, `--watch`|Watch for changes in the value associated with a key for a Pulsar Function|false|
+
+### `putstate`
+Put a key/value pair to the state associated with a Pulsar Function
+
+Usage
+```bash
+$ pulsar-admin functions putstate options
+```
+
+Options
+
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the Pulsar Function||
+|`--name`|The name of a Pulsar Function||
+|`--namespace`|The namespace of a Pulsar Function||
+|`--tenant`|The tenant of a Pulsar Function||
+|`-s`, `--state`|The FunctionState that needs to be put||
+
+### `trigger`
+Triggers the specified Pulsar Function with a supplied value
+
+Usage
+```bash
+$ pulsar-admin functions trigger options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--fqfn`|The Fully Qualified Function Name (FQFN) for the function||
+|`--name`|The function's name||
+|`--namespace`|The function's namespace||
+|`--tenant`|The function's tenant||
+|`--topic`|The specific topic name that the function consumes from, into which you want to inject the data||
+|`--trigger-file`|The path to the file that contains the data with which you'd like to trigger the function||
+|`--trigger-value`|The value with which you want to trigger the function||
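+
+Example (illustrative names; the function must already be running and consuming from one of its input topics)
+```bash
+$ pulsar-admin functions trigger \
+--tenant public \
+--namespace default \
+--name my-function \
+--trigger-value "hello pulsar"
+```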
+
+
+## `functions-worker`
+Operations to collect function-worker statistics
+
+```bash
+$ pulsar-admin functions-worker subcommand
+```
+
+Subcommands
+
+* `function-stats`
+* `get-cluster`
+* `get-cluster-leader`
+* `get-function-assignments`
+* `monitoring-metrics`
+
+### `function-stats`
+
+Dump all function stats running on this broker
+
+Usage
+```bash
+$ pulsar-admin functions-worker function-stats
+```
+
+### `get-cluster`
+
+Get all workers belonging to this cluster
+
+Usage
+```bash
+$ pulsar-admin functions-worker get-cluster
+```
+
+### `get-cluster-leader`
+
+Get the leader of the worker cluster
+
+Usage
+```bash
+$ pulsar-admin functions-worker get-cluster-leader
+```
+
+### `get-function-assignments`
+
+Get the assignments of the functions across the worker cluster
+
+Usage
+```bash
+$ pulsar-admin functions-worker get-function-assignments
+```
+
+### `monitoring-metrics`
+
+Dump metrics for monitoring
+
+Usage
+```bash
+$ pulsar-admin functions-worker monitoring-metrics
+```
+
+## `namespaces`
+
+Operations for managing namespaces
+
+
+```bash
+$ pulsar-admin namespaces subcommand
+```
+
+Subcommands
+* `list`
+* `topics`
+* `policies`
+* `create`
+* `delete`
+* `set-deduplication`
+* `set-auto-topic-creation`
+* `remove-auto-topic-creation`
+* `set-auto-subscription-creation`
+* `remove-auto-subscription-creation`
+* `permissions`
+* `grant-permission`
+* `revoke-permission`
+* `grant-subscription-permission`
+* `revoke-subscription-permission`
+* `set-clusters`
+* `get-clusters`
+* `get-backlog-quotas`
+* `set-backlog-quota`
+* `remove-backlog-quota`
+* `get-persistence`
+* `set-persistence`
+* `get-message-ttl`
+* `set-message-ttl`
+* `get-anti-affinity-group`
+* `set-anti-affinity-group`
+* `get-anti-affinity-namespaces`
+* `delete-anti-affinity-group`
+* `get-retention`
+* `set-retention`
+* `unload`
+* `split-bundle`
+* `set-dispatch-rate`
+* `get-dispatch-rate`
+* `set-replicator-dispatch-rate`
+* `get-replicator-dispatch-rate`
+* `set-subscribe-rate`
+* `get-subscribe-rate`
+* `set-subscription-dispatch-rate`
+* `get-subscription-dispatch-rate`
+* `clear-backlog`
+* `unsubscribe`
+* `set-encryption-required`
+* `set-delayed-delivery`
+* `get-delayed-delivery`
+* `set-subscription-auth-mode`
+* `get-max-producers-per-topic`
+* `set-max-producers-per-topic`
+* `get-max-consumers-per-topic`
+* `set-max-consumers-per-topic`
+* `get-max-consumers-per-subscription`
+* `set-max-consumers-per-subscription`
+* `get-max-unacked-messages-per-subscription`
+* `set-max-unacked-messages-per-subscription`
+* `get-max-unacked-messages-per-consumer`
+* `set-max-unacked-messages-per-consumer`
+* `get-compaction-threshold`
+* `set-compaction-threshold`
+* `get-offload-threshold`
+* `set-offload-threshold`
+* `get-offload-deletion-lag`
+* `set-offload-deletion-lag`
+* `clear-offload-deletion-lag`
+* `get-schema-autoupdate-strategy`
+* `set-schema-autoupdate-strategy`
+* `get-publish-rate`
+* `set-publish-rate`
+* `set-offload-policies`
+* `get-offload-policies`
+
+
+### `list`
+Get the namespaces for a tenant
+
+Usage
+```bash
+$ pulsar-admin namespaces list tenant-name
+```
+
+### `topics`
+Get the list of topics for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces topics tenant/namespace
+```
+
+### `policies`
+Get the configuration policies of a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces policies tenant/namespace
+```
+
+### `create`
+Create a new namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces create tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-b`, `--bundles`|The number of bundles to activate|0|
+|`-c`, `--clusters`|The list of clusters this namespace will be assigned to||
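+
+Example (an illustrative tenant and namespace; the tenant must already exist)
+```bash
+$ pulsar-admin namespaces create my-tenant/my-ns \
+--clusters my-cluster \
+--bundles 16
+```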
+
+
+### `delete`
+Deletes a namespace. The namespace must be empty.
+
+Usage
+```bash
+$ pulsar-admin namespaces delete tenant/namespace
+```
+
+### `set-deduplication`
+Enable or disable message deduplication on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-deduplication tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--enable`, `-e`|Enable message deduplication on the specified namespace|false|
+|`--disable`, `-d`|Disable message deduplication on the specified namespace|false|
+
+### `set-auto-topic-creation`
+Enable or disable autoTopicCreation for a namespace, overriding broker settings
+
+Usage
+```bash
+$ pulsar-admin namespaces set-auto-topic-creation tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--enable`, `-e`|Enable allowAutoTopicCreation on namespace|false|
+|`--disable`, `-d`|Disable allowAutoTopicCreation on namespace|false|
+|`--type`, `-t`|Type of topic to be auto-created. Possible values: (partitioned, non-partitioned)|non-partitioned|
+|`--num-partitions`, `-n`|Default number of partitions of topic to be auto-created, applicable to partitioned topics only||
+
+### `remove-auto-topic-creation`
+Remove override of autoTopicCreation for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces remove-auto-topic-creation tenant/namespace
+```
+
+### `set-auto-subscription-creation`
+Enable autoSubscriptionCreation for a namespace, overriding broker settings
+
+Usage
+```bash
+$ pulsar-admin namespaces set-auto-subscription-creation tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--enable`, `-e`|Enable allowAutoSubscriptionCreation on namespace|false|
+
+### `remove-auto-subscription-creation`
+Remove override of autoSubscriptionCreation for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces remove-auto-subscription-creation tenant/namespace
+```
+
+### `permissions`
+Get the permissions on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces permissions tenant/namespace
+```
+
+### `grant-permission`
+Grant permissions on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces grant-permission tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--actions`|Actions to be granted (`produce` or `consume`)||
+|`--role`|The client role to which to grant the permissions||
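+
+Example (the role name is illustrative; multiple actions can be passed as a comma-separated list)
+```bash
+$ pulsar-admin namespaces grant-permission my-tenant/my-ns \
+--role my-app-role \
+--actions produce,consume
+```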
+
+
+### `revoke-permission`
+Revoke permissions on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces revoke-permission tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--role`|The client role from which to revoke the permissions||
+
+### `grant-subscription-permission`
+Grant permissions to access the subscription admin-api
+
+Usage
+```bash
+$ pulsar-admin namespaces grant-subscription-permission tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--roles`|The client roles to which to grant the permissions (comma-separated roles)||
+|`--subscription`|The subscription name for which permission will be granted to roles||
+
+### `revoke-subscription-permission`
+Revoke permissions to access the subscription admin-api
+
+Usage
+```bash
+$ pulsar-admin namespaces revoke-subscription-permission tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--role`|The client role from which to revoke the permissions||
+|`--subscription`|The subscription name for which permission will be revoked from roles||
+
+### `set-clusters`
+Set replication clusters for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-clusters tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-c`, `--clusters`|Replication clusters ID list (comma-separated values)||
+
+
+### `get-clusters`
+Get replication clusters for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-clusters tenant/namespace
+```
+
+### `get-backlog-quotas`
+Get the backlog quota policies for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-backlog-quotas tenant/namespace
+```
+
+### `set-backlog-quota`
+Set a backlog quota policy for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-backlog-quota tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-l`, `--limit`|The backlog size limit (for example `10M` or `16G`)||
+|`-p`, `--policy`|The retention policy to enforce when the limit is reached. The valid options are: `producer_request_hold`, `producer_exception` or `consumer_backlog_eviction`||
+
+Example
+```bash
+$ pulsar-admin namespaces set-backlog-quota my-tenant/my-ns \
+--limit 2G \
+--policy producer_request_hold
+```
+
+### `remove-backlog-quota`
+Remove a backlog quota policy from a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces remove-backlog-quota tenant/namespace
+```
+
+### `get-persistence`
+Get the persistence policies for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-persistence tenant/namespace
+```
+
+### `set-persistence`
+Set the persistence policies for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-persistence tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-a`, `--bookkeeper-ack-quorum`|The number of acks (guaranteed copies) to wait for each entry|0|
+|`-e`, `--bookkeeper-ensemble`|The number of bookies to use for a topic|0|
+|`-w`, `--bookkeeper-write-quorum`|The number of writes to make for each entry|0|
+|`-r`, `--ml-mark-delete-max-rate`|Throttling rate of mark-delete operation (0 means no throttle)||
+
+
+### `get-message-ttl`
+Get the message TTL for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-message-ttl tenant/namespace
+```
+
+### `set-message-ttl`
+Set the message TTL for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-message-ttl tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-ttl`, `--messageTTL`|Message TTL in seconds|0|
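+
+Example (an illustrative namespace, setting a two-hour TTL)
+```bash
+$ pulsar-admin namespaces set-message-ttl my-tenant/my-ns \
+--messageTTL 7200
+```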
+
+### `get-anti-affinity-group`
+Get the anti-affinity group name for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-anti-affinity-group tenant/namespace
+```
+
+### `set-anti-affinity-group`
+Set the anti-affinity group name for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-anti-affinity-group tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-g`, `--group`|Anti-affinity group name||
+
+### `get-anti-affinity-namespaces`
+Get the anti-affinity namespaces grouped with the given anti-affinity group name
+
+Usage
+```bash
+$ pulsar-admin namespaces get-anti-affinity-namespaces options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--cluster`|Cluster name||
+|`-g`, `--group`|Anti-affinity group name||
+|`-p`, `--tenant`|Tenant is only used for authorization. The client has to be an admin of one of the tenants to access this API||
+
+### `delete-anti-affinity-group`
+Remove the anti-affinity group name for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces delete-anti-affinity-group tenant/namespace
+```
+
+### `get-retention`
+Get the retention policy that is applied to each topic within the specified namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-retention tenant/namespace
+```
+
+### `set-retention`
+Set the retention policy for each topic within the specified namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-retention tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-s`, `--size`|The retention size limits (for example 10M, 16G or 3T) for each topic in the namespace. 0 means no retention and -1 means infinite size retention||
+|`-t`, `--time`|The retention time in minutes, hours, days, or weeks. Examples: 100m, 13h, 2d, 5w. 0 means no retention and -1 means infinite time retention||
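+
+Example (illustrative limits: keep up to 10G or 3 days of data for each topic in the namespace)
+```bash
+$ pulsar-admin namespaces set-retention my-tenant/my-ns \
+--size 10G \
+--time 3d
+```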
+
+
+### `unload`
+Unload a namespace or namespace bundle from the current serving broker.
+
+Usage
+```bash
+$ pulsar-admin namespaces unload tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-b`, `--bundle`|{start-boundary}_{end-boundary} (e.g. 0x00000000_0xffffffff)||
+
+### `split-bundle`
+Split a namespace-bundle from the current serving broker
+
+Usage
+```bash
+$ pulsar-admin namespaces split-bundle tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-b`, `--bundle`|{start-boundary}_{end-boundary} (e.g. 0x00000000_0xffffffff)||
+|`-u`, `--unload`|Unload newly split bundles after splitting old bundle|false|
+
+### `set-dispatch-rate`
+Set message-dispatch-rate for all topics of the namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-dispatch-rate tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-bd`, `--byte-dispatch-rate`|The byte dispatch rate (the default value -1 is applied if the flag is not passed)|-1|
+|`-dt`, `--dispatch-rate-period`|The dispatch rate period in seconds (the default value of 1 second is applied if the flag is not passed)|1|
+|`-md`, `--msg-dispatch-rate`|The message dispatch rate (the default value -1 is applied if the flag is not passed)|-1|
+
+### `get-dispatch-rate`
+Get the configured message-dispatch-rate for all topics of the namespace (disabled if the value is < 0)
+
+Usage
+```bash
+$ pulsar-admin namespaces get-dispatch-rate tenant/namespace
+```
+
+### `set-replicator-dispatch-rate`
+Set replicator message-dispatch-rate for all topics of the namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-replicator-dispatch-rate tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-bd`, `--byte-dispatch-rate`|The byte dispatch rate (the default value -1 is applied if the flag is not passed)|-1|
+|`-dt`, `--dispatch-rate-period`|The dispatch rate period in seconds (the default value of 1 second is applied if the flag is not passed)|1|
+|`-md`, `--msg-dispatch-rate`|The message dispatch rate (the default value -1 is applied if the flag is not passed)|-1|
+
+### `get-replicator-dispatch-rate`
+Get the configured replicator message-dispatch-rate for all topics of the namespace (disabled if the value is < 0)
+
+Usage
+```bash
+$ pulsar-admin namespaces get-replicator-dispatch-rate tenant/namespace
+```
+
+### `set-subscribe-rate`
+Set subscribe-rate per consumer for all topics of the namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-subscribe-rate tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-sr`, `--subscribe-rate`|The subscribe rate (the default value -1 is applied if the flag is not passed)|-1|
+|`-st`, `--subscribe-rate-period`|The subscribe rate period in seconds (the default value of 30 seconds is applied if the flag is not passed)|30|
+
+### `get-subscribe-rate`
+Get the configured subscribe-rate per consumer for all topics of the namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-subscribe-rate tenant/namespace
+```
+
+### `set-subscription-dispatch-rate`
+Set subscription message-dispatch-rate for all subscriptions of the namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-subscription-dispatch-rate tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-bd`, `--byte-dispatch-rate`|The byte dispatch rate (the default value -1 is applied if the flag is not passed)|-1|
+|`-dt`, `--dispatch-rate-period`|The dispatch rate period in seconds (the default value of 1 second is applied if the flag is not passed)|1|
+|`-md`, `--sub-msg-dispatch-rate`|The message dispatch rate (the default value -1 is applied if the flag is not passed)|-1|
+
+### `get-subscription-dispatch-rate`
+Get the configured subscription message-dispatch-rate for all topics of the namespace (disabled if the value is < 0)
+
+Usage
+```bash
+$ pulsar-admin namespaces get-subscription-dispatch-rate tenant/namespace
+```
+
+### `clear-backlog`
+Clear the backlog for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces clear-backlog tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-b`, `--bundle`|{start-boundary}_{end-boundary} (e.g. 0x00000000_0xffffffff)||
+|`-force`, `--force`|Whether to force a clear backlog without prompt|false|
+|`-s`, `--sub`|The subscription name||
+
+
+### `unsubscribe`
+Unsubscribe the given subscription on all destinations on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces unsubscribe tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-b`, `--bundle`|{start-boundary}_{end-boundary} (e.g. 0x00000000_0xffffffff)||
+|`-s`, `--sub`|The subscription name||
+
+### `set-encryption-required`
+Enable or disable message encryption required for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-encryption-required tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-d`, `--disable`|Disable message encryption required|false|
+|`-e`, `--enable`|Enable message encryption required|false|
+
+### `set-delayed-delivery`
+Set the delayed delivery policy on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-delayed-delivery tenant/namespace options
+```
+
+Options
+
+|Flag|Description|Default|
+|----|---|---|
+|`-d`, `--disable`|Disable delayed delivery messages|false|
+|`-e`, `--enable`|Enable delayed delivery messages|false|
+|`-t`, `--time`|The tick time for retrying on delayed delivery messages|1s|
+
+
+### `get-delayed-delivery`
+Get the delayed delivery policy on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-delayed-delivery tenant/namespace
+```
+
+Options
+
+|Flag|Description|Default|
+|----|---|---|
+|`-t`, `--time`|The tick time for retrying on delayed delivery messages|1s|
+
+
+### `set-subscription-auth-mode`
+Set the subscription auth mode on a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-subscription-auth-mode tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-m`, `--subscription-auth-mode`|Subscription authorization mode for Pulsar policies. Valid options are: [None, Prefix]||
+
+### `get-max-producers-per-topic`
+Get maxProducersPerTopic for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-max-producers-per-topic tenant/namespace
+```
+
+### `set-max-producers-per-topic`
+Set maxProducersPerTopic for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-max-producers-per-topic tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-p`, `--max-producers-per-topic`|maxProducersPerTopic for a namespace|0|
+
+### `get-max-consumers-per-topic`
+Get maxConsumersPerTopic for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-max-consumers-per-topic tenant/namespace
+```
+
+### `set-max-consumers-per-topic`
+Set maxConsumersPerTopic for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-max-consumers-per-topic tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--max-consumers-per-topic`|maxConsumersPerTopic for a namespace|0|
+
+### `get-max-consumers-per-subscription`
+Get maxConsumersPerSubscription for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-max-consumers-per-subscription tenant/namespace
+```
+
+### `set-max-consumers-per-subscription`
+Set maxConsumersPerSubscription for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-max-consumers-per-subscription tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--max-consumers-per-subscription`|maxConsumersPerSubscription for a namespace|0|
+
+### `get-max-unacked-messages-per-subscription`
+Get maxUnackedMessagesPerSubscription for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-max-unacked-messages-per-subscription tenant/namespace
+```
+
+### `set-max-unacked-messages-per-subscription`
+Set maxUnackedMessagesPerSubscription for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-max-unacked-messages-per-subscription tenant/namespace options
+```
+
+Options
+
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--max-unacked-messages-per-subscription`|maxUnackedMessagesPerSubscription for a namespace|-1|
+
+### `get-max-unacked-messages-per-consumer`
+Get maxUnackedMessagesPerConsumer for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-max-unacked-messages-per-consumer tenant/namespace
+```
+
+### `set-max-unacked-messages-per-consumer`
+Set maxUnackedMessagesPerConsumer for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-max-unacked-messages-per-consumer tenant/namespace options
+```
+
+Options
+
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--max-unacked-messages-per-consumer`|maxUnackedMessagesPerConsumer for a namespace|-1|
+
+
+### `get-compaction-threshold`
+Get compactionThreshold for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-compaction-threshold tenant/namespace
+```
+
+### `set-compaction-threshold`
+Set compactionThreshold for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-compaction-threshold tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-t`, `--threshold`|Maximum number of bytes in a topic backlog before compaction is triggered (for example 10M, 16G or 3T). 0 disables automatic compaction|0|
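+
+Example (an illustrative threshold: trigger compaction once a topic backlog exceeds 100M)
+```bash
+$ pulsar-admin namespaces set-compaction-threshold my-tenant/my-ns \
+--threshold 100M
+```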
+
+
+### `get-offload-threshold`
+Get offloadThreshold for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-offload-threshold tenant/namespace
+```
+
+### `set-offload-threshold`
+Set offloadThreshold for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-offload-threshold tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-s`, `--size`|Maximum number of bytes stored in the Pulsar cluster for a topic before data starts being automatically offloaded to long-term storage (for example 10M, 16G, 3T, 100). Negative values disable automatic offload. 0 triggers offloading as soon as possible.|-1|
+
+### `get-offload-deletion-lag`
+Get offloadDeletionLag, in minutes, for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-offload-deletion-lag tenant/namespace
+```
+
+### `set-offload-deletion-lag`
+Set offloadDeletionLag for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-offload-deletion-lag tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-l`, `--lag`|Duration to wait after offloading a ledger segment before deleting the copy of that segment from cluster local storage (for example 10m, 5h, 3d, 2w).|-1|
+
+### `clear-offload-deletion-lag`
+Clear offloadDeletionLag for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces clear-offload-deletion-lag tenant/namespace
+```
+
+### `get-schema-autoupdate-strategy`
+Get the schema auto-update strategy for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-schema-autoupdate-strategy tenant/namespace
+```
+
+### `set-schema-autoupdate-strategy`
+Set the schema auto-update strategy for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-schema-autoupdate-strategy tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--compatibility`|Compatibility level required for new schemas created via a Producer. Possible values (Full, Backward, Forward, None).|Full|
+|`-d`, `--disabled`|Disable automatic schema updates.|false|
+
+### `get-publish-rate`
+Get the message publish rate for each topic in a namespace, in bytes as well as messages per second
+
+Usage
+```bash
+$ pulsar-admin namespaces get-publish-rate tenant/namespace
+```
+
+### `set-publish-rate`
+Set the message publish rate for each topic in a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-publish-rate tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-m`, `--msg-publish-rate`|Threshold for number of messages per second per topic in the namespace (-1 implies not set, 0 for no limit).|-1|
+|`-b`, `--byte-publish-rate`|Threshold for number of bytes per second per topic in the namespace (-1 implies not set, 0 for no limit).|-1|
+
+## `ns-isolation-policy`
+Operations for managing namespace isolation policies.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy subcommand
+```
+
+Subcommands
+* `set`
+* `get`
+* `list`
+* `delete`
+* `brokers`
+* `broker`
+
+### `set`
+Create/update a namespace isolation policy for a cluster. This operation requires Pulsar superuser privileges.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy set cluster-name policy-name options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`--auto-failover-policy-params`|Comma-separated name=value auto failover policy parameters|[]|
+|`--auto-failover-policy-type`|Auto failover policy type name. Currently available option: min_available.|[]|
Currently available options: min_available.|[]|
+|`--namespaces`|Comma-separated namespaces regex list|[]|
+|`--primary`|Comma-separated primary broker regex list|[]|
+|`--secondary`|Comma-separated secondary broker regex list|[]|
+
+
+### `get`
+Get the namespace isolation policy of a cluster. This operation requires Pulsar superuser privileges.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy get cluster-name policy-name
+```
+
+### `list`
+List all namespace isolation policies of a cluster. This operation requires Pulsar superuser privileges.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy list cluster-name
+```
+
+### `delete`
+Delete the namespace isolation policy of a cluster. This operation requires Pulsar superuser privileges.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy delete cluster-name policy-name
+```
+
+### `brokers`
+List all brokers with namespace-isolation policies attached to them. This operation requires Pulsar super-user privileges.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy brokers cluster-name
+```
+
+### `broker`
+Get a broker with the namespace-isolation policies attached to it. This operation requires Pulsar super-user privileges.
+
+Usage
+```bash
+$ pulsar-admin ns-isolation-policy broker cluster-name options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`--broker`|Broker name to get namespace-isolation policies attached to it||
+
+## `topics`
+Operations for managing Pulsar topics (both persistent and non-persistent)
+
+Usage
+```bash
+$ pulsar-admin topics subcommand
+```
+
+Subcommands
+* `compact`
+* `compaction-status`
+* `offload`
+* `offload-status`
+* `create-partitioned-topic`
+* `create-missed-partitions`
+* `delete-partitioned-topic`
+* `create`
+* `get-partitioned-topic-metadata`
+* `update-partitioned-topic`
+* `list-partitioned-topics`
+* `list`
+* `terminate`
+* `permissions`
+* `grant-permission`
+* `revoke-permission`
+* `lookup`
+* `bundle-range`
+* `delete`
+* `unload`
+* `create-subscription`
+* `subscriptions`
+* `unsubscribe`
+* `stats`
+* `stats-internal`
+* `info-internal`
+* `partitioned-stats`
+* `partitioned-stats-internal`
+* `skip`
+* `clear-backlog`
+* `expire-messages`
+* `expire-messages-all-subscriptions`
+* `peek-messages`
+* `reset-cursor`
+* `get-message-by-id`
+* `last-message-id`
+* `get-backlog-quotas`
+* `set-backlog-quota`
+* `remove-backlog-quota`
+* `get-persistence`
+* `set-persistence`
+* `remove-persistence`
+* `get-message-ttl`
+* `set-message-ttl`
+* `remove-message-ttl`
+* `get-deduplication`
+* `set-deduplication`
+* `remove-deduplication`
+
+### `compact`
+Run compaction on the specified topic (persistent topics only)
+
+Usage
+```bash
+$ pulsar-admin topics compact persistent://tenant/namespace/topic
+```
+
+### `compaction-status`
+Check the status of a topic compaction (persistent topics only)
+
+Usage
+```bash
+$ pulsar-admin topics compaction-status persistent://tenant/namespace/topic
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-w`, `--wait-complete`|Wait for compaction to complete|false|
+
+
+### `offload`
+Trigger offload of data from a topic to long-term storage (e.g.
Amazon S3)
+
+Usage
+```bash
+$ pulsar-admin topics offload persistent://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-s`, `--size-threshold`|The maximum amount of data to keep in BookKeeper for the specific topic||
+
+
+### `offload-status`
+Check the status of data offloading from a topic to long-term storage
+
+Usage
+```bash
+$ pulsar-admin topics offload-status persistent://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-w`, `--wait-complete`|Wait for offloading to complete|false|
+
+
+### `create-partitioned-topic`
+Create a partitioned topic. A partitioned topic must be created before producers can publish to it.
+
+> #### Note
+>
+> By default, after 60 seconds of creation, topics are considered inactive and deleted automatically to prevent them from generating trash data.
+>
+> To disable this feature, set `brokerDeleteInactiveTopicsEnabled` to `false`.
+>
+> To change the frequency of checking inactive topics, set `brokerDeleteInactiveTopicsFrequencySeconds` to your desired value.
+>
+> For more information about these two parameters, see [here](reference-configuration.md#broker).
+
+Usage
+```bash
+$ pulsar-admin topics create-partitioned-topic {persistent|non-persistent}://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-p`, `--partitions`|The number of partitions for the topic|0|
+
+### `create-missed-partitions`
+Try to create partitions for a partitioned topic. This can be used to repair the partitions of a topic
+when topic auto-creation is disabled
+
+Usage
+```bash
+$ pulsar-admin topics create-missed-partitions persistent://tenant/namespace/topic
+```
+
+### `delete-partitioned-topic`
+Delete a partitioned topic. This will also delete all the partitions of the topic if they exist.
+
+Usage
+```bash
+$ pulsar-admin topics delete-partitioned-topic {persistent|non-persistent}://tenant/namespace/topic
+```
+
+### `create`
+Creates a non-partitioned topic. A non-partitioned topic must explicitly be created by the user if `allowAutoTopicCreation` or `createIfMissing` is disabled.
+
+> #### Note
+>
+> By default, after 60 seconds of creation, topics are considered inactive and deleted automatically to prevent them from generating trash data.
+>
+> To disable this feature, set `brokerDeleteInactiveTopicsEnabled` to `false`.
+>
+> To change the frequency of checking inactive topics, set `brokerDeleteInactiveTopicsFrequencySeconds` to your desired value.
+>
+> For more information about these two parameters, see [here](reference-configuration.md#broker).
+
+Usage
+```bash
+$ pulsar-admin topics create {persistent|non-persistent}://tenant/namespace/topic
+```
+
+### `get-partitioned-topic-metadata`
+Get the partitioned topic metadata. If the topic is not created or is a non-partitioned topic, this will return an empty topic with zero partitions.
+
+Usage
+```bash
+$ pulsar-admin topics get-partitioned-topic-metadata {persistent|non-persistent}://tenant/namespace/topic
+```
+
+### `update-partitioned-topic`
+Update an existing non-global partitioned topic. The new number of partitions must be greater than the existing number of partitions.
+
+Usage
+```bash
+$ pulsar-admin topics update-partitioned-topic {persistent|non-persistent}://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-p`, `--partitions`|The number of partitions for the topic|0|
+
+### `list-partitioned-topics`
+Get the list of partitioned topics under a namespace.
+
+Usage
+```bash
+$ pulsar-admin topics list-partitioned-topics tenant/namespace
+```
+
+### `list`
+Get the list of topics under a namespace
+
+Usage
+```bash
+$ pulsar-admin topics list tenant/namespace
+```
+
+### `terminate`
+Terminate a topic (disallow further messages from being published on the topic)
+
+Usage
+```bash
+$ pulsar-admin topics terminate {persistent|non-persistent}://tenant/namespace/topic
+```
+
+### `permissions`
+Get the permissions on a topic. Retrieve the effective permissions for a destination. These permissions are defined by the permissions set at the namespace level combined (union) with any eventual specific permissions set on the topic.
+
+Usage
+```bash
+$ pulsar-admin topics permissions topic
+```
+
+### `grant-permission`
+Grant a new permission to a client role on a single topic
+
+Usage
+```bash
+$ pulsar-admin topics grant-permission {persistent|non-persistent}://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--actions`|Actions to be granted (`produce` or `consume`)||
+|`--role`|The client role to which to grant the permissions||
+
+
+### `revoke-permission`
+Revoke permissions from a client role on a single topic. If the permission was not set at the topic level, but rather at the namespace level, this operation will return an error (HTTP status code 412).
+
+Usage
+```bash
+$ pulsar-admin topics revoke-permission topic
+```
+
+### `lookup`
+Look up a topic from the current serving broker
+
+Usage
+```bash
+$ pulsar-admin topics lookup topic
+```
+
+### `bundle-range`
+Get the namespace bundle which contains the given topic
+
+Usage
+```bash
+$ pulsar-admin topics bundle-range topic
+```
+
+### `delete`
+Delete a topic. The topic cannot be deleted if there are any active subscriptions or producers connected to the topic.
+
+Usage
+```bash
+$ pulsar-admin topics delete topic
+```
+
+### `unload`
+Unload a topic
+
+Usage
+```bash
+$ pulsar-admin topics unload topic
+```
+
+### `create-subscription`
+Create a new subscription on a topic.
+
+Usage
+```bash
+$ pulsar-admin topics create-subscription [options] persistent://tenant/namespace/topic
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-m`, `--messageId`|The messageId where to create the subscription. It can be either 'latest', 'earliest' or (ledgerId:entryId)|latest|
+|`-s`, `--subscription`|The name of the subscription to create||
+
+### `subscriptions`
+Get the list of subscriptions on the topic
+
+Usage
+```bash
+$ pulsar-admin topics subscriptions topic
+```
+
+### `unsubscribe`
+Delete a durable subscriber from a topic
+
+Usage
+```bash
+$ pulsar-admin topics unsubscribe topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-s`, `--subscription`|The subscription to delete||
+|`-f`, `--force`|Disconnect and close all consumers and delete subscription forcefully|false|
+
+
+### `stats`
+Get the stats for the topic and its connected producers and consumers. All rates are computed over a 1-minute window and are relative to the last completed 1-minute period.
+
+Usage
+```bash
+$ pulsar-admin topics stats topic
+```
+
+> Note
+> The unit of `storageSize` and `averageMsgSize` is Byte.
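+
+For example, assuming a topic named `persistent://my-tenant/my-ns/my-topic` (the tenant, namespace, and topic names here are placeholders), a stats invocation might look like the following sketch:
+
+```bash
+# Hypothetical invocation: print the stats of a topic as JSON
+# (replace the topic name with one that exists in your cluster)
+$ pulsar-admin topics stats persistent://my-tenant/my-ns/my-topic
+```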
+
+### `stats-internal`
+Get the internal stats for the topic
+
+Usage
+```bash
+$ pulsar-admin topics stats-internal topic
+```
+
+### `info-internal`
+Get the internal metadata info for the topic
+
+Usage
+```bash
+$ pulsar-admin topics info-internal topic
+```
+
+### `partitioned-stats`
+Get the stats for the partitioned topic and its connected producers and consumers. All rates are computed over a 1-minute window and are relative to the last completed 1-minute period.
+
+Usage
+```bash
+$ pulsar-admin topics partitioned-stats topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`--per-partition`|Get per-partition stats|false|
+
+### `partitioned-stats-internal`
+Get the internal stats for the partitioned topic and its connected producers and consumers. All rates are computed over a 1-minute window and are relative to the last completed 1-minute period.
+
+Usage
+```bash
+$ pulsar-admin topics partitioned-stats-internal topic
+```
+
+
+### `skip`
+Skip some messages for the subscription
+
+Usage
+```bash
+$ pulsar-admin topics skip topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-n`, `--count`|The number of messages to skip|0|
+|`-s`, `--subscription`|The subscription on which to skip messages||
+
+
+### `clear-backlog`
+Clear backlog (skip all the messages) for the subscription
+
+Usage
+```bash
+$ pulsar-admin topics clear-backlog topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-s`, `--subscription`|The subscription to clear||
+
+
+### `expire-messages`
+Expire messages that are older than the given expiry time (in seconds) for the subscription.
+
+Usage
+```bash
+$ pulsar-admin topics expire-messages topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-t`, `--expireTime`|Expire messages older than the time (in seconds)|0|
+|`-s`, `--subscription`|The subscription to skip messages on||
+
+
+### `expire-messages-all-subscriptions`
+Expire messages older than the given expiry time (in seconds) for all subscriptions
+
+Usage
+```bash
+$ pulsar-admin topics expire-messages-all-subscriptions topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-t`, `--expireTime`|Expire messages older than the time (in seconds)|0|
+
+
+### `peek-messages`
+Peek some messages for the subscription.
+
+Usage
+```bash
+$ pulsar-admin topics peek-messages topic options
+```
+
+Options
+|Flag|Description|Default|
+|---|---|---|
+|`-n`, `--count`|The number of messages|0|
+|`-s`, `--subscription`|Subscription to get messages from||
+
+
+### `reset-cursor`
+Reset position for subscription to a position that is closest to timestamp or messageId.
+
+Usage
+```bash
+$ pulsar-admin topics reset-cursor topic options
+```
+
+Options
+
+|Flag|Description|Default|
+|---|---|---|
+|`-s`, `--subscription`|Subscription to reset position on||
+|`-t`, `--time`|The time to reset back to, specified in minutes, hours, days, or weeks. Examples: `100m`, `3h`, `2d`, `5w`.||
+|`-m`, `--messageId`| The messageId to reset back to (ledgerId:entryId). ||
+
+### `last-message-id`
+Get the last committed message ID of a topic.
+
+Usage
+```bash
+$ pulsar-admin topics last-message-id persistent://tenant/namespace/topic
+```
+
+### `get-message-by-id`
+Get message by ledger id and entry id
+
+Usage
+```bash
+$ pulsar-admin topics get-message-by-id topic options
+```
+
+Options
+
+|Flag|Description|Default|
+|---|---|---|
+|`-l`, `--ledgerId`|The ledger id |0|
+|`-e`, `--entryId`|The entry id |0|
+
+### `get-backlog-quotas`
+Get the backlog quota policies for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics get-backlog-quotas tenant/namespace/topic
+```
+
+### `set-backlog-quota`
+Set a backlog quota policy for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics set-backlog-quota tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-l`, `--limit`|Size limit (eg: 10M, 16G)||
+|`-p`, `--policy`|Retention policy to enforce when the limit is reached. The valid options are: `producer_request_hold`, `producer_exception` or `consumer_backlog_eviction`||
+
+### `remove-backlog-quota`
+Remove a backlog quota policy from a topic.
+
+Usage
+```bash
+$ pulsar-admin topics remove-backlog-quota tenant/namespace/topic
+```
+
+### `get-persistence`
+Get the persistence policies for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics get-persistence tenant/namespace/topic
+```
+
+### `set-persistence`
+Set the persistence policies for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics set-persistence tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-e`, `--bookkeeper-ensemble`|Number of bookies to use for a topic|0|
+|`-w`, `--bookkeeper-write-quorum`|How many writes to make of each entry|0|
+|`-a`, `--bookkeeper-ack-quorum`|Number of acks (guaranteed copies) to wait for each entry|0|
+|`-r`, `--ml-mark-delete-max-rate`|Throttling rate of mark-delete operation (0 means no throttle)||
+
+### `remove-persistence`
+Remove the persistence policy for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics remove-persistence tenant/namespace/topic
+```
+
+### `get-message-ttl`
+Get the message TTL for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics get-message-ttl tenant/namespace/topic
+```
+
+### `set-message-ttl`
+Set the message TTL for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics set-message-ttl tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-ttl`, `--messageTTL`|Message TTL for a topic in seconds, allowed range from 1 to `Integer.MAX_VALUE`|0|
+
+### `remove-message-ttl`
+Remove the message TTL for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics remove-message-ttl tenant/namespace/topic
+```
+
+### `get-deduplication`
+Get the deduplication policy for a topic.
+
+Usage
+```bash
+$ pulsar-admin topics get-deduplication tenant/namespace/topic
+```
+
+### `set-deduplication`
+Enable or disable message deduplication on a topic.
+
+Usage
+```bash
+$ pulsar-admin topics set-deduplication tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`--enable`, `-e`|Enable message deduplication on the specified topic.|false|
+|`--disable`, `-d`|Disable message deduplication on the specified topic.|false|
+
+### `remove-deduplication`
+Remove the deduplication policy from a topic.
+
+Usage
+```bash
+$ pulsar-admin topics remove-deduplication tenant/namespace/topic
+```
+
+## `tenants`
+Operations for managing tenants
+
+Usage
+```bash
+$ pulsar-admin tenants subcommand
+```
+
+Subcommands
+* `list`
+* `get`
+* `create`
+* `update`
+* `delete`
+
+### `list`
+List the existing tenants
+
+Usage
+```bash
+$ pulsar-admin tenants list
+```
+
+### `get`
+Gets the configuration of a tenant
+
+Usage
+```bash
+$ pulsar-admin tenants get tenant-name
+```
+
+### `create`
+Creates a new tenant
+
+Usage
+```bash
+$ pulsar-admin tenants create tenant-name options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-r`, `--admin-roles`|Comma-separated admin roles||
+|`-c`, `--allowed-clusters`|Comma-separated allowed clusters||
+
+### `update`
+Updates a tenant
+
+Usage
+```bash
+$ pulsar-admin tenants update tenant-name options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-r`, `--admin-roles`|Comma-separated admin roles||
+|`-c`, `--allowed-clusters`|Comma-separated allowed clusters||
+
+
+### `delete`
+Deletes an existing tenant
+
+Usage
+```bash
+$ pulsar-admin tenants delete tenant-name
+```
+
+
+## `resource-quotas`
+Operations for managing resource quotas
+
+Usage
+```bash
+$ pulsar-admin resource-quotas subcommand
+```
+
+Subcommands
+* `get`
+* `set`
+* `reset-namespace-bundle-quota`
+
+
+### `get`
+Get the resource quota for a specified namespace bundle, or the default quota if no namespace/bundle is specified.
+
+Usage
+```bash
+$ pulsar-admin resource-quotas get options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-b`, `--bundle`|A bundle of the form {start-boundary}_{end_boundary}. This must be specified together with -n/--namespace.||
+|`-n`, `--namespace`|The namespace||
+
+
+### `set`
+Set the resource quota for the specified namespace bundle, or the default quota if no namespace/bundle is specified.
+
+Usage
+```bash
+$ pulsar-admin resource-quotas set options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-bi`, `--bandwidthIn`|The expected inbound bandwidth (in bytes/second)|0|
+|`-bo`, `--bandwidthOut`|Expected outbound bandwidth (in bytes/second)|0|
+|`-b`, `--bundle`|A bundle of the form {start-boundary}_{end_boundary}. This must be specified together with -n/--namespace.||
+|`-d`, `--dynamic`|Allow the quota to be dynamically re-calculated (or not)|false|
+|`-mem`, `--memory`|Expected memory usage (in megabytes)|0|
+|`-mi`, `--msgRateIn`|Expected incoming messages per second|0|
+|`-mo`, `--msgRateOut`|Expected outgoing messages per second|0|
+|`-n`, `--namespace`|The namespace as tenant/namespace, for example my-tenant/my-ns. Must be specified together with -b/--bundle.||
+
+
+### `reset-namespace-bundle-quota`
+Reset the specified namespace bundle's resource quota to a default value.
+
+Usage
+```bash
+$ pulsar-admin resource-quotas reset-namespace-bundle-quota options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-b`, `--bundle`|A bundle of the form {start-boundary}_{end_boundary}. This must be specified together with -n/--namespace.||
+|`-n`, `--namespace`|The namespace||
+
+
+
+## `schemas`
+Operations related to Schemas associated with Pulsar topics.
+
+Usage
+```bash
+$ pulsar-admin schemas subcommand
+```
+
+Subcommands
+* `upload`
+* `delete`
+* `get`
+* `extract`
+
+
+### `upload`
+Upload the schema definition for a topic
+
+Usage
+```bash
+$ pulsar-admin schemas upload persistent://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`--filename`|The path to the schema definition file. An example schema file is available under the conf directory.||
+
+
+### `delete`
+Delete the schema definition associated with a topic
+
+Usage
+```bash
+$ pulsar-admin schemas delete persistent://tenant/namespace/topic
+```
+
+
+### `get`
+Retrieve the schema definition associated with a topic (at a given version if version is supplied).
+
+Usage
+```bash
+$ pulsar-admin schemas get persistent://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`--version`|The version of the schema definition to retrieve for a topic.||
+
+### `extract`
+Provide the schema definition for a topic via a Java class name contained in a JAR file
+
+Usage
+```bash
+$ pulsar-admin schemas extract persistent://tenant/namespace/topic options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-c`, `--classname`|The Java class name||
+|`-j`, `--jar`|A path to the JAR file which contains the above Java class||
+|`-t`, `--type`|The type of the schema (avro or json)||
+
+
+### `get-offload-policies`
+Get the offload policy for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces get-offload-policies tenant/namespace
+```
+
+### `set-offload-policies`
+Set the offload policy for a namespace
+
+Usage
+```bash
+$ pulsar-admin namespaces set-offload-policies tenant/namespace options
+```
+
+Options
+|Flag|Description|Default|
+|----|---|---|
+|`-d`, `--driver`|Driver to use to offload old data to long-term storage (possible values: S3, aws-s3, google-cloud-storage)||
+|`-r`, `--region`|The long-term storage region||
+|`-b`, `--bucket`|Bucket to place offloaded ledgers into||
+|`-e`, `--endpoint`|Alternative endpoint to connect to||
+|`-i`, `--aws-id`|AWS Credential Id to use when using driver S3 or aws-s3||
+|`-s`, `--aws-secret`|AWS Credential Secret to use when using driver S3 or aws-s3||
+|`-mbs`, `--maxBlockSize`|Max block size|64MB|
+|`-rbs`, `--readBufferSize`|Read buffer size|1MB|
+|`-oat`, `--offloadAfterThreshold`|Offload after threshold size (eg: 1M, 5M)||
+|`-oae`, `--offloadAfterElapsed`|Offload after elapsed in millis (or minutes, hours, days, weeks, eg: 100m, 3h, 2d, 5w).||
diff --git a/site2/website/versioned_docs/version-2.7.0/reference-terminology.md b/site2/website/versioned_docs/version-2.7.0/reference-terminology.md
new file mode 100644
index 00000000000000..ad1d36819452f9
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/reference-terminology.md
@@ -0,0 +1,167 @@
+---
+id: version-2.7.0-reference-terminology
+title: Pulsar Terminology
+sidebar_label: Terminology
+original_id: reference-terminology
+---
+
+Here is a glossary of terms related to Apache Pulsar:
+
+### Concepts
+
+#### Pulsar
+
+Pulsar is a distributed messaging system originally created by Yahoo but now under the stewardship of the Apache Software Foundation.
+
+#### Message
+
+Messages are the basic unit of Pulsar. They're what [producers](#producer) publish to [topics](#topic)
+and what [consumers](#consumer) then consume from topics.
+
+#### Topic
+
+A named channel used to pass messages published by [producers](#producer) to [consumers](#consumer) who
+process those [messages](#message).
+
+#### Partitioned Topic
+
+A topic that is served by multiple Pulsar [brokers](#broker), which enables higher throughput.
+
+#### Namespace
+
+A grouping mechanism for related [topics](#topic).
+
+#### Namespace Bundle
+
+A virtual group of [topics](#topic) that belong to the same [namespace](#namespace). A namespace bundle
+is defined as a range between two 32-bit hashes, such as 0x00000000 and 0xffffffff.
+
+#### Tenant
+
+An administrative unit for allocating capacity and enforcing an authentication/authorization scheme.
+
+#### Subscription
+
+A lease on a [topic](#topic) established by a group of [consumers](#consumer). Pulsar has three subscription
+modes (exclusive, shared, and failover).
+
+#### Pub-Sub
+
+A messaging pattern in which [producer](#producer) processes publish messages on [topics](#topic) that
+are then consumed (processed) by [consumer](#consumer) processes.
+
+#### Producer
+
+A process that publishes [messages](#message) to a Pulsar [topic](#topic).
+
+#### Consumer
+
+A process that establishes a subscription to a Pulsar [topic](#topic) and processes messages published
+to that topic by [producers](#producer).
+
+#### Reader
+
+Pulsar readers are message processors much like Pulsar [consumers](#consumer) but with two crucial differences:
+
+- you can specify *where* on a topic readers begin processing messages (consumers always begin with the latest
+  available unacked message);
+- readers don't retain data or acknowledge messages.
+
+#### Cursor
+
+The subscription position for a [consumer](#consumer).
+
+#### Acknowledgment (ack)
+
+A message sent to a Pulsar broker by a [consumer](#consumer) indicating that a message has been successfully processed.
+An acknowledgment (ack) is Pulsar's way of knowing that the message can be deleted from the system;
+if a message is not acknowledged, it is retained until it is processed.
+
+#### Negative Acknowledgment (nack)
+
+When an application fails to process a particular message, it can send a "negative ack" to Pulsar
+to signal that the message should be replayed at a later time. (By default, failed messages are
+replayed after a one-minute delay.)
+
+#### Unacknowledged
+
+A message that has been delivered to a consumer for processing but not yet confirmed as processed by the consumer.
+
+#### Retention Policy
+
+Size and time limits that you can set on a [namespace](#namespace) to configure retention of [messages](#message)
+that have already been [acknowledged](#acknowledgment-ack).
+
+#### Multi-Tenancy
+
+The ability to isolate [namespaces](#namespace), specify quotas, and configure authentication and authorization
+on a per-[tenant](#tenant) basis.
+
+### Architecture
+
+#### Standalone
+
+A lightweight Pulsar broker in which all components run in a single Java Virtual Machine (JVM) process. Standalone
+clusters can be run on a single machine and are useful for development purposes.
+
+#### Cluster
+
+A set of Pulsar [brokers](#broker) and [BookKeeper](#bookkeeper) servers (aka [bookies](#bookie)).
+Clusters can reside in different geographical regions and replicate messages to one another
+in a process called [geo-replication](#geo-replication).
+
+#### Instance
+
+A group of Pulsar [clusters](#cluster) that act together as a single unit.
+
+#### Geo-Replication
+
+Replication of messages across Pulsar [clusters](#cluster), potentially in different datacenters
+or geographical regions.
+
+#### Configuration Store
+
+Pulsar's configuration store (previously known as *global ZooKeeper*) is a ZooKeeper quorum that
+is used for configuration-specific tasks. A multi-cluster Pulsar installation requires just one
+configuration store across all [clusters](#cluster).
+
+#### Topic Lookup
+
+A service provided by Pulsar [brokers](#broker) that enables connecting clients to automatically determine
+which Pulsar [cluster](#cluster) is responsible for a [topic](#topic) (and thus where message traffic for
+the topic needs to be routed).
+
+#### Service Discovery
+
+A mechanism provided by Pulsar that enables connecting clients to use just a single URL to interact
+with all the [brokers](#broker) in a [cluster](#cluster).
+
+#### Broker
+
+A stateless component of Pulsar [clusters](#cluster) that runs two other components: an HTTP server
+exposing a REST interface for administration and topic lookup and a [dispatcher](#dispatcher) that
+handles all message transfers. Pulsar clusters typically consist of multiple brokers.
+
+#### Dispatcher
+
+An asynchronous TCP server used for all data transfers in and out of a Pulsar [broker](#broker). The Pulsar
+dispatcher uses a custom binary protocol for all communications.
+
+### Storage
+
+#### BookKeeper
+
+[Apache BookKeeper](http://bookkeeper.apache.org/) is a scalable, low-latency persistent log storage
+service that Pulsar uses to store data.
+
+#### Bookie
+
+Bookie is the name of an individual BookKeeper server. It is effectively the storage server of Pulsar.
+
+#### Ledger
+
+An append-only data structure in [BookKeeper](#bookkeeper) that is used to persistently store messages in Pulsar [topics](#topic).
+
+### Functions
+
+Pulsar Functions are lightweight functions that can consume messages from Pulsar topics, apply custom processing logic, and, if desired, publish results to topics.
diff --git a/site2/website/versioned_docs/version-2.7.0/schema-get-started.md b/site2/website/versioned_docs/version-2.7.0/schema-get-started.md
new file mode 100644
index 00000000000000..8cc85ea8e0173e
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/schema-get-started.md
@@ -0,0 +1,95 @@
+---
+id: version-2.7.0-schema-get-started
+title: Get started
+sidebar_label: Get started
+original_id: schema-get-started
+---
+
+This chapter introduces Pulsar schemas and explains why they are important.
+
+## Schema Registry
+
+Type safety is extremely important in any application built around a message bus like Pulsar.
+
+Producers and consumers need some kind of mechanism for coordinating types at the topic level to avoid various potential problems, such as serialization and deserialization issues.
+
+Applications typically adopt one of the following approaches to guarantee type safety in messaging. Both approaches are available in Pulsar, and you're free to adopt one or the other or to mix and match on a per-topic basis.
+
+#### Note
+>
+> Currently, the Pulsar schema registry is only available for the [Java client](client-libraries-java.md), [CGo client](client-libraries-cgo.md), [Python client](client-libraries-python.md), and [C++ client](client-libraries-cpp.md).
+
+### Client-side approach
+
+Producers and consumers are responsible for not only serializing and deserializing messages (which consist of raw bytes) but also "knowing" which types are being transmitted via which topics.
+
+If a producer is sending temperature sensor data on the topic `topic-1`, consumers of that topic will run into trouble if they attempt to parse that data as moisture sensor readings.
+
+Producers and consumers can send and receive messages consisting of raw byte arrays and leave all type safety enforcement to the application on an "out-of-band" basis.
+
+### Server-side approach
+
+Producers and consumers inform the system which data types can be transmitted via the topic.
+
+With this approach, the messaging system enforces type safety and ensures that producers and consumers remain synced.
+
+Pulsar has a built-in **schema registry** that enables clients to upload data schemas on a per-topic basis. Those schemas dictate which data types are recognized as valid for that topic.
+
+## Why use schema
+
+If a schema is not used, Pulsar does not parse data: it takes bytes as inputs and sends bytes as outputs. Because data has meaning beyond raw bytes, you need to parse it yourself and might encounter parse exceptions, which mainly occur in the following situations:
+
+* The field does not exist
+
+* The field type has changed (for example, `string` is changed to `int`)
+
+There are a few ways to prevent and overcome these exceptions. For example, you can catch exceptions on parsing errors, but this makes code hard to maintain. Alternatively, you can adopt a schema management system that performs schema evolution without breaking downstream applications and enforces type safety to the maximum extent in the language you are using; this is exactly what Pulsar Schema provides.
+
+Pulsar schema enables you to use language-specific types of data when constructing and handling messages, from simple types like `string` to more complex application-specific types.
+
+**Example**
+
+You can use the _User_ class to define the messages sent to Pulsar topics.
+
+```
+public class User {
+    String name;
+    int age;
+}
+```
+
+When constructing a producer with the _User_ class, you can choose whether to specify a schema, as shown below.
+
+### Without schema
+
+If you construct a producer without specifying a schema, then the producer can only produce messages of type `byte[]`. If you have a POJO class, you need to serialize the POJO into bytes before sending messages.
+
+**Example**
+
+```
+Producer<byte[]> producer = client.newProducer()
+        .topic(topic)
+        .create();
+User user = new User("Tom", 28);
+byte[] message = … // serialize the `user` by yourself;
+producer.send(message);
+```
+### With schema
+
+If you construct a producer and specify a schema, then you can send a class to a topic directly without worrying about how to serialize POJOs into bytes.
+
+**Example**
+
+This example constructs a producer with the _JSONSchema_, and you can send the _User_ class to topics directly without worrying about how to serialize it into bytes.
+
+```
+Producer<User> producer = client.newProducer(JSONSchema.of(User.class))
+        .topic(topic)
+        .create();
+User user = new User("Tom", 28);
+producer.send(user);
+```
+
+### Summary
+
+When constructing a producer with a schema, you do not need to serialize messages into bytes; instead, Pulsar schema does this job in the background.
diff --git a/site2/website/versioned_docs/version-2.7.0/security-authorization.md b/site2/website/versioned_docs/version-2.7.0/security-authorization.md
new file mode 100644
index 00000000000000..52ce1ec886463e
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/security-authorization.md
@@ -0,0 +1,101 @@
+---
+id: version-2.7.0-security-authorization
+title: Authentication and authorization in Pulsar
+sidebar_label: Authorization and ACLs
+original_id: security-authorization
+---
+
+
+In Pulsar, the [authentication provider](security-overview.md#authentication-providers) is responsible for properly identifying clients and associating the clients with [role tokens](security-overview.md#role-tokens). If you only enable authentication, an authenticated role token has the ability to access all resources in the cluster. *Authorization* is the process that determines *what* clients are able to do.
+
+The role tokens with the most privileges are the *superusers*. The *superusers* can create and destroy tenants, along with having full access to all tenant resources.
+
+When a superuser creates a [tenant](reference-terminology.md#tenant), that tenant is assigned an admin role. A client with the admin role token can then create, modify and destroy namespaces, and grant and revoke permissions to *other role tokens* on those namespaces.
+
+## Broker and Proxy Setup
+
+### Enable authorization and assign superusers
+You can enable authorization and assign superusers in the broker ([`conf/broker.conf`](reference-configuration.md#broker)) configuration file.
+
+```properties
+authorizationEnabled=true
+superUserRoles=my-super-user-1,my-super-user-2
+```
+
+> A full list of parameters is available in the `conf/broker.conf` file.
+> You can also find the default values for those parameters in [Broker Configuration](reference-configuration.md#broker).
+
+Typically, you use superuser roles for administrators and clients, as well as for broker-to-broker authorization. When you use [geo-replication](concepts-replication.md), every broker needs to be able to publish to the topics of all the other clusters.
+
+You can also enable authorization for the proxy in the proxy configuration file (`conf/proxy.conf`). Once you enable authorization on the proxy, the proxy does an additional authorization check before forwarding the request to a broker.
+If you enable authorization on the broker, the broker checks the authorization of the request when the broker receives the forwarded request.
+
+### Proxy Roles
+
+By default, the broker treats the connection between a proxy and the broker as a normal user connection. The broker authenticates the user as the role configured in `proxy.conf` (see ["Enable TLS Authentication on Proxies"](security-tls-authentication.md#enable-tls-authentication-on-proxies)). However, when a user connects to the cluster through a proxy, the user rarely authenticates with the broker directly. The user expects to be able to interact with the cluster as the role for which they have authenticated with the proxy.
+
+Pulsar uses *proxy roles* to enable this. Proxy roles are specified in the broker configuration file, [`conf/broker.conf`](reference-configuration.md#broker). If a client that is authenticated with a broker is one of its ```proxyRoles```, all requests from that client must also carry information about the role of the client that is authenticated with the proxy. This information is called the *original principal*.
If the *original principal* is absent, the client is not able to access anything.
+
+You must authorize both the *proxy role* and the *original principal* to access a resource to ensure that the resource is accessible via the proxy. Administrators can take two approaches to authorize the *proxy role* and the *original principal*.
+
+The more secure approach is to grant access to the proxy roles each time you grant access to a resource. For example, if you have a proxy role named `proxy1`, when the superuser creates a tenant, you should specify `proxy1` as one of the admin roles. When a role is granted permissions to produce or consume from a namespace, if that client wants to produce or consume through a proxy, you should also grant `proxy1` the same permissions.
+
+Another approach is to make the proxy role a superuser. This allows the proxy to access all resources. The client still needs to authenticate with the proxy, and all requests made through the proxy have their role downgraded to the *original principal* of the authenticated client. However, if the proxy is compromised, a bad actor could get full access to your cluster.
+
+You can specify the roles as proxy roles in [`conf/broker.conf`](reference-configuration.md#broker).
+
+```properties
+proxyRoles=my-proxy-role
+
+# if you want to allow superusers to use the proxy (see above)
+superUserRoles=my-super-user-1,my-super-user-2,my-proxy-role
+```
+
+## Administer tenants
+
+Pulsar [instance](reference-terminology.md#instance) administrators or some kind of self-service portal typically provisions a Pulsar [tenant](reference-terminology.md#tenant).
+
+You can manage tenants using the [`pulsar-admin`](reference-pulsar-admin.md) tool.
+
+### Create a new tenant
+
+The following is an example tenant creation command:
+
+```shell
+$ bin/pulsar-admin tenants create my-tenant \
+  --admin-roles my-admin-role \
+  --allowed-clusters us-west,us-east
+```
+
+This command creates a new tenant `my-tenant` that is allowed to use the clusters `us-west` and `us-east`.
+
+A client that successfully identifies itself as having the role `my-admin-role` is allowed to perform all administrative tasks on this tenant.
+
+The structure of topic names in Pulsar reflects the hierarchy between tenants, clusters, and namespaces:
+
+```shell
+persistent://tenant/namespace/topic
+```
+
+### Manage permissions
+
+You can use [Pulsar Admin Tools](admin-api-permissions.md) for managing permissions in Pulsar.
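+
+For example, a superuser could grant a role permission to produce and consume on a namespace as follows (the tenant, namespace, and role names here are placeholders):
+
+```shell
+$ bin/pulsar-admin namespaces grant-permission my-tenant/my-namespace \
+  --actions produce,consume \
+  --role my-client-role
+```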
+
+### Pulsar admin authentication
+
+```java
+PulsarAdmin admin = PulsarAdmin.builder()
+                    .serviceHttpUrl("http://broker:8080")
+                    .authentication("com.org.MyAuthPluginClass", "param1:value1")
+                    .build();
+```
+
+To use TLS:
+
+```java
+PulsarAdmin admin = PulsarAdmin.builder()
+                    .serviceHttpUrl("https://broker:8080")
+                    .authentication("com.org.MyAuthPluginClass", "param1:value1")
+                    .tlsTrustCertsFilePath("/path/to/trust/cert")
+                    .build();
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/security-bouncy-castle.md b/site2/website/versioned_docs/version-2.7.0/security-bouncy-castle.md
new file mode 100644
index 00000000000000..85fc686238f1b6
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/security-bouncy-castle.md
@@ -0,0 +1,139 @@
+---
+id: version-2.7.0-security-bouncy-castle
+title: Bouncy Castle Providers
+sidebar_label: Bouncy Castle Providers
+original_id: security-bouncy-castle
+---
+
+## BouncyCastle Introduction
+
+`Bouncy Castle` is a Java library that complements the default Java Cryptographic Extension (JCE),
+and it provides more cipher suites and algorithms than the default JCE provided by Sun.
+
+In addition to that, `Bouncy Castle` has lots of utilities for reading arcane formats like PEM and ASN.1 that no sane person would want to rewrite themselves.
+
+In Pulsar, security and crypto have dependencies on BouncyCastle jars. For details about installing and configuring Bouncy Castle FIPS, see the [BC FIPS Documentation](https://www.bouncycastle.org/documentation.html), especially the **User Guides** and **Security Policy** PDFs.
+
+`Bouncy Castle` provides both [FIPS](https://www.bouncycastle.org/fips_faq.html) and non-FIPS versions. However, in a single JVM you cannot include both versions; you need to exclude the current version before including the other.
+
+In Pulsar, the security and crypto methods also depend on `Bouncy Castle`, especially in [TLS Authentication](security-tls-authentication.md) and [Transport Encryption](security-encryption.md). This document describes how to configure the BouncyCastle FIPS (BC-FIPS) and non-FIPS (BC-non-FIPS) versions while using Pulsar.
+
+## How BouncyCastle modules are packaged in Pulsar
+
+In Pulsar's `bouncy-castle` module, we provide two sub-modules: `bouncy-castle-bc` (for the non-FIPS version) and `bouncy-castle-bcfips` (for the FIPS version), to package BC jars together and make including and excluding `Bouncy Castle` easier.
+
+To achieve this goal, we need to package several `bouncy-castle` jars together into the `bouncy-castle-bc` or `bouncy-castle-bcfips` jar.
+Each of the original bouncy-castle jars is security-related, so BouncyCastle dutifully signs each JAR.
+But when we re-package, the Maven shade plugin explodes the BouncyCastle jar file and puts the signatures into META-INF;
+these signatures aren't valid for the new uber-jar (the signatures are only valid for the original BC jars).
+Usually, you will see an error like `java.lang.SecurityException: Invalid signature file digest for Manifest main attributes`.
+
+You could exclude these signatures in the Maven pom file to avoid the above error:
+```
+META-INF/*.SF
+META-INF/*.DSA
+META-INF/*.RSA
+```
+But it can also lead to new, cryptic errors, e.g.
`java.security.NoSuchAlgorithmException: PBEWithSHA256And256BitAES-CBC-BC SecretKeyFactory not available`.
+If you explicitly specify where to find the algorithm, like `SecretKeyFactory.getInstance("PBEWithSHA256And256BitAES-CBC-BC","BC")`,
+you will get the real error: `java.security.NoSuchProviderException: JCE cannot authenticate the provider BC`.
+
+So, we use an [executable packer plugin](https://github.com/nthuemmel/executable-packer-maven-plugin) that uses a jar-in-jar approach to preserve the BouncyCastle signatures in a single, executable jar.
+
+### Include dependencies of BC-non-FIPS
+
+The Pulsar module `bouncy-castle-bc`, which is defined by `bouncy-castle/bc/pom.xml`, contains the needed non-FIPS jars for Pulsar, and is packaged as a jar-in-jar (you need to provide the `pkg` classifier).
+
+```xml
+<dependency>
+  <groupId>org.bouncycastle</groupId>
+  <artifactId>bcpkix-jdk15on</artifactId>
+  <version>${bouncycastle.version}</version>
+</dependency>
+
+<dependency>
+  <groupId>org.bouncycastle</groupId>
+  <artifactId>bcprov-ext-jdk15on</artifactId>
+  <version>${bouncycastle.version}</version>
+</dependency>
+```
+
+By using this `bouncy-castle-bc` module, you can easily include and exclude the BouncyCastle non-FIPS jars.
+
+### Modules that include the BC-non-FIPS module (`bouncy-castle-bc`)
+
+For the Pulsar client, users need the bouncy-castle module, so `pulsar-client-original` includes the `bouncy-castle-bc` module and has `pkg` set to reference the `jar-in-jar` package.
+It is included as in the following example:
+```xml
+<dependency>
+  <groupId>org.apache.pulsar</groupId>
+  <artifactId>bouncy-castle-bc</artifactId>
+  <version>${pulsar.version}</version>
+  <classifier>pkg</classifier>
+</dependency>
+```
+
+By default, `bouncy-castle-bc` is already included in `pulsar-client-original`, and `pulsar-client-original` is included in a lot of other modules like `pulsar-client-admin` and `pulsar-broker`.
+But for the above shaded-jar and signature reasons, we should not package Pulsar's `bouncy-castle` module into `pulsar-client-all` or other shaded modules directly, such as `pulsar-client-shaded`, `pulsar-client-admin-shaded` and `pulsar-broker-shaded`.
+So in the shaded modules, we exclude the `bouncy-castle` modules.
+```xml
+<filters>
+  <filter>
+    <artifact>org.apache.pulsar:pulsar-client-original</artifact>
+    <includes>
+      <include>**</include>
+    </includes>
+    <excludes>
+      <exclude>org/bouncycastle/**</exclude>
+    </excludes>
+  </filter>
+</filters>
+```
+
+That means the `bouncy-castle` related jars are not shaded in these fat jars.
+
+### Module BC-FIPS (`bouncy-castle-bcfips`)
+
+The Pulsar module `bouncy-castle-bcfips`, which is defined by `bouncy-castle/bcfips/pom.xml`, contains the needed FIPS jars for Pulsar.
+Similar to `bouncy-castle-bc`, `bouncy-castle-bcfips` is also packaged as a `jar-in-jar` package for easy inclusion/exclusion.
+
+```xml
+<dependency>
+  <groupId>org.bouncycastle</groupId>
+  <artifactId>bc-fips</artifactId>
+  <version>${bouncycastlefips.version}</version>
+</dependency>

+<dependency>
+  <groupId>org.bouncycastle</groupId>
+  <artifactId>bcpkix-fips</artifactId>
+  <version>${bouncycastlefips.version}</version>
+</dependency>
+```
+
+### Exclude BC-non-FIPS and include BC-FIPS
+
+If you want to switch from the BC-non-FIPS to the BC-FIPS version, here is an example for the `pulsar-broker` module:
+```xml
+<dependency>
+  <groupId>org.apache.pulsar</groupId>
+  <artifactId>pulsar-broker</artifactId>
+  <version>${pulsar.version}</version>
+  <exclusions>
+    <exclusion>
+      <groupId>org.apache.pulsar</groupId>
+      <artifactId>bouncy-castle-bc</artifactId>
+    </exclusion>
+  </exclusions>
+</dependency>
+
+<dependency>
+  <groupId>org.apache.pulsar</groupId>
+  <artifactId>bouncy-castle-bcfips</artifactId>
+  <version>${pulsar.version}</version>
+  <classifier>pkg</classifier>
+</dependency>
+```
+
+For more examples, see the `bcfips-include-test` module.
+
diff --git a/site2/website/versioned_docs/version-2.7.0/security-extending.md b/site2/website/versioned_docs/version-2.7.0/security-extending.md
new file mode 100644
index 00000000000000..1bdbad2875b089
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/security-extending.md
@@ -0,0 +1,196 @@
+---
+id: version-2.7.0-security-extending
+title: Extending Authentication and Authorization in Pulsar
+sidebar_label: Extending
+original_id: security-extending
+---
+
+Pulsar provides a way to use custom authentication and authorization mechanisms.
+
+## Authentication
+
+Pulsar supports mutual TLS and Athenz authentication plugins. For how to use these authentication plugins, you can refer to the description in [Security](security-overview.md).
+
+You can use a custom authentication mechanism by providing the implementation in the form of two plugins. One plugin is for the client library and the other plugin is for the Pulsar Proxy and/or Pulsar Broker to validate the credentials.
+
+### Client authentication plugin
+
+For the client library, you need to implement `org.apache.pulsar.client.api.Authentication`. You can then pass an instance of this class when you create a Pulsar client:
+
+```java
+PulsarClient client = PulsarClient.builder()
+    .serviceUrl("pulsar://localhost:6650")
+    .authentication(new MyAuthentication())
+    .build();
+```
+
+There are two interfaces to implement on the client side:
+ * `Authentication` -> http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/Authentication.html
+ * `AuthenticationDataProvider` -> http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/AuthenticationDataProvider.html
+
+
+The `Authentication` implementation in turn needs to provide the client credentials in the form of `org.apache.pulsar.client.api.AuthenticationDataProvider`. This makes it possible to return different kinds of authentication tokens for different types of connections, or to pass a certificate chain to use for TLS.
+
+
+You can find examples of client authentication providers at:
+
+ * Mutual TLS Auth -- https://github.com/apache/pulsar/tree/master/pulsar-client/src/main/java/org/apache/pulsar/client/impl/auth
+ * Athenz -- https://github.com/apache/pulsar/tree/master/pulsar-client-auth-athenz/src/main/java/org/apache/pulsar/client/impl/auth
+
+### Proxy/Broker authentication plugin
+
+On the proxy/broker side, you need to configure the corresponding plugin to validate the credentials that the client sends. The Proxy and Broker can support multiple authentication providers at the same time.
+
+In `conf/broker.conf` you can choose to specify a list of valid providers:
+
+```properties
+# Authentication provider name list, which is a comma-separated list of class names
+authenticationProviders=
+```
+To implement a custom provider, you implement the single interface `org.apache.pulsar.broker.authentication.AuthenticationProvider`:
+
+```java
+/**
+ * Provider of authentication mechanism
+ */
+public interface AuthenticationProvider extends Closeable {
+
+    /**
+     * Perform initialization for the authentication provider
+     *
+     * @param config
+     *            broker config object
+     * @throws IOException
+     *             if the initialization fails
+     */
+    void initialize(ServiceConfiguration config) throws IOException;
+
+    /**
+     * @return the authentication method name supported by this provider
+     */
+    String getAuthMethodName();
+
+    /**
+     * Validate the authentication for the given credentials with the specified authentication data
+     *
+     * @param authData
+     *            provider specific authentication data
+     * @return the "role" string for the authenticated connection, if the authentication was successful
+     * @throws AuthenticationException
+     *             if the credentials are not valid
+     */
+    String authenticate(AuthenticationDataSource authData) throws AuthenticationException;
+
+}
+```
+
+The following are examples of broker authentication plugins:
+
+ * Mutual TLS -- https://github.com/apache/pulsar/blob/master/pulsar-broker-common/src/main/java/org/apache/pulsar/broker/authentication/AuthenticationProviderTls.java
+ * Athenz -- https://github.com/apache/pulsar/blob/master/pulsar-broker-auth-athenz/src/main/java/org/apache/pulsar/broker/authentication/AuthenticationProviderAthenz.java
+
+## Authorization
+
+Authorization is the operation that checks whether a particular "role" or "principal" has permission to perform a certain operation.
+
+By default, you can use the embedded authorization provider provided by Pulsar. You can also configure a different authorization provider through a plugin.
+Note that although the authentication plugin is designed for use in both the Proxy and the Broker,
+the authorization plugin is designed only for use on the Broker. However, the Proxy does perform some simple authorization checks of roles if authorization is enabled.
+
+To provide a custom provider, you need to implement the `org.apache.pulsar.broker.authorization.AuthorizationProvider` interface, put this class in the Pulsar broker classpath and configure the class in `conf/broker.conf`:
+
+ ```properties
+ # Authorization provider fully qualified class-name
+ authorizationProvider=org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider
+ ```
+
+```java
+/**
+ * Provider of authorization mechanism
+ */
+public interface AuthorizationProvider extends Closeable {
+
+    /**
+     * Perform initialization for the authorization provider
+     *
+     * @param conf
+     *            broker config object
+     * @param configCache
+     *            pulsar zk configuration cache service
+     * @throws IOException
+     *             if the initialization fails
+     */
+    void initialize(ServiceConfiguration conf, ConfigurationCacheService configCache) throws IOException;
+
+    /**
+     * Check if the specified role has permission to send messages to the specified fully qualified topic name.
+     *
+     * @param topicName
+     *            the fully qualified topic name associated with the topic.
+     * @param role
+     *            the app id used to send messages to the topic.
+     */
+    CompletableFuture<Boolean> canProduceAsync(TopicName topicName, String role,
+            AuthenticationDataSource authenticationData);
+
+    /**
+     * Check if the specified role has permission to receive messages from the specified fully qualified topic name.
+     *
+     * @param topicName
+     *            the fully qualified topic name associated with the topic.
+     * @param role
+     *            the app id used to receive messages from the topic.
+     * @param subscription
+     *            the subscription name defined by the client
+     */
+    CompletableFuture<Boolean> canConsumeAsync(TopicName topicName, String role,
+            AuthenticationDataSource authenticationData, String subscription);
+
+    /**
+     * Check whether the specified role can perform a lookup for the specified topic.
+     *
+     * For that the caller needs to have producer or consumer permission.
+     *
+     * @param topicName
+     * @param role
+     * @return
+     * @throws Exception
+     */
+    CompletableFuture<Boolean> canLookupAsync(TopicName topicName, String role,
+            AuthenticationDataSource authenticationData);
+
+    /**
+     *
+     * Grant authorization-action permission on a namespace to the given client
+     *
+     * @param namespace
+     * @param actions
+     * @param role
+     * @param authDataJson
+     *            additional authdata in json format
+     * @return CompletableFuture<Void>
+     * @completesWith
    + * IllegalArgumentException when namespace not found
+     *                IllegalStateException when failed to grant permission
+     */
+    CompletableFuture<Void> grantPermissionAsync(NamespaceName namespace, Set<AuthAction> actions, String role,
+            String authDataJson);
+
+    /**
+     * Grant authorization-action permission on a topic to the given client
+     *
+     * @param topicName
+     * @param role
+     * @param authDataJson
+     *            additional authdata in json format
+     * @return CompletableFuture<Void>
+     * @completesWith
    + * IllegalArgumentException when namespace not found
+     *                IllegalStateException when failed to grant permission
+     */
+    CompletableFuture<Void> grantPermissionAsync(TopicName topicName, Set<AuthAction> actions, String role,
+            String authDataJson);
+
+}
+
+```
diff --git a/site2/website/versioned_docs/version-2.7.0/security-oauth2.md b/site2/website/versioned_docs/version-2.7.0/security-oauth2.md
new file mode 100644
index 00000000000000..7a40e565311151
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/security-oauth2.md
@@ -0,0 +1,207 @@
+---
+id: version-2.7.0-security-oauth2
+title: Client authentication using OAuth 2.0 access tokens
+sidebar_label: Authentication using OAuth 2.0 access tokens
+original_id: security-oauth2
+---
+
+Pulsar supports authenticating clients using OAuth 2.0 access tokens. You can use OAuth 2.0 access tokens to identify a Pulsar client and associate the Pulsar client with some "principal" (or "role"), which is permitted to do some actions, such as publishing messages to a topic or consuming messages from a topic.
+
+This module is used to support the Pulsar client authentication plugin for OAuth 2.0. After communicating with the OAuth 2.0 server, the Pulsar client gets an `access token` from the OAuth 2.0 server, and passes this `access token` to the Pulsar broker to do the authentication. The broker can use the `org.apache.pulsar.broker.authentication.AuthenticationProviderToken`. Or, you can add your own `AuthenticationProvider` to make it work with this module.
+
+## Authentication provider configuration
+
+This library allows you to authenticate the Pulsar client by using an access token that is obtained from an OAuth 2.0 authorization service, which acts as a _token issuer_.
+
+### Authentication types
+
+The authentication type determines how to obtain an access token through an OAuth 2.0 authorization flow.
+
+#### Note
+> Currently, the Pulsar Java client only supports the `client_credentials` authentication type.
+
+#### Client credentials
+
+The following table lists parameters supported for the `client_credentials` authentication type.
+
+| Parameter | Description | Example | Required or not |
+| --- | --- | --- | --- |
+| `type` | OAuth 2.0 authentication type. | `client_credentials` (default) | Optional |
+| `issuerUrl` | URL of the authentication provider which allows the Pulsar client to obtain an access token | `https://accounts.google.com` | Required |
+| `privateKey` | URL to a JSON credentials file | Supports the following pattern formats:<br>• `file:///path/to/file`<br>• `file:/path/to/file`<br>• `data:application/json;base64,<base64-encoded value>` | Required |
+| `audience` | An OAuth 2.0 "resource server" identifier for the Pulsar cluster | `https://broker.example.com` | Required |
+
+The credentials file contains service account credentials used with the client authentication type. The following shows an example of a credentials file `credentials_file.json`.
+
+```json
+{
+    "type": "client_credentials",
+    "client_id": "d9ZyX97q1ef8Cr81WHVC4hFQ64vSlDK3",
+    "client_secret": "on1uJ...k6F6R",
+    "client_email": "1234567890-abcdefghijklmnopqrstuvwxyz@developer.gserviceaccount.com",
+    "issuer_url": "https://accounts.google.com"
+}
+```
+
+In the above example, the authentication type is set to `client_credentials` by default. And the fields "client_id" and "client_secret" are required.
+
+### Typical original OAuth2 request mapping
+
+The following shows a typical original OAuth2 request, which is used to obtain the access token from the OAuth2 server.
+
+```bash
+curl --request POST \
+  --url https://dev-kt-aa9ne.us.auth0.com \
+  --header 'content-type: application/json' \
+  --data '{
+  "client_id":"Xd23RHsUnvUlP7wchjNYOaIfazgeHd9x",
+  "client_secret":"rT7ps7WY8uhdVuBTKWZkttwLdQotmdEliaM5rLfmgNibvqziZ-g07ZH52N_poGAb",
+  "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/",
+  "grant_type":"client_credentials"}'
+```
+
+In the above example, the mapping relationship is shown as below.
+
+- The `issuerUrl` parameter in this plugin is mapped to `--url https://dev-kt-aa9ne.us.auth0.com`.
+- The `privateKey` file parameter in this plugin should at least contain the `client_id` and `client_secret` fields.
+- The `audience` parameter in this plugin is mapped to `"audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"`.
+
+## Client Configuration
+
+You can use the OAuth2 authentication provider with the following Pulsar clients.
+
+### Java
+
+You can use the factory method to configure authentication for the Pulsar Java client.
+
+```java
+URL issuerUrl = new URL("https://dev-kt-aa9ne.us.auth0.com");
+URL credentialsUrl = new URL("file:///path/to/KeyFile.json");
+String audience = "https://dev-kt-aa9ne.us.auth0.com/api/v2/";
+
+PulsarClient client = PulsarClient.builder()
+    .serviceUrl("pulsar://broker.example.com:6650/")
+    .authentication(
+        AuthenticationFactoryOAuth2.clientCredentials(issuerUrl, credentialsUrl, audience))
+    .build();
+```
+
+In addition, you can also use the encoded parameters to configure authentication for the Pulsar Java client.
+
+```java
+Authentication auth = AuthenticationFactory
+    .create(AuthenticationOAuth2.class.getName(),
+        "{\"type\":\"client_credentials\",\"privateKey\":\"./key/path/..\",\"issuerUrl\":\"...\",\"audience\":\"...\"}");
+PulsarClient client = PulsarClient.builder()
+    .serviceUrl("pulsar://broker.example.com:6650/")
+    .authentication(auth)
+    .build();
+```
+
+### C++ client
+
+The C++ client is similar to the Java client. You need to provide the `issuerUrl`, `private_key` (the credentials file path), and `audience` parameters.
+
+```c++
+#include <pulsar/Client.h>
+
+pulsar::ClientConfiguration config;
+std::string params = R"({
+    "issuer_url": "https://dev-kt-aa9ne.us.auth0.com",
+    "private_key": "../../pulsar-broker/src/test/resources/authentication/token/cpp_credentials_file.json",
+    "audience": "https://dev-kt-aa9ne.us.auth0.com/api/v2/"})";
+
+config.setAuth(pulsar::AuthOauth2::create(params));
+
+pulsar::Client client("pulsar://broker.example.com:6650/", config);
+```
+
+### Go client
+
+To enable OAuth2 authentication in the Go client, you need to configure OAuth2 authentication.
+### C++ client
+
+The C++ client is similar to the Java client. You need to provide the `issuer_url`, `private_key` (the credentials file path), and `audience` parameters.
+
+```c++
+#include <pulsar/Client.h>
+
+pulsar::ClientConfiguration config;
+std::string params = R"({
+    "issuer_url": "https://dev-kt-aa9ne.us.auth0.com",
+    "private_key": "../../pulsar-broker/src/test/resources/authentication/token/cpp_credentials_file.json",
+    "audience": "https://dev-kt-aa9ne.us.auth0.com/api/v2/"})";
+
+config.setAuth(pulsar::AuthOauth2::create(params));
+
+pulsar::Client client("pulsar://broker.example.com:6650/", config);
+```
+
+### Go client
+
+This example shows how to configure OAuth2 authentication in the Go client.
+
+```go
+oauth := pulsar.NewAuthenticationOAuth2(map[string]string{
+		"type":       "client_credentials",
+		"issuerUrl":  "https://dev-kt-aa9ne.us.auth0.com",
+		"audience":   "https://dev-kt-aa9ne.us.auth0.com/api/v2/",
+		"privateKey": "/path/to/privateKey",
+		"clientId":   "0Xx...Yyxeny",
+	})
+client, err := pulsar.NewClient(pulsar.ClientOptions{
+	URL:            "pulsar://my-cluster:6650",
+	Authentication: oauth,
+})
+```
+
+### Python client
+
+This example shows how to configure OAuth2 authentication in the Python client.
+
+```python
+from pulsar import Client, AuthenticationOauth2
+
+params = '''
+{
+    "issuer_url": "https://dev-kt-aa9ne.us.auth0.com",
+    "private_key": "/path/to/privateKey",
+    "audience": "https://dev-kt-aa9ne.us.auth0.com/api/v2/"
+}
+'''
+
+client = Client("pulsar://my-cluster:6650", authentication=AuthenticationOauth2(params))
+```
+
+## CLI configuration
+
+This section describes how to use the Pulsar CLI tools to connect to a cluster through the OAuth2 authentication plugin.
+
+### pulsar-admin
+
+This example shows how to use pulsar-admin to connect to a cluster through the OAuth2 authentication plugin.
+
+```shell script
+bin/pulsar-admin --admin-url https://streamnative.cloud:443 \
+--auth-plugin org.apache.pulsar.client.impl.auth.oauth2.AuthenticationOAuth2 \
+--auth-params '{"privateKey":"file:///path/to/key/file.json",
+    "issuerUrl":"https://dev-kt-aa9ne.us.auth0.com",
+    "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"}' \
+tenants list
+```
+
+Set the `admin-url` parameter to the Web service URL of the cluster, which is a combination of the protocol, hostname, and port, such as `https://streamnative.cloud:443` in the example above.
+Set the `privateKey`, `issuerUrl`, and `audience` parameters to the values based on the configuration in the key file. For details, see [authentication types](#authentication-types).
+
+### pulsar-client
+
+This example shows how to use pulsar-client to connect to a cluster through the OAuth2 authentication plugin.
+
+```shell script
+bin/pulsar-client \
+--url SERVICE_URL \
+--auth-plugin org.apache.pulsar.client.impl.auth.oauth2.AuthenticationOAuth2 \
+--auth-params '{"privateKey":"file:///path/to/key/file.json",
+    "issuerUrl":"https://dev-kt-aa9ne.us.auth0.com",
+    "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"}' \
+produce test-topic -m "test-message" -n 10
+```
+
+Set the `url` parameter to the broker service URL of the cluster, which is a combination of the protocol, hostname, and port, such as `pulsar://localhost:6650`.
+Set the `privateKey`, `issuerUrl`, and `audience` parameters to the values based on the configuration in the key file. For details, see [authentication types](#authentication-types).
+
+### pulsar-perf
+
+This example shows how to use pulsar-perf to connect to a cluster through the OAuth2 authentication plugin.
+
+```shell script
+bin/pulsar-perf produce --service-url pulsar+ssl://streamnative.cloud:6651 \
+--auth_plugin org.apache.pulsar.client.impl.auth.oauth2.AuthenticationOAuth2 \
+--auth-params '{"privateKey":"file:///path/to/key/file.json",
+    "issuerUrl":"https://dev-kt-aa9ne.us.auth0.com",
+    "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"}' \
+-r 1000 -s 1024 test-topic
+```
+
+Set the `service-url` parameter to the broker service URL of the cluster, such as `pulsar+ssl://streamnative.cloud:6651` in the example above.
+Set the `privateKey`, `issuerUrl`, and `audience` parameters to the values based on the configuration in the key file. For details, see [authentication types](#authentication-types). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/sql-deployment-configurations.md b/site2/website/versioned_docs/version-2.7.0/sql-deployment-configurations.md new file mode 100644 index 00000000000000..b1cbde71bdc1cd --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/sql-deployment-configurations.md @@ -0,0 +1,159 @@ +--- +id: version-2.7.0-sql-deployment-configurations +title: Pulsar SQL configuration and deployment +sidebar_label: Configuration and deployment +original_id: sql-deployment-configurations +--- + +You can configure Presto Pulsar connector and deploy a cluster with the following instruction. + +## Configure Presto Pulsar Connector +You can configure Presto Pulsar Connector in the `${project.root}/conf/presto/catalog/pulsar.properties` properties file. The configuration for the connector and the default values are as follows. + +```properties +# name of the connector to be displayed in the catalog +connector.name=pulsar + +# the url of Pulsar broker service +pulsar.broker-service-url=http://localhost:8080 + +# URI of Zookeeper cluster +pulsar.zookeeper-uri=localhost:2181 + +# minimum number of entries to read at a single time +pulsar.entry-read-batch-size=100 + +# default number of splits to use per query +pulsar.target-num-splits=4 +``` + +You can connect Presto to a Pulsar cluster with multiple hosts. To configure multiple hosts for brokers, add multiple URLs to `pulsar.broker-service-url`. To configure multiple hosts for ZooKeeper, add multiple URIs to `pulsar.zookeeper-uri`. The following is an example. + +``` +pulsar.broker-service-url=http://localhost:8080,localhost:8081,localhost:8082 +pulsar.zookeeper-uri=localhost1,localhost2:2181 +``` + +## Query data from existing Presto clusters + +If you already have a Presto cluster, you can copy the Presto Pulsar connector plugin to your existing cluster. Download the archived plugin package with the following command. + +```bash +$ wget pulsar:binary_release_url +``` + +## Deploy a new cluster + +Since Pulsar SQL is powered by [Presto](https://prestosql.io), the configuration for deployment is the same for the Pulsar SQL worker. + +> Note +> For how to set up a standalone single node environment, refer to [Query data](sql-getting-started.md). + +You can use the same CLI args as the Presto launcher. + +```bash +$ ./bin/pulsar sql-worker --help +Usage: launcher [options] command + +Commands: run, start, stop, restart, kill, status + +Options: + -h, --help show this help message and exit + -v, --verbose Run verbosely + --etc-dir=DIR Defaults to INSTALL_PATH/etc + --launcher-config=FILE + Defaults to INSTALL_PATH/bin/launcher.properties + --node-config=FILE Defaults to ETC_DIR/node.properties + --jvm-config=FILE Defaults to ETC_DIR/jvm.config + --config=FILE Defaults to ETC_DIR/config.properties + --log-levels-file=FILE + Defaults to ETC_DIR/log.properties + --data-dir=DIR Defaults to INSTALL_PATH + --pid-file=FILE Defaults to DATA_DIR/var/run/launcher.pid + --launcher-log-file=FILE + Defaults to DATA_DIR/var/log/launcher.log (only in + daemon mode) + --server-log-file=FILE + Defaults to DATA_DIR/var/log/server.log (only in + daemon mode) + -D NAME=VALUE Set a Java system property + +``` + +The default configuration for the cluster is located in `${project.root}/conf/presto`. 
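+
+For reference, a freshly unpacked Pulsar distribution typically ships the standard Presto launcher files in that directory. The listing below is a sketch and the exact contents can vary between releases:
+
+```bash
+$ ls ${project.root}/conf/presto
+catalog  config.properties  jvm.config  log.properties  node.properties
+```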
 You can customize your deployment by modifying the default configuration.
+
+You can set the worker to read from a different configuration directory, or set a different directory to write data.
+
+```bash
+$ ./bin/pulsar sql-worker run --etc-dir /tmp/incubator-pulsar/conf/presto --data-dir /tmp/presto-1
+```
+
+You can start the worker as a daemon process.
+
+```bash
+$ ./bin/pulsar sql-worker start
+```
+
+### Deploy a cluster on multiple nodes
+
+You can deploy a Pulsar SQL cluster or Presto cluster on multiple nodes. The following example shows how to deploy a cluster on three nodes.
+
+1. Copy the Pulsar binary distribution to three nodes.
+
+The first node runs as the Presto coordinator. The minimal configuration requirement in the `${project.root}/conf/presto/config.properties` file is as follows.
+
+```properties
+coordinator=true
+node-scheduler.include-coordinator=true
+http-server.http.port=8080
+query.max-memory=50GB
+query.max-memory-per-node=1GB
+discovery-server.enabled=true
+discovery.uri=
+```
+
+The other two nodes serve as worker nodes; you can use the following configuration for them.
+
+```properties
+coordinator=false
+http-server.http.port=8080
+query.max-memory=50GB
+query.max-memory-per-node=1GB
+discovery.uri=
+```
+
+2. Modify the `pulsar.broker-service-url` and `pulsar.zookeeper-uri` configuration in the `${project.root}/conf/presto/catalog/pulsar.properties` file accordingly for the three nodes.
+
+3. Start the coordinator node.
+
+```
+$ ./bin/pulsar sql-worker run
+```
+
+4. Start the worker nodes.
+
+```
+$ ./bin/pulsar sql-worker run
+```
+
+5. Start the SQL CLI and check the status of your cluster.
+
+```bash
+$ ./bin/pulsar sql --server
+```
+
+6. Check the status of your nodes.
+
+```bash
+presto> SELECT * FROM system.runtime.nodes;
+ node_id |        http_uri         | node_version | coordinator | state
+---------+-------------------------+--------------+-------------+--------
+ 1       | http://192.168.2.1:8081 | testversion  | true        | active
+ 3       | http://192.168.2.2:8081 | testversion  | false       | active
+ 2       | http://192.168.2.3:8081 | testversion  | false       | active
+```
+
+For more information about deployment in Presto, refer to [Presto deployment](https://prestosql.io/docs/current/installation/deployment.html).
+
+> Note
+> The broker does not advance the LAC (LastAddConfirmed), so when Pulsar SQL bypasses the broker to query data, it can only read entries up to the LAC that all the bookies have learned. You can make the broker write the LAC periodically by setting `bookkeeperExplicitLacIntervalInMills` in `broker.conf`.
diff --git a/site2/website/versioned_docs/version-2.7.0/tiered-storage-aws.md b/site2/website/versioned_docs/version-2.7.0/tiered-storage-aws.md
new file mode 100644
index 00000000000000..b9ac9ab10f5bb4
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/tiered-storage-aws.md
@@ -0,0 +1,282 @@
+---
+id: version-2.7.0-tiered-storage-aws
+title: Use AWS S3 offloader with Pulsar
+sidebar_label: AWS S3 offloader
+original_id: tiered-storage-aws
+---
+
+This chapter guides you through every step of installing and configuring the AWS S3 offloader and using it with Pulsar.
+
+## Installation
+
+Follow the steps below to install the AWS S3 offloader.
+
+### Prerequisite
+
+- Pulsar: 2.4.2 or later versions
+
+### Step
+
+This example uses Pulsar 2.5.1.
+
+1.
Download the Pulsar tarball using one of the following ways: + + * Download from the [Apache mirror](https://archive.apache.org/dist/pulsar/pulsar-2.5.1/apache-pulsar-2.5.1-bin.tar.gz) + + * Download from the Pulsar [downloads page](https://pulsar.apache.org/download) + + * Use [wget](https://www.gnu.org/software/wget): + + ```shell + wget https://archive.apache.org/dist/pulsar/pulsar-2.5.1/apache-pulsar-2.5.1-bin.tar.gz + ``` + +2. Download and untar the Pulsar offloaders package. + + ```bash + wget https://downloads.apache.org/pulsar/pulsar-2.5.1/apache-pulsar-offloaders-2.5.1-bin.tar.gz + tar xvfz apache-pulsar-offloaders-2.5.1-bin.tar.gz + ``` + +3. Copy the Pulsar offloaders as `offloaders` in the Pulsar directory. + + ``` + mv apache-pulsar-offloaders-2.5.1/offloaders apache-pulsar-2.5.1/offloaders + + ls offloaders + ``` + + **Output** + + As shown from the output, Pulsar uses [Apache jclouds](https://jclouds.apache.org) to support [AWS S3](https://aws.amazon.com/s3/) and [GCS](https://cloud.google.com/storage/) for long term storage. + + + ``` + tiered-storage-file-system-2.5.1.nar + tiered-storage-jcloud-2.5.1.nar + ``` + + > #### Note + > + > * If you are running Pulsar in a bare metal cluster, make sure that `offloaders` tarball is unzipped in every broker's Pulsar directory. + > + > * If you are running Pulsar in Docker or deploying Pulsar using a Docker image (such as K8s and DCOS), you can use the `apachepulsar/pulsar-all` image instead of the `apachepulsar/pulsar` image. `apachepulsar/pulsar-all` image has already bundled tiered storage offloaders. + +## Configuration + +> #### Note +> +> Before offloading data from BookKeeper to AWS S3, you need to configure some properties of the AWS S3 offload driver. + +Besides, you can also configure the AWS S3 offloader to run it automatically or trigger it manually. + +### Configure AWS S3 offloader driver + +You can configure the AWS S3 offloader driver in the configuration file `broker.conf` or `standalone.conf`. + +- **Required** configurations are as below. + + Required configuration | Description | Example value + |---|---|--- + `managedLedgerOffloadDriver` | Offloader driver name, which is case-insensitive.

    **Note**: there is a third driver type, S3, which is identical to AWS S3, though S3 requires that you specify an endpoint URL using `s3ManagedLedgerOffloadServiceEndpoint`. This is useful if using an S3 compatible data store other than AWS S3. | aws-s3 + `offloadersDirectory` | Offloader directory | offloaders + `s3ManagedLedgerOffloadBucket` | Bucket | pulsar-topic-offload + +- **Optional** configurations are as below. + + Optional | Description | Example value + |---|---|--- + `s3ManagedLedgerOffloadRegion` | Bucket region | eu-west-3 + `s3ManagedLedgerOffloadReadBufferSizeInBytes`|Size of block read|1 MB + `s3ManagedLedgerOffloadMaxBlockSizeInBytes`|Size of block write|64 MB + `managedLedgerMinLedgerRolloverTimeMinutes`|Minimum time between ledger rollover for a topic

    **Note**: it is not recommended that you set this configuration in the production environment.|2 + `managedLedgerMaxEntriesPerLedger`|Maximum number of entries to append to a ledger before triggering a rollover.

    **Note**: it is not recommended that you set this configuration in the production environment.|5000
+
+#### Bucket (required)
+
+A bucket is a basic container that holds your data. Everything you store in AWS S3 must be contained in a bucket. You can use a bucket to organize your data and control access to your data, but unlike directories and folders, buckets cannot be nested.
+
+##### Example
+
+This example names the bucket as _pulsar-topic-offload_.
+
+```conf
+s3ManagedLedgerOffloadBucket=pulsar-topic-offload
+```
+
+#### Bucket region
+
+A bucket region is the region where a bucket is located. If a bucket region is not specified, the **default** region (`US East (N. Virginia)`) is used.
+
+> #### Tip
+>
+> For more information about AWS regions and endpoints, see [here](https://docs.aws.amazon.com/general/latest/gr/rande.html).
+
+##### Example
+
+This example sets the bucket region as _eu-west-3_.
+
+```
+s3ManagedLedgerOffloadRegion=eu-west-3
+```
+
+#### Authentication (required)
+
+To be able to access AWS S3, you need to authenticate with AWS S3.
+
+Pulsar does not provide any direct methods of configuring authentication for AWS S3,
+but relies on the mechanisms supported by the
+[DefaultAWSCredentialsProviderChain](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html).
+
+Once you have created a set of credentials in the AWS IAM console, you can configure credentials using one of the following methods.
+
+* Use EC2 instance metadata credentials.
+
+    If you are on an AWS instance with an instance profile that provides credentials, Pulsar uses these credentials if no other mechanism is provided.
+
+* Set the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in `conf/pulsar_env.sh`.
+
+    "export" is important so that the variables are made available in the environment of spawned processes.
+
+    ```bash
+    export AWS_ACCESS_KEY_ID=ABC123456789
+    export AWS_SECRET_ACCESS_KEY=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c
+    ```
+
+* Add the Java system properties `aws.accessKeyId` and `aws.secretKey` to `PULSAR_EXTRA_OPTS` in `conf/pulsar_env.sh`.
+
+    ```bash
+    PULSAR_EXTRA_OPTS="${PULSAR_EXTRA_OPTS} ${PULSAR_MEM} ${PULSAR_GC} -Daws.accessKeyId=ABC123456789 -Daws.secretKey=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c -Dio.netty.leakDetectionLevel=disabled -Dio.netty.recycler.maxCapacity.default=1000 -Dio.netty.recycler.linkCapacity=1024"
+    ```
+
+* Set the access credentials in `~/.aws/credentials`.
+
+    ```conf
+    [default]
+    aws_access_key_id=ABC123456789
+    aws_secret_access_key=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c
+    ```
+
+* Assume an IAM role.
+
+    This example uses the `DefaultAWSCredentialsProviderChain` for assuming this role.
+
+    The broker must be rebooted for credentials specified in `pulsar_env.sh` to take effect.
+
+    ```conf
+    s3ManagedLedgerOffloadRole=
+    s3ManagedLedgerOffloadRoleSessionName=pulsar-s3-offload
+    ```
+
+#### Size of block read/write
+
+You can configure the size of a request sent to or read from AWS S3 in the configuration file `broker.conf` or `standalone.conf`.
+
+Configuration|Description|Default value
+|---|---|---
+`s3ManagedLedgerOffloadReadBufferSizeInBytes`|Block size for each individual read when reading back data from AWS S3.|1 MB
+`s3ManagedLedgerOffloadMaxBlockSizeInBytes`|Maximum size of a "part" sent during a multipart upload to AWS S3. It **cannot** be smaller than 5 MB.
|64 MB + +### Configure AWS S3 offloader to run automatically + +Namespace policy can be configured to offload data automatically once a threshold is reached. The threshold is based on the size of data that a topic has stored on a Pulsar cluster. Once the topic reaches the threshold, an offloading operation is triggered automatically. + +Threshold value|Action +|---|--- +> 0 | It triggers the offloading operation if the topic storage reaches its threshold. += 0|It causes a broker to offload data as soon as possible. +< 0 |It disables automatic offloading operation. + +Automatic offloading runs when a new segment is added to a topic log. If you set the threshold on a namespace, but few messages are being produced to the topic, offloader does not work until the current segment is full. + +You can configure the threshold size using CLI tools, such as pulsar-admin. + +The offload configurations in `broker.conf` and `standalone.conf` are used for the namespaces that do not have namespace level offload policies. Each namespace can have its own offload policy. If you want to set offload policy for each namespace, use the command [`pulsar-admin namespaces set-offload-policies options`](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-set-offload-policies-em-) command. + +#### Example + +This example sets the AWS S3 offloader threshold size to 10 MB using pulsar-admin. + +```bash +bin/pulsar-admin namespaces set-offload-threshold --size 10M my-tenant/my-namespace +``` + +> #### Tip +> +> For more information about the `pulsar-admin namespaces set-offload-threshold options` command, including flags, descriptions, and default values, see [here](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-set-offload-threshold-em-). + +### Configure AWS S3 offloader to run manually + +For individual topics, you can trigger AWS S3 offloader manually using one of the following methods: + +- Use REST endpoint. + +- Use CLI tools (such as pulsar-admin). + + To trigger it via CLI tools, you need to specify the maximum amount of data (threshold) that should be retained on a Pulsar cluster for a topic. If the size of the topic data on the Pulsar cluster exceeds this threshold, segments from the topic are moved to AWS S3 until the threshold is no longer exceeded. Older segments are moved first. + +#### Example + +- This example triggers the AWS S3 offloader to run manually using pulsar-admin. + + ```bash + bin/pulsar-admin topics offload --size-threshold 10M my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload triggered for persistent://my-tenant/my-namespace/topic1 for messages before 2:0:-1 + ``` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload options` command, including flags, descriptions, and default values, see [here](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-offload-em-). + +- This example checks the AWS S3 offloader status using pulsar-admin. + + ```bash + bin/pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload is currently running + ``` + + To wait for the AWS S3 offloader to complete the job, add the `-w` flag. + + ```bash + bin/pulsar-admin topics offload-status -w persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Offload was a success + ``` + + + If there is an error in offloading, the error is propagated to the `pulsar-admin topics offload-status` command. 
+ + ```bash + bin/pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Error in offload + null + + Reason: Error offloading: org.apache.bookkeeper.mledger.ManagedLedgerException: java.util.concurrent.CompletionException: com.amazonaws.services.s3.model.AmazonS3Exception: Anonymous users cannot initiate multipart uploads. Please authenticate. (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 798758DE3F1776DF; S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g=), S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g= + ```` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload-status options` command, including flags, descriptions, and default values, see [here](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-offload-status-em-). + +## Tutorial + +For the complete and step-by-step instructions on how to use the AWS S3 offloader with Pulsar, see [here](https://hub.streamnative.io/offloaders/aws-s3/2.5.1#usage). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/tiered-storage-azure.md b/site2/website/versioned_docs/version-2.7.0/tiered-storage-azure.md new file mode 100644 index 00000000000000..7b15cb88ce3671 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/tiered-storage-azure.md @@ -0,0 +1,225 @@ +--- +id: version-2.7.0-tiered-storage-azure +title: Use Azure BlobStore offloader with Pulsar +sidebar_label: Azure BlobStore offloader +original_id: tiered-storage-azure +--- + +This chapter guides you through every step of installing and configuring the Azure BlobStore offloader and using it with Pulsar. + +## Installation + +Follow the steps below to install the Azure BlobStore offloader. + +### Prerequisite + +- Pulsar: 2.6.2 or later versions + +### Step + +This example uses Pulsar 2.6.2. + +1. Download the Pulsar tarball using one of the following ways: + + * Download from the [Apache mirror](https://archive.apache.org/dist/pulsar/pulsar-2.6.2/apache-pulsar-2.6.2-bin.tar.gz) + + * Download from the Pulsar [downloads page](https://pulsar.apache.org/download) + + * Use [wget](https://www.gnu.org/software/wget): + + ```shell + wget https://archive.apache.org/dist/pulsar/pulsar-2.6.2/apache-pulsar-2.6.2-bin.tar.gz + ``` + +2. Download and untar the Pulsar offloaders package. + + ```bash + wget https://downloads.apache.org/pulsar/pulsar-2.6.2/apache-pulsar-offloaders-2.6.2-bin.tar.gz + tar xvfz apache-pulsar-offloaders-2.6.2-bin.tar.gz + ``` + +3. Copy the Pulsar offloaders as `offloaders` in the Pulsar directory. + + ``` + mv apache-pulsar-offloaders-2.6.2/offloaders apache-pulsar-2.6.2/offloaders + + ls offloaders + ``` + + **Output** + + As shown from the output, Pulsar uses [Apache jclouds](https://jclouds.apache.org) to support [AWS S3](https://aws.amazon.com/s3/), [GCS](https://cloud.google.com/storage/) and [Azure](https://portal.azure.com/#home) for long term storage. + + ``` + tiered-storage-file-system-2.6.2.nar + tiered-storage-jcloud-2.6.2.nar + ``` + + > #### Note + > + > * If you are running Pulsar in a bare metal cluster, make sure that `offloaders` tarball is unzipped in every broker's Pulsar directory. + > + > * If you are running Pulsar in Docker or deploying Pulsar using a Docker image (such as K8s and DCOS), you can use the `apachepulsar/pulsar-all` image instead of the `apachepulsar/pulsar` image. 
`apachepulsar/pulsar-all` image has already bundled tiered storage offloaders. + +## Configuration + +> #### Note +> +> Before offloading data from BookKeeper to Azure BlobStore, you need to configure some properties of the Azure BlobStore offload driver. + +Besides, you can also configure the Azure BlobStore offloader to run it automatically or trigger it manually. + +### Configure Azure BlobStore offloader driver + +You can configure the Azure BlobStore offloader driver in the configuration file `broker.conf` or `standalone.conf`. + +- **Required** configurations are as below. + + Required configuration | Description | Example value + |---|---|--- + `managedLedgerOffloadDriver` | Offloader driver name | azureblob + `offloadersDirectory` | Offloader directory | offloaders + `managedLedgerOffloadBucket` | Bucket | pulsar-topic-offload + +- **Optional** configurations are as below. + + Optional | Description | Example value + |---|---|--- + `managedLedgerOffloadReadBufferSizeInBytes`|Size of block read|1 MB + `managedLedgerOffloadMaxBlockSizeInBytes`|Size of block write|64 MB + `managedLedgerMinLedgerRolloverTimeMinutes`|Minimum time between ledger rollover for a topic

    **Note**: it is not recommended that you set this configuration in the production environment.|2 + `managedLedgerMaxEntriesPerLedger`|Maximum number of entries to append to a ledger before triggering a rollover.

    **Note**: it is not recommended that you set this configuration in the production environment.|5000 + +#### Bucket (required) + +A bucket is a basic container that holds your data. Everything you store in Azure BlobStore must be contained in a bucket. You can use a bucket to organize your data and control access to your data, but unlike directory and folder, you cannot nest a bucket. + +##### Example + +This example names the bucket as _pulsar-topic-offload_. + +```conf +managedLedgerOffloadBucket=pulsar-topic-offload +``` + +#### Authentication (required) + +To be able to access Azure BlobStore, you need to authenticate with Azure BlobStore. + +* Set the environment variables `AZURE_STORAGE_ACCOUNT` and `AZURE_STORAGE_ACCESS_KEY` in `conf/pulsar_env.sh`. + + "export" is important so that the variables are made available in the environment of spawned processes. + + ```bash + export AZURE_STORAGE_ACCOUNT=ABC123456789 + export AZURE_STORAGE_ACCESS_KEY=ded7db27a4558e2ea8bbf0bf37ae0e8521618f366c + ``` + +#### Size of block read/write + +You can configure the size of a request sent to or read from Azure BlobStore in the configuration file `broker.conf` or `standalone.conf`. + +Configuration|Description|Default value +|---|---|--- +`managedLedgerOffloadReadBufferSizeInBytes`|Block size for each individual read when reading back data from Azure BlobStore store.|1 MB +`managedLedgerOffloadMaxBlockSizeInBytes`|Maximum size of a "part" sent during a multipart upload to Azure BlobStore store. It **cannot** be smaller than 5 MB. |64 MB + +### Configure Azure BlobStore offloader to run automatically + +Namespace policy can be configured to offload data automatically once a threshold is reached. The threshold is based on the size of data that a topic has stored on a Pulsar cluster. Once the topic reaches the threshold, an offloading operation is triggered automatically. + +Threshold value|Action +|---|--- +> 0 | It triggers the offloading operation if the topic storage reaches its threshold. += 0|It causes a broker to offload data as soon as possible. +< 0 |It disables automatic offloading operation. + +Automatic offloading runs when a new segment is added to a topic log. If you set the threshold on a namespace, but few messages are being produced to the topic, offloader does not work until the current segment is full. + +You can configure the threshold size using CLI tools, such as pulsar-admin. + +The offload configurations in `broker.conf` and `standalone.conf` are used for the namespaces that do not have namespace level offload policies. Each namespace can have its own offload policy. If you want to set offload policy for each namespace, use the command [`pulsar-admin namespaces set-offload-policies options`](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-set-offload-policies-em-) command. + +#### Example + +This example sets the Azure BlobStore offloader threshold size to 10 MB using pulsar-admin. + +```bash +bin/pulsar-admin namespaces set-offload-threshold --size 10M my-tenant/my-namespace +``` + +> #### Tip +> +> For more information about the `pulsar-admin namespaces set-offload-threshold options` command, including flags, descriptions, and default values, see [here](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-set-offload-threshold-em-). + +### Configure Azure BlobStore offloader to run manually + +For individual topics, you can trigger Azure BlobStore offloader manually using one of the following methods: + +- Use REST endpoint. 
+ +- Use CLI tools (such as pulsar-admin). + + To trigger it via CLI tools, you need to specify the maximum amount of data (threshold) that should be retained on a Pulsar cluster for a topic. If the size of the topic data on the Pulsar cluster exceeds this threshold, segments from the topic are moved to Azure BlobStore until the threshold is no longer exceeded. Older segments are moved first. + +#### Example + +- This example triggers the Azure BlobStore offloader to run manually using pulsar-admin. + + ```bash + bin/pulsar-admin topics offload --size-threshold 10M my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload triggered for persistent://my-tenant/my-namespace/topic1 for messages before 2:0:-1 + ``` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload options` command, including flags, descriptions, and default values, see [here](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-offload-em-). + +- This example checks the Azure BlobStore offloader status using pulsar-admin. + + ```bash + bin/pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload is currently running + ``` + + To wait for the Azure BlobStore offloader to complete the job, add the `-w` flag. + + ```bash + bin/pulsar-admin topics offload-status -w persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Offload was a success + ``` + + + If there is an error in offloading, the error is propagated to the `pulsar-admin topics offload-status` command. + + ```bash + bin/pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Error in offload + null + + Reason: Error offloading: org.apache.bookkeeper.mledger.ManagedLedgerException: + ```` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload-status options` command, including flags, descriptions, and default values, see [here](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-offload-status-em-). diff --git a/site2/website/versioned_docs/version-2.7.0/tiered-storage-filesystem.md b/site2/website/versioned_docs/version-2.7.0/tiered-storage-filesystem.md new file mode 100644 index 00000000000000..0d84c15a7cf0e1 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/tiered-storage-filesystem.md @@ -0,0 +1,269 @@ +--- +id: version-2.7.0-tiered-storage-filesystem +title: Use filesystem offloader with Pulsar +sidebar_label: Filesystem offloader +original_id: tiered-storage-filesystem +--- + +This chapter guides you through every step of installing and configuring the filesystem offloader and using it with Pulsar. + +## Installation + +Follow the steps below to install the filesystem offloader. + +### Prerequisite + +- Pulsar: 2.4.2 or later versions + +- Hadoop: 3.x.x + +### Step + +This example uses Pulsar 2.5.1. + +1. Download the Pulsar tarball using one of the following ways: + + * Download from the [Apache mirror](https://archive.apache.org/dist/pulsar/pulsar-2.5.1/apache-pulsar-2.5.1-bin.tar.gz) + + * Download from the Pulsar [download page](https://pulsar.apache.org/download) + + * Use [wget](https://www.gnu.org/software/wget) + + ```shell + wget https://archive.apache.org/dist/pulsar/pulsar-2.5.1/apache-pulsar-2.5.1-bin.tar.gz + ``` + +2. Download and untar the Pulsar offloaders package. 
+ + ```bash + wget https://downloads.apache.org/pulsar/pulsar-2.5.1/apache-pulsar-offloaders-2.5.1-bin.tar.gz + + tar xvfz apache-pulsar-offloaders-2.5.1-bin.tar.gz + ``` + + > #### Note + > + > * If you are running Pulsar in a bare metal cluster, make sure that `offloaders` tarball is unzipped in every broker's Pulsar directory. + > + > * If you are running Pulsar in Docker or deploying Pulsar using a Docker image (such as K8S and DCOS), you can use the `apachepulsar/pulsar-all` image instead of the `apachepulsar/pulsar` image. `apachepulsar/pulsar-all` image has already bundled tiered storage offloaders. + +3. Copy the Pulsar offloaders as `offloaders` in the Pulsar directory. + + ``` + mv apache-pulsar-offloaders-2.5.1/offloaders apache-pulsar-2.5.1/offloaders + + ls offloaders + ``` + + **Output** + + ``` + tiered-storage-file-system-2.5.1.nar + tiered-storage-jcloud-2.5.1.nar + ``` + + > #### Note + > + > * If you are running Pulsar in a bare metal cluster, make sure that `offloaders` tarball is unzipped in every broker's Pulsar directory. + > + > * If you are running Pulsar in Docker or deploying Pulsar using a Docker image (such as K8s and DCOS), you can use the `apachepulsar/pulsar-all` image instead of the `apachepulsar/pulsar` image. `apachepulsar/pulsar-all` image has already bundled tiered storage offloaders. + +## Configuration + +> #### Note +> +> Before offloading data from BookKeeper to filesystem, you need to configure some properties of the filesystem offloader driver. + +Besides, you can also configure the filesystem offloader to run it automatically or trigger it manually. + +### Configure filesystem offloader driver + +You can configure filesystem offloader driver in the configuration file `broker.conf` or `standalone.conf`. + +- **Required** configurations are as below. + + Required configuration | Description | Example value + |---|---|--- + `managedLedgerOffloadDriver` | Offloader driver name, which is case-insensitive. | filesystem + `fileSystemURI` | Connection address | hdfs://127.0.0.1:9000 + `offloadersDirectory` | Hadoop profile path | ../conf/filesystem_offload_core_site.xml + +- **Optional** configurations are as below. + + Optional configuration| Description | Example value + |---|---|--- + `managedLedgerMinLedgerRolloverTimeMinutes`|Minimum time between ledger rollover for a topic

    **Note**: it is not recommended that you set this configuration in the production environment.|2 + `managedLedgerMaxEntriesPerLedger`|Maximum number of entries to append to a ledger before triggering a rollover.

    **Note**: it is not recommended that you set this configuration in the production environment.|5000 + +#### Offloader driver (required) + +Offloader driver name, which is case-insensitive. + +This example sets the offloader driver name as _filesystem_. + +```conf +managedLedgerOffloadDriver=filesystem +``` + +#### Connection address (required) + +Connection address is the URI to access the default Hadoop distributed file system. + +##### Example + +This example sets the connection address as _hdfs://127.0.0.1:9000_. + +```conf +fileSystemURI=hdfs://127.0.0.1:9000 +``` + +#### Hadoop profile path (required) + +The configuration file is stored in the Hadoop profile path. It contains various settings for Hadoop performance tuning. + +##### Example + +This example sets the Hadoop profile path as _../conf/filesystem_offload_core_site.xml_. + +```conf +fileSystemProfilePath=../conf/filesystem_offload_core_site.xml +``` + +You can set the following configurations in the _filesystem_offload_core_site.xml_ file. + +``` + + fs.defaultFS + + + + + hadoop.tmp.dir + pulsar + + + + io.file.buffer.size + 4096 + + + + io.seqfile.compress.blocksize + 1000000 + + + + io.seqfile.compression.type + BLOCK + + + + io.map.index.interval + 128 + +``` + +> #### Tip +> +> For more information about the Hadoop HDFS, see [here](https://hadoop.apache.org/docs/current/). + +### Configure filesystem offloader to run automatically + +Namespace policy can be configured to offload data automatically once a threshold is reached. The threshold is based on the size of data that a topic has stored on a Pulsar cluster. Once the topic reaches the threshold, an offload operation is triggered automatically. + +Threshold value|Action +|---|--- +> 0 | It triggers the offloading operation if the topic storage reaches its threshold. += 0|It causes a broker to offload data as soon as possible. +< 0 |It disables automatic offloading operation. + +Automatic offload runs when a new segment is added to a topic log. If you set the threshold on a namespace, but few messages are being produced to the topic, offloader does not work until the current segment is full. + +You can configure the threshold size using CLI tools, such as pulsar-admin. + +#### Example + +This example sets the filesystem offloader threshold size to 10 MB using pulsar-admin. + +```bash +pulsar-admin namespaces set-offload-threshold --size 10M my-tenant/my-namespace +``` + +> #### Tip +> +> For more information about the `pulsar-admin namespaces set-offload-threshold options` command, including flags, descriptions, default values, and shorthands, see [here](reference-pulsar-admin.md#set-offload-threshold). + +### Configure filesystem offloader to run manually + +For individual topics, you can trigger filesystem offloader manually using one of the following methods: + +- Use REST endpoint. + +- Use CLI tools (such as pulsar-admin). + +To trigger via CLI tools, you need to specify the maximum amount of data (threshold) that should be retained on a Pulsar cluster for a topic. If the size of the topic data on the Pulsar cluster exceeds this threshold, segments from the topic are offloaded to the filesystem until the threshold is no longer exceeded. Older segments are offloaded first. + +#### Example + +- This example triggers the filesystem offloader to run manually using pulsar-admin. 
+ + ```bash + pulsar-admin topics offload --size-threshold 10M persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload triggered for persistent://my-tenant/my-namespace/topic1 for messages before 2:0:-1 + ``` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload options` command, including flags, descriptions, default values, and shorthands, see [here](reference-pulsar-admin.md#offload). + +- This example checks filesystem offloader status using pulsar-admin. + + ```bash + pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload is currently running + ``` + + To wait for the filesystem to complete the job, add the `-w` flag. + + ```bash + pulsar-admin topics offload-status -w persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Offload was a success + ``` + + If there is an error in the offloading operation, the error is propagated to the `pulsar-admin topics offload-status` command. + + ```bash + pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Error in offload + null + + Reason: Error offloading: org.apache.bookkeeper.mledger.ManagedLedgerException: java.util.concurrent.CompletionException: com.amazonaws.services.s3.model.AmazonS3Exception: Anonymous users cannot initiate multipart uploads. Please authenticate. (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 798758DE3F1776DF; S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g=), S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g= + ```` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload-status options` command, including flags, descriptions, default values, and shorthands, see [here](reference-pulsar-admin.md#offload-status). + +## Tutorial + +For the complete and step-by-step instructions on how to use the filesystem offloader with Pulsar, see [here](https://hub.streamnative.io/offloaders/filesystem/2.5.1). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/tiered-storage-gcs.md b/site2/website/versioned_docs/version-2.7.0/tiered-storage-gcs.md new file mode 100644 index 00000000000000..eadecd04b6eb82 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/tiered-storage-gcs.md @@ -0,0 +1,274 @@ +--- +id: version-2.7.0-tiered-storage-gcs +title: Use GCS offloader with Pulsar +sidebar_label: GCS offloader +original_id: tiered-storage-gcs +--- + +This chapter guides you through every step of installing and configuring the GCS offloader and using it with Pulsar. + +## Installation + +Follow the steps below to install the GCS offloader. + +### Prerequisite + +- Pulsar: 2.4.2 or later versions + +### Step + +This example uses Pulsar 2.5.1. + +1. Download the Pulsar tarball using one of the following ways: + + * Download from the [Apache mirror](https://archive.apache.org/dist/pulsar/pulsar-2.5.1/apache-pulsar-2.5.1-bin.tar.gz) + + * Download from the Pulsar [download page](https://pulsar.apache.org/download) + + * Use [wget](https://www.gnu.org/software/wget) + + ```shell + wget https://archive.apache.org/dist/pulsar/pulsar-2.5.1/apache-pulsar-2.5.1-bin.tar.gz + ``` + +2. Download and untar the Pulsar offloaders package. 
+ + ```bash + wget https://downloads.apache.org/pulsar/pulsar-2.5.1/apache-pulsar-offloaders-2.5.1-bin.tar.gz + + tar xvfz apache-pulsar-offloaders-2.5.1-bin.tar.gz + ``` + + > #### Note + > + > * If you are running Pulsar in a bare metal cluster, make sure that `offloaders` tarball is unzipped in every broker's Pulsar directory. + > + > * If you are running Pulsar in Docker or deploying Pulsar using a Docker image (such as K8S and DCOS), you can use the `apachepulsar/pulsar-all` image instead of the `apachepulsar/pulsar` image. `apachepulsar/pulsar-all` image has already bundled tiered storage offloaders. + +3. Copy the Pulsar offloaders as `offloaders` in the Pulsar directory. + + ``` + mv apache-pulsar-offloaders-2.5.1/offloaders apache-pulsar-2.5.1/offloaders + + ls offloaders + ``` + + **Output** + + As shown in the output, Pulsar uses [Apache jclouds](https://jclouds.apache.org) to support GCS and AWS S3 for long term storage. + + + ``` + tiered-storage-file-system-2.5.1.nar + tiered-storage-jcloud-2.5.1.nar + ``` + +## Configuration + +> #### Note +> +> Before offloading data from BookKeeper to GCS, you need to configure some properties of the GCS offloader driver. + +Besides, you can also configure the GCS offloader to run it automatically or trigger it manually. + +### Configure GCS offloader driver + +You can configure GCS offloader driver in the configuration file `broker.conf` or `standalone.conf`. + +- **Required** configurations are as below. + + **Required** configuration | Description | Example value + |---|---|--- + `managedLedgerOffloadDriver`|Offloader driver name, which is case-insensitive.|google-cloud-storage + `offloadersDirectory`|Offloader directory|offloaders + `gcsManagedLedgerOffloadBucket`|Bucket|pulsar-topic-offload + `gcsManagedLedgerOffloadRegion`|Bucket region|europe-west3 + `gcsManagedLedgerOffloadServiceAccountKeyFile`|Authentication |/Users/user-name/Downloads/project-804d5e6a6f33.json + +- **Optional** configurations are as below. + + Optional configuration|Description|Example value + |---|---|--- + `gcsManagedLedgerOffloadReadBufferSizeInBytes`|Size of block read|1 MB + `gcsManagedLedgerOffloadMaxBlockSizeInBytes`|Size of block write|64 MB + `managedLedgerMinLedgerRolloverTimeMinutes`|Minimum time between ledger rollover for a topic.|2 + `managedLedgerMaxEntriesPerLedger`|Max number of entries to append to a ledger before triggering a rollover.|5000 + +#### Bucket (required) + +A bucket is a basic container that holds your data. Everything you store in GCS **must** be contained in a bucket. You can use a bucket to organize your data and control access to your data, but unlike directory and folder, you can not nest a bucket. + +##### Example + +This example names the bucket as _pulsar-topic-offload_. + +```conf +gcsManagedLedgerOffloadBucket=pulsar-topic-offload +``` + +#### Bucket region (required) + +Bucket region is the region where a bucket is located. If a bucket region is not specified, the **default** region (`us multi-regional location`) is used. + +> #### Tip +> +> For more information about bucket location, see [here](https://cloud.google.com/storage/docs/bucket-locations). + +##### Example + +This example sets the bucket region as _europe-west3_. + +``` +gcsManagedLedgerOffloadRegion=europe-west3 +``` + +#### Authentication (required) + +To enable a broker access GCS, you need to configure `gcsManagedLedgerOffloadServiceAccountKeyFile` in the configuration file `broker.conf`. 
+ +`gcsManagedLedgerOffloadServiceAccountKeyFile` is +a JSON file, containing GCS credentials of a service account. + +##### Example + +To generate service account credentials or view the public credentials that you've already generated, follow the following steps. + +1. Navigate to the [Service accounts page](https://console.developers.google.com/iam-admin/serviceaccounts). + +2. Select a project or create a new one. + +3. Click **Create service account**. + +4. In the **Create service account** window, type a name for the service account and select **Furnish a new private key**. + + If you want to [grant G Suite domain-wide authority](https://developers.google.com/identity/protocols/OAuth2ServiceAccount#delegatingauthority) to the service account, select **Enable G Suite Domain-wide Delegation**. + +5. Click **Create**. + + > #### Note + > + > Make sure the service account you create has permission to operate GCS, you need to assign **Storage Admin** permission to your service account [here](https://cloud.google.com/storage/docs/access-control/iam). + +6. You can get the following information and set this in `broker.conf`. + + ```conf + gcsManagedLedgerOffloadServiceAccountKeyFile="/Users/user-name/Downloads/project-804d5e6a6f33.json" + ``` + + > #### Tip + > + > - For more information about how to create `gcsManagedLedgerOffloadServiceAccountKeyFile`, see [here](https://support.google.com/googleapi/answer/6158849). + > + > - For more information about Google Cloud IAM, see [here](https://cloud.google.com/storage/docs/access-control/iam). + +#### Size of block read/write + +You can configure the size of a request sent to or read from GCS in the configuration file `broker.conf`. + +Configuration|Description +|---|--- +`gcsManagedLedgerOffloadReadBufferSizeInBytes`|Block size for each individual read when reading back data from GCS.

    The **default** value is 1 MB. +`gcsManagedLedgerOffloadMaxBlockSizeInBytes`|Maximum size of a "part" sent during a multipart upload to GCS.

    It **can not** be smaller than 5 MB.

    The **default** value is 64 MB. + +### Configure GCS offloader to run automatically + +Namespace policy can be configured to offload data automatically once a threshold is reached. The threshold is based on the size of data that a topic has stored on a Pulsar cluster. Once the topic reaches the threshold, an offload operation is triggered automatically. + +Threshold value|Action +|---|--- +> 0 | It triggers the offloading operation if the topic storage reaches its threshold. += 0|It causes a broker to offload data as soon as possible. +< 0 |It disables automatic offloading operation. + +Automatic offloading runs when a new segment is added to a topic log. If you set the threshold on a namespace, but few messages are being produced to the topic, offloader does not work until the current segment is full. + +You can configure the threshold size using CLI tools, such as pulsar-admin. + +The offload configurations in `broker.conf` and `standalone.conf` are used for the namespaces that do not have namespace level offload policies. Each namespace can have its own offload policy. If you want to set offload policy for each namespace, use the command [`pulsar-admin namespaces set-offload-policies options`](http://pulsar.apache.org/tools/pulsar-admin/2.6.0-SNAPSHOT/#-em-set-offload-policies-em-) command. + +#### Example + +This example sets the GCS offloader threshold size to 10 MB using pulsar-admin. + +```bash +pulsar-admin namespaces set-offload-threshold --size 10M my-tenant/my-namespace +``` + +> #### Tip +> +> For more information about the `pulsar-admin namespaces set-offload-threshold options` command, including flags, descriptions, default values, and shorthands, see [here](reference-pulsar-admin.md#set-offload-threshold). + +### Configure GCS offloader to run manually + +For individual topics, you can trigger GCS offloader manually using one of the following methods: + +- Use REST endpoint. + +- Use CLI tools (such as pulsar-admin). + + To trigger the GCS via CLI tools, you need to specify the maximum amount of data (threshold) that should be retained on a Pulsar cluster for a topic. If the size of the topic data on the Pulsar cluster exceeds this threshold, segments from the topic are moved to GCS until the threshold is no longer exceeded. Older segments are moved first. + +#### Example + +- This example triggers the GCS offloader to run manually using pulsar-admin with the command `pulsar-admin topics offload (topic-name) (threshold)`. + + ```bash + pulsar-admin topics offload persistent://my-tenant/my-namespace/topic1 10M + ``` + + **Output** + + ```bash + Offload triggered for persistent://my-tenant/my-namespace/topic1 for messages before 2:0:-1 + ``` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload options` command, including flags, descriptions, default values, and shorthands, see [here]((reference-pulsar-admin.md#offload). + +- This example checks the GCS offloader status using pulsar-admin with the command `pulsar-admin topics offload-status options`. + + ```bash + pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ```bash + Offload is currently running + ``` + + To wait for GCS to complete the job, add the `-w` flag. + + ```bash + pulsar-admin topics offload-status -w persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Offload was a success + ``` + + If there is an error in offloading, the error is propagated to the `pulsar-admin topics offload-status` command. 
+ + ```bash + pulsar-admin topics offload-status persistent://my-tenant/my-namespace/topic1 + ``` + + **Output** + + ``` + Error in offload + null + + Reason: Error offloading: org.apache.bookkeeper.mledger.ManagedLedgerException: java.util.concurrent.CompletionException: com.amazonaws.services.s3.model.AmazonS3Exception: Anonymous users cannot initiate multipart uploads. Please authenticate. (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 798758DE3F1776DF; S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g=), S3 Extended Request ID: dhBFz/lZm1oiG/oBEepeNlhrtsDlzoOhocuYMpKihQGXe6EG8puRGOkK6UwqzVrMXTWBxxHcS+g= + ```` + + > #### Tip + > + > For more information about the `pulsar-admin topics offload-status options` command, including flags, descriptions, default values, and shorthands, see [here](reference-pulsar-admin.md#offload-status). + +## Tutorial + +For the complete and step-by-step instructions on how to use the GCS offloader with Pulsar, see [here](https://hub.streamnative.io/offloaders/gcs/2.5.1#usage). \ No newline at end of file diff --git a/site2/website/versioned_docs/version-2.7.0/tiered-storage-overview.md b/site2/website/versioned_docs/version-2.7.0/tiered-storage-overview.md new file mode 100644 index 00000000000000..97313dfe646db8 --- /dev/null +++ b/site2/website/versioned_docs/version-2.7.0/tiered-storage-overview.md @@ -0,0 +1,50 @@ +--- +id: version-2.7.0-tiered-storage-overview +title: Overview of tiered storage +sidebar_label: Overview +original_id: tiered-storage-overview +--- + +Pulsar's **Tiered Storage** feature allows older backlog data to be moved from BookKeeper to long term and cheaper storage, while still allowing clients to access the backlog as if nothing has changed. + +* Tiered storage uses [Apache jclouds](https://jclouds.apache.org) to support +[Amazon S3](https://aws.amazon.com/s3/) and [GCS (Google Cloud Storage)](https://cloud.google.com/storage/) for long term storage. + + With jclouds, it is easy to add support for more +[cloud storage providers](https://jclouds.apache.org/reference/providers/#blobstore-providers) in the future. + + > #### Tip + > + > For more information about how to use the AWS S3 offloader with Pulsar, see [here](tiered-storage-aws.md). + > + > For more information about how to use the GCS offloader with Pulsar, see [here](tiered-storage-gcs.md). + +* Tiered storage uses [Apache Hadoop](http://hadoop.apache.org/) to support filesystems for long term storage. + + With Hadoop, it is easy to add support for more filesystems in the future. + + > #### Tip + > + > For more information about how to use the filesystem offloader with Pulsar, see [here](tiered-storage-filesystem.md). + +## When to use tiered storage? + +Tiered storage should be used when you have a topic for which you want to keep a very long backlog for a long time. + +For example, if you have a topic containing user actions which you use to train your recommendation systems, you may want to keep that data for a long time, so that if you change your recommendation algorithm, you can rerun it against your full user history. + +## How does tiered storage work? + +A topic in Pulsar is backed by a **log**, known as a **managed ledger**. This log is composed of an ordered list of segments. Pulsar only writes to the final segment of the log. All previous segments are sealed. The data within the segment is immutable. This is known as a **segment oriented architecture**. 
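+
+You can see this segment list for a topic with `pulsar-admin topics stats-internal`. The following output is an abbreviated sketch for a hypothetical topic `my-topic`; the exact fields vary across Pulsar versions:
+
+```bash
+$ bin/pulsar-admin topics stats-internal persistent://public/default/my-topic
+{
+  "entriesAddedCounter" : 150,
+  "ledgers" : [ {
+    "ledgerId" : 12,
+    "entries" : 100,
+    "size" : 12345,
+    "offloaded" : false
+  }, {
+    "ledgerId" : 13,
+    "entries" : 50,
+    "size" : 6172,
+    "offloaded" : false
+  } ],
+  ...
+}
+```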
+
+![Tiered storage](assets/pulsar-tiered-storage.png "Tiered Storage")
+
+The tiered storage offloading mechanism takes advantage of this segment oriented architecture. When offloading is requested, the segments of the log are copied one-by-one to tiered storage. All segments of the log, apart from the current segment, can be offloaded to tiered storage.
+
+Data written to BookKeeper is replicated to 3 physical machines by default. However, once a segment is sealed in BookKeeper, it becomes immutable and can be copied to long term storage. Long term storage can achieve cost savings by using mechanisms such as [Reed-Solomon error correction](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction) to require fewer physical copies of data.
+
+Before offloading ledgers to long term storage, you need to configure buckets, credentials, and other properties for the cloud storage service. Additionally, Pulsar uses multi-part objects to upload the segment data, and brokers may crash while uploading the data. It is recommended that you add a life cycle rule for your bucket to expire incomplete multi-part uploads after a day or two to avoid getting charged for them. Moreover, you can trigger the offloading operation manually (via REST API or CLI) or automatically (via CLI).
+
+After offloading ledgers to long term storage, you can still query data in the offloaded ledgers with Pulsar SQL.
+
+For more information about tiered storage for Pulsar topics, see [here](https://github.com/apache/pulsar/wiki/PIP-17:-Tiered-storage-for-Pulsar-topics).
diff --git a/site2/website/versioned_docs/version-2.7.0/transaction-api.md b/site2/website/versioned_docs/version-2.7.0/transaction-api.md
new file mode 100644
index 00000000000000..0eeea492c9b0f9
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/transaction-api.md
@@ -0,0 +1,150 @@
+---
+id: version-2.7.0-transactions-api
+title: Transactions API (Developer Preview)
+sidebar_label: Transactions API
+original_id: transactions-api
+---
+
+All messages in a transaction are available to consumers only after the transaction is committed. If a transaction is aborted, all the writes and acknowledgments in the transaction roll back.
+
+Currently, Pulsar transactions are a developer preview feature. The feature is disabled by default. You can enable it and use transactions in your application in a development environment.
+
+## Prerequisites
+1. To enable transactions in Pulsar, set the following parameter in the `broker.conf` file.
+
+```
+transactionCoordinatorEnabled=true
+```
+
+2. Initialize the transaction coordinator metadata, so that the transaction coordinators can take advantage of the partitioned topic, for example, for load balancing.
+
+```
+bin/pulsar initialize-transaction-coordinator-metadata -cs 127.0.0.1:2181 -c standalone
+```
+
+After initializing the transaction coordinator metadata, you can use the transactions API. The following APIs are available.
+
+## Initialize Pulsar client
+
+Enable transactions when building the Pulsar client; this also initializes the transaction coordinator client.
+
+```
+PulsarClient pulsarClient = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .enableTransaction(true)
+        .build();
+```
+
+## Start transactions
+You can start a transaction as follows.
+
+```
+Transaction txn = pulsarClient
+        .newTransaction()
+        .withTransactionTimeout(5, TimeUnit.MINUTES)
+        .build()
+        .get();
+```
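+
+Note that `build()` returns a `CompletableFuture<Transaction>`; the example above simply blocks on `get()`. A minimal non-blocking sketch of the same call, for illustration only:
+
+```java
+pulsarClient.newTransaction()
+        .withTransactionTimeout(5, TimeUnit.MINUTES)
+        .build()
+        .thenAccept(txn -> {
+            // produce and acknowledge with txn here, then commit or abort
+        });
+```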
+
+## Start transactions
+
+You can start a transaction as follows.
+
+```
+Transaction txn = pulsarClient
+        .newTransaction()
+        .withTransactionTimeout(5, TimeUnit.MINUTES)
+        .build()
+        .get();
+```
+
+## Produce transaction messages
+
+A transaction parameter is required when producing new transaction messages. Transaction messages in Pulsar have `read-committed` semantics, so a consumer cannot receive messages from an ongoing transaction before the transaction is committed.
+
+```
+producer.newMessage(txn).value("Hello Pulsar Transaction".getBytes()).sendAsync();
+```
+
+## Acknowledge the messages with the transaction
+
+Transactional acknowledgement requires a transaction parameter and marks the messages as pending-ack. When the transaction is committed, the pending-ack state becomes the ack state. If the transaction is aborted, the pending-ack state becomes the unack state.
+
+```
+Message message = consumer.receive();
+consumer.acknowledgeAsync(message.getMessageId(), txn);
+```
+
+## Commit transactions
+
+When the transaction is committed, consumers receive the transaction messages and the pending-ack state becomes the ack state.
+
+```
+txn.commit().get();
+```
+
+## Abort transaction
+
+When the transaction is aborted, the transactional acknowledgements are canceled and the pending-ack messages are redelivered.
+
+```
+txn.abort().get();
+```
+
+### Example
+
+The following example shows how messages are processed in a transaction.
+
+```
+PulsarClient pulsarClient = PulsarClient.builder()
+        .serviceUrl("pulsar://localhost:6650")
+        .statsInterval(0, TimeUnit.SECONDS)
+        .enableTransaction(true)
+        .build();
+
+String sourceTopic = "public/default/source-topic";
+String sinkTopic = "public/default/sink-topic";
+
+Producer<String> sourceProducer = pulsarClient
+        .newProducer(Schema.STRING)
+        .topic(sourceTopic)
+        .create();
+sourceProducer.newMessage().value("hello pulsar transaction").sendAsync();
+
+Consumer<String> sourceConsumer = pulsarClient
+        .newConsumer(Schema.STRING)
+        .topic(sourceTopic)
+        .subscriptionName("test")
+        .subscriptionType(SubscriptionType.Shared)
+        .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
+        .subscribe();
+
+Producer<String> sinkProducer = pulsarClient
+        .newProducer(Schema.STRING)
+        .topic(sinkTopic)
+        .create();
+
+Transaction txn = pulsarClient
+        .newTransaction()
+        .withTransactionTimeout(5, TimeUnit.MINUTES)
+        .build()
+        .get();
+
+// The source message acknowledgement and the sink message production belong to
+// one transaction; they are combined into a single atomic operation.
+Message<String> message = sourceConsumer.receive();
+sourceConsumer.acknowledgeAsync(message.getMessageId(), txn);
+sinkProducer.newMessage(txn).value("sink data").sendAsync();
+
+txn.commit().get();
+```
+
+## Enable batch messages in transactions
+
+To enable batch messages in transactions, you need to enable the batch index acknowledgement feature, so that transactional acknowledgements can detect conflicts at the batch index level.
+
+To enable batch index acknowledgement, set `acknowledgmentAtBatchIndexLevelEnabled` to `true` in the `broker.conf` or `standalone.conf` file.
+
+```
+acknowledgmentAtBatchIndexLevelEnabled=true
+```
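+
+On the producer side, the sketch below (with a hypothetical topic name, and assuming producer-side batching behaves as it does for non-transactional messages) shows a batching producer that could be paired with the transactional consumer configured after this note. Batching is enabled by default in the Java client and is only made explicit here.
+
+```
+Producer<String> batchProducer = pulsarClient
+        .newProducer(Schema.STRING)
+        // Hypothetical topic name; adjust to your environment.
+        .topic("persistent://public/default/txn-batch-topic")
+        // Batching is on by default for the Java producer; shown explicitly.
+        .enableBatching(true)
+        .sendTimeout(0, TimeUnit.SECONDS)
+        .create();
+```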
+
+Then call the `enableBatchIndexAcknowledgment(true)` method in the consumer builder.
+
+```
+Consumer<byte[]> sinkConsumer = pulsarClient
+        .newConsumer()
+        .topic(transferTopic)
+        .subscriptionName("sink-topic")
+        .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
+        .subscriptionType(SubscriptionType.Shared)
+        .enableBatchIndexAcknowledgment(true) // enable batch index acknowledgement
+        .subscribe();
+```
\ No newline at end of file
diff --git a/site2/website/versioned_docs/version-2.7.0/transaction-guarantee.md b/site2/website/versioned_docs/version-2.7.0/transaction-guarantee.md
new file mode 100644
index 00000000000000..132d3a76735c54
--- /dev/null
+++ b/site2/website/versioned_docs/version-2.7.0/transaction-guarantee.md
@@ -0,0 +1,17 @@
+---
+id: version-2.7.0-transactions-guarantee
+title: Transaction Guarantees
+sidebar_label: Transaction Guarantees
+original_id: transactions-guarantee
+---
+
+Pulsar transactions provide the following guarantees.
+
+## Atomic multi-partition writes and multi-subscription acknowledgements
+Transactions enable atomic writes to multiple topics and partitions. A batch of messages in a transaction can be received from, produced to, and acknowledged by many partitions. All the operations involved in a transaction succeed or fail as a single unit.
+
+## Read transactional messages
+All the messages in a transaction are available to consumers only after the transaction is committed.
+
+## Acknowledge transactional messages
+When a message is acknowledged with a transaction ID, it is acknowledged successfully only once by a consumer under the subscription.
\ No newline at end of file
diff --git a/site2/website/versioned_sidebars/version-2.7.0-sidebars.json b/site2/website/versioned_sidebars/version-2.7.0-sidebars.json
new file mode 100644
index 00000000000000..a06f65939af068
--- /dev/null
+++ b/site2/website/versioned_sidebars/version-2.7.0-sidebars.json
@@ -0,0 +1,160 @@
+{
+  "version-2.7.0-docs": {
+    "Get started": [
+      "version-2.7.0-standalone",
+      "version-2.7.0-standalone-docker",
+      "version-2.7.0-kubernetes-helm"
+    ],
+    "Concepts and Architecture": [
+      "version-2.7.0-concepts-overview",
+      "version-2.7.0-concepts-messaging",
+      "version-2.7.0-concepts-architecture-overview",
+      "version-2.7.0-concepts-clients",
+      "version-2.7.0-concepts-replication",
+      "version-2.7.0-concepts-multi-tenancy",
+      "version-2.7.0-concepts-authentication",
+      "version-2.7.0-concepts-topic-compaction",
+      "version-2.7.0-concepts-proxy-sni-routing"
+    ],
+    "Pulsar Schema": [
+      "version-2.7.0-schema-get-started",
+      "version-2.7.0-schema-understand",
+      "version-2.7.0-schema-evolution-compatibility",
+      "version-2.7.0-schema-manage"
+    ],
+    "Pulsar Functions": [
+      "version-2.7.0-functions-overview",
+      "version-2.7.0-functions-worker",
+      "version-2.7.0-functions-runtime",
+      "version-2.7.0-functions-develop",
+      "version-2.7.0-functions-package",
+      "version-2.7.0-functions-debug",
+      "version-2.7.0-functions-deploy",
+      "version-2.7.0-functions-cli",
+      "version-2.7.0-window-functions-context"
+    ],
+    "Pulsar IO": [
+      "version-2.7.0-io-overview",
+      "version-2.7.0-io-quickstart",
+      "version-2.7.0-io-use",
+      "version-2.7.0-io-debug",
+      "version-2.7.0-io-connectors",
+      "version-2.7.0-io-cdc",
+      "version-2.7.0-io-develop",
+      "version-2.7.0-io-cli"
+    ],
+    "Pulsar SQL": [
+      "version-2.7.0-sql-overview",
+      "version-2.7.0-sql-getting-started",
+      "version-2.7.0-sql-deployment-configurations",
+      "version-2.7.0-sql-rest-api"
+    ],
+    "Tiered storage": [
+      "version-2.7.0-tiered-storage-overview",
+      "version-2.7.0-tiered-storage-aws",
+      "version-2.7.0-tiered-storage-gcs",
"version-2.7.0-tiered-storage-filesystem" + ], + "Transactions": [ + "version-2.7.0-transactions", + "version-2.7.0-transactions-guarantee", + "version-2.7.0-transactions-api" + ], + "Kubernetes (Helm)": [ + "version-2.7.0-helm-overview", + "version-2.7.0-helm-prepare", + "version-2.7.0-helm-install", + "version-2.7.0-helm-deploy", + "version-2.7.0-helm-upgrade", + "version-2.7.0-helm-tools" + ], + "Deployment": [ + "version-2.7.0-deploy-aws", + "version-2.7.0-deploy-kubernetes", + "version-2.7.0-deploy-bare-metal", + "version-2.7.0-deploy-bare-metal-multi-cluster", + "version-2.7.0-deploy-dcos", + "version-2.7.0-deploy-docker", + "version-2.7.0-deploy-monitoring" + ], + "Administration": [ + "version-2.7.0-administration-zk-bk", + "version-2.7.0-administration-geo", + "version-2.7.0-administration-pulsar-manager", + "version-2.7.0-administration-stats", + "version-2.7.0-administration-load-balance", + "version-2.7.0-administration-proxy", + "version-2.7.0-administration-upgrade" + ], + "Security": [ + "version-2.7.0-security-overview", + "version-2.7.0-security-tls-transport", + "version-2.7.0-security-tls-authentication", + "version-2.7.0-security-tls-keystore", + "version-2.7.0-security-jwt", + "version-2.7.0-security-athenz", + "version-2.7.0-security-kerberos", + "version-2.7.0-security-oauth2", + "version-2.7.0-security-authorization", + "version-2.7.0-security-encryption", + "version-2.7.0-security-extending", + "version-2.7.0-security-bouncy-castle" + ], + "Performance": [ + "version-2.7.0-performance-pulsar-perf" + ], + "Client libraries": [ + "version-2.7.0-client-libraries-java", + "version-2.7.0-client-libraries-go", + "version-2.7.0-client-libraries-python", + "version-2.7.0-client-libraries-cpp", + "version-2.7.0-client-libraries-node", + "version-2.7.0-client-libraries-websocket", + "version-2.7.0-client-libraries-dotnet" + ], + "Admin API": [ + "version-2.7.0-admin-api-overview", + "version-2.7.0-admin-api-clusters", + "version-2.7.0-admin-api-tenants", + "version-2.7.0-admin-api-brokers", + "version-2.7.0-admin-api-namespaces", + "version-2.7.0-admin-api-permissions", + "version-2.7.0-admin-api-topics", + "version-2.7.0-admin-api-schemas", + "version-2.7.0-admin-api-functions" + ], + "Adaptors": [ + "version-2.7.0-adaptors-kafka", + "version-2.7.0-adaptors-spark", + "version-2.7.0-adaptors-storm" + ], + "Cookbooks": [ + "version-2.7.0-cookbooks-compaction", + "version-2.7.0-cookbooks-deduplication", + "version-2.7.0-cookbooks-non-persistent", + "version-2.7.0-cookbooks-retention-expiry", + "version-2.7.0-cookbooks-encryption", + "version-2.7.0-cookbooks-message-queue", + "version-2.7.0-cookbooks-bookkeepermetadata" + ], + "Development": [ + "version-2.7.0-develop-tools", + "version-2.7.0-develop-binary-protocol", + "version-2.7.0-develop-schema", + "version-2.7.0-develop-load-manager", + "version-2.7.0-develop-cpp" + ], + "Reference": [ + "version-2.7.0-reference-terminology", + "version-2.7.0-reference-cli-tools", + "version-2.7.0-reference-configuration", + "version-2.7.0-reference-metrics" + ] + }, + "version-2.7.0-docs-other": { + "First Category": [ + "version-2.7.0-doc4", + "version-2.7.0-doc5" + ] + } +} diff --git a/site2/website/versions.json b/site2/website/versions.json index 266922a424056c..5b704ba4b42932 100644 --- a/site2/website/versions.json +++ b/site2/website/versions.json @@ -1,4 +1,5 @@ [ + "2.7.0", "2.6.2", "2.6.1", "2.6.0",