-
Notifications
You must be signed in to change notification settings - Fork 49
Expand file tree
/
Copy path__init__.py
More file actions
1596 lines (1332 loc) · 62.8 KB
/
__init__.py
File metadata and controls
1596 lines (1332 loc) · 62.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
"""
The Pulsar Python client library is based on the existing C++ client library.
All the same features are exposed through the Python interface.
Currently, the supported Python versions are 3.7, 3.8, 3.9 and 3.10.
=================
Install from PyPI
=================
Download Python wheel binary files for macOS and Linux directly from
the PyPI archive:
.. code-block:: shell
sudo pip install pulsar-client
========================
Install from source code
========================
Read the instructions on `source code repository
<https://github.com/apache/pulsar-client-python#install-the-python-wheel>`_.
"""
import logging
import _pulsar
from _pulsar import Result, CompressionType, ConsumerType, InitialPosition, PartitionsRoutingMode, BatchingType, \
LoggerLevel, BatchReceivePolicy # noqa: F401
from pulsar.__about__ import __version__
from pulsar.exceptions import *
from pulsar.functions.function import Function
from pulsar.functions.context import Context
from pulsar.functions.serde import SerDe, IdentitySerDe, PickleSerDe
from pulsar import schema
_schema = schema
import re
_retype = type(re.compile('x'))
from datetime import timedelta
class MessageId:
    """
    Represents a message id.

    Attributes
    ----------
    earliest:
        Represents the earliest message stored in a topic
    latest:
        Represents the latest message published on a topic
    """

    # Sentinel ids understood by the broker, exposed as class attributes.
    earliest = _pulsar.MessageId.earliest
    latest = _pulsar.MessageId.latest

    def __init__(self, partition=-1, ledger_id=-1, entry_id=-1, batch_index=-1):
        # Delegate storage to the underlying C++ message id object.
        self._msg_id = _pulsar.MessageId(partition, ledger_id, entry_id, batch_index)

    def ledger_id(self):
        """Return the ledger id component of this message id."""
        return self._msg_id.ledger_id()

    def entry_id(self):
        """Return the entry id component of this message id."""
        return self._msg_id.entry_id()

    def batch_index(self):
        """Return the batch index component of this message id."""
        return self._msg_id.batch_index()

    def partition(self):
        """Return the partition index component of this message id."""
        return self._msg_id.partition()

    def serialize(self):
        """
        Returns a bytes representation of the message id.
        This byte sequence can be stored and later deserialized.
        """
        return self._msg_id.serialize()

    @staticmethod
    def deserialize(message_id_bytes):
        """
        Deserialize a message id object from a previously
        serialized bytes sequence.
        """
        return _pulsar.MessageId.deserialize(message_id_bytes)
class Message:
    """
    A single message obtained from a consumer, either via `receive` or
    delivered through a message listener.
    """

    def data(self):
        """Return the raw payload of the message as bytes."""
        return self._message.data()

    def value(self):
        """Return the payload decoded through this consumer's schema."""
        return self._schema.decode_message(self._message)

    def properties(self):
        """
        Return the properties attached to the message: application-defined
        key/value pairs carried alongside the payload.
        """
        return self._message.properties()

    def partition_key(self):
        """Return the partitioning key associated with the message."""
        return self._message.partition_key()

    def publish_timestamp(self):
        """Return the publish time of the message, in milliseconds."""
        return self._message.publish_timestamp()

    def event_timestamp(self):
        """Return the event time of the message, in milliseconds."""
        return self._message.event_timestamp()

    def message_id(self):
        """Return the message ID that refers to this particular message."""
        return self._message.message_id()

    def topic_name(self):
        """Return the name of the topic this message originated from."""
        return self._message.topic_name()

    def redelivery_count(self):
        """Return how many times this message has been redelivered."""
        return self._message.redelivery_count()

    def schema_version(self):
        """Return the schema version attached to this message."""
        return self._message.schema_version()

    @staticmethod
    def _wrap(_message):
        # Internal factory: wrap a native (C-extension) message object
        # in a Message without going through any public constructor.
        wrapped = Message()
        wrapped._message = _message
        return wrapped
class MessageBatch:
    """Thin wrapper over the native message-batch helper."""

    def __init__(self):
        self._msg_batch = _pulsar.MessageBatch()

    def with_message_id(self, msg_id):
        """
        Attach a message id to this batch and return ``self`` for chaining.
        Accepts either the wrapper ``MessageId`` or the native id type.
        """
        if isinstance(msg_id, _pulsar.MessageId):
            native_id = msg_id
        elif isinstance(msg_id, MessageId):
            # Unwrap the Python-level wrapper to its native id.
            native_id = msg_id._msg_id
        else:
            raise TypeError("unknown message id type")
        self._msg_batch.with_message_id(native_id)
        return self

    def parse_from(self, data, size):
        """Parse ``size`` bytes of ``data`` and return the contained messages."""
        self._msg_batch.parse_from(data, size)
        return [Message._wrap(raw) for raw in self._msg_batch.messages()]
class Authentication:
    """
    Base authentication provider, loading the implementation from an
    external shared library.
    """

    def __init__(self, dynamicLibPath, authParamsString):
        """
        Create the authentication provider instance.

        Parameters
        ----------
        dynamicLibPath: str
            Path to the authentication provider shared library (such as ``tls.so``)
        authParamsString: str
            Comma-separated list of provider-specific configuration params
        """
        # Validate argument types before touching the native layer.
        _check_type(str, dynamicLibPath, 'dynamicLibPath')
        _check_type(str, authParamsString, 'authParamsString')
        self.auth = _pulsar.Authentication.create(dynamicLibPath, authParamsString)
class AuthenticationTLS(Authentication):
    """
    Authentication provider based on mutual-TLS certificates.
    """

    def __init__(self, certificate_path, private_key_path):
        """
        Create the TLS authentication provider instance.

        Parameters
        ----------
        certificate_path: str
            Path to the public certificate
        private_key_path: str
            Path to private TLS key
        """
        # Both paths must be strings; the native layer reads the files.
        for value, label in ((certificate_path, 'certificate_path'),
                             (private_key_path, 'private_key_path')):
            _check_type(str, value, label)
        self.auth = _pulsar.AuthenticationTLS.create(certificate_path, private_key_path)
class AuthenticationToken(Authentication):
    """
    Authentication provider based on an opaque token.
    """

    def __init__(self, token):
        """
        Create the token authentication provider instance.

        Parameters
        ----------
        token
            A string containing the token or a functions that provides a string with the token
        """
        # A static token string or a zero-argument supplier are both accepted.
        if isinstance(token, str) or callable(token):
            self.auth = _pulsar.AuthenticationToken.create(token)
        else:
            raise ValueError("Argument token is expected to be of type 'str' or a function returning 'str'")
class AuthenticationAthenz(Authentication):
    """
    Authentication provider for Athenz.
    """

    def __init__(self, auth_params_string):
        """
        Create the Athenz authentication provider instance.

        Parameters
        ----------
        auth_params_string: str
            JSON encoded configuration for Athenz client
        """
        # The native client parses the JSON; only the type is checked here.
        _check_type(str, auth_params_string, 'auth_params_string')
        self.auth = _pulsar.AuthenticationAthenz.create(auth_params_string)
class AuthenticationOauth2(Authentication):
    """
    Authentication provider for OAuth 2.0.
    """

    def __init__(self, auth_params_string: str):
        """
        Create the Oauth2 authentication provider instance.

        Configure the provider via a JSON string, for example:

        .. code-block:: python

            auth = AuthenticationOauth2('{"issuer_url": "xxx", "private_key": "yyy"}')

        The valid JSON fields are:

        * issuer_url (required)
            The URL of the authentication provider which allows the Pulsar client to obtain an
            access token.
        * private_key (required)
            The URL to the JSON credentials file. It supports the following pattern formats:
            * ``/path/to/file``
            * ``file:///path/to/file``
            * ``file:/path/to/file``
            * ``data:application/json;base64,<base64-encoded-value>``
            The file content or the based64 encoded value is the encoded JSON string that contains
            the following fields:
            * ``client_id``
            * ``client_secret``
        * audience
            The OAuth 2.0 "resource server" identifier for a Pulsar cluster.
        * scope
            The scope of an access request.

        Parameters
        ----------
        auth_params_string : str
            JSON encoded configuration for Oauth2 client
        """
        # The JSON is validated by the native layer; only the type is checked here.
        _check_type(str, auth_params_string, 'auth_params_string')
        self.auth = _pulsar.AuthenticationOauth2.create(auth_params_string)
class AuthenticationBasic(Authentication):
    """
    HTTP Basic style authentication provider.
    """

    def __init__(self, username=None, password=None, method='basic', auth_params_string=None):
        """
        Create the Basic authentication provider instance.

        For example, if you want to create a basic authentication instance whose
        username is "my-user" and password is "my-pass", there are two ways:

        .. code-block:: python

            auth = AuthenticationBasic('my-user', 'my-pass')
            auth = AuthenticationBasic(auth_params_string='{"username": "my-user", "password": "my-pass"}')

        Parameters
        ----------
        username : str, optional
        password : str, optional
        method : str, default='basic'
            The authentication method name
        auth_params_string : str, optional
            The JSON presentation of all fields above. If it's not None, the other parameters will be ignored.
            Here is an example JSON presentation:
                {"username": "my-user", "password": "my-pass", "method": "oms3.0"}
            The ``username`` and ``password`` fields are required. If the "method" field is not set, it will be
            "basic" by default.
        """
        if auth_params_string is None:
            # Individual fields supplied: all three must be strings.
            for value, label in ((username, 'username'),
                                 (password, 'password'),
                                 (method, 'method')):
                _check_type(str, value, label)
            self.auth = _pulsar.AuthenticationBasic.create(username, password, method)
        else:
            # JSON string supplied: it takes precedence over the other args.
            _check_type(str, auth_params_string, 'auth_params_string')
            self.auth = _pulsar.AuthenticationBasic.create(auth_params_string)
class Client:
"""
The Pulsar client. A single client instance can be used to create producers
and consumers on multiple topics.
The client will share the same connection pool and threads across all
producers and consumers.
"""
    def __init__(self, service_url,
                 authentication=None,
                 operation_timeout_seconds=30,
                 io_threads=1,
                 message_listener_threads=1,
                 concurrent_lookup_requests=50000,
                 log_conf_file_path=None,
                 use_tls=False,
                 tls_trust_certs_file_path=None,
                 tls_allow_insecure_connection=False,
                 tls_validate_hostname=True,
                 logger=None,
                 connection_timeout_ms=10000,
                 listener_name=None
                 ):
        """
        Create a new Pulsar client instance.

        Parameters
        ----------
        service_url: str
            The Pulsar service url eg: pulsar://my-broker.com:6650/
        authentication: Authentication, optional
            Set the authentication provider to be used with the broker. Supported methods:

            * `AuthenticationTLS`
            * `AuthenticationToken`
            * `AuthenticationAthenz`
            * `AuthenticationOauth2`
        operation_timeout_seconds: int, default=30
            Set timeout on client operations (subscribe, create producer, close, unsubscribe).
        io_threads: int, default=1
            Set the number of IO threads to be used by the Pulsar client.
        message_listener_threads: int, default=1
            Set the number of threads to be used by the Pulsar client when delivering messages through
            message listener. The default is 1 thread per Pulsar client. If using more than 1 thread,
            messages for distinct ``message_listener``s will be delivered in different threads, however a
            single ``MessageListener`` will always be assigned to the same thread.
        concurrent_lookup_requests: int, default=50000
            Number of concurrent lookup-requests allowed on each broker connection to prevent overload
            on the broker.
        log_conf_file_path: str, optional
            Initialize log4cxx from a configuration file.
        use_tls: bool, default=False
            Configure whether to use TLS encryption on the connection. This setting is deprecated.
            TLS will be automatically enabled if the ``serviceUrl`` is set to ``pulsar+ssl://`` or ``https://``
        tls_trust_certs_file_path: str, optional
            Set the path to the trusted TLS certificate file. If empty defaults to certifi.
        tls_allow_insecure_connection: bool, default=False
            Configure whether the Pulsar client accepts untrusted TLS certificates from the broker.
        tls_validate_hostname: bool, default=True
            Configure whether the Pulsar client validates that the hostname of the endpoint,
            matches the common name on the TLS certificate presented by the endpoint.
        logger: optional
            Set a Python logger for this Pulsar client. Should be an instance of `logging.Logger`.
        connection_timeout_ms: int, default=10000
            Set timeout in milliseconds on TCP connections.
        listener_name: str, optional
            Listener name for lookup. Clients can use listenerName to choose one of the listeners as
            the service URL to create a connection to the broker as long as the network is accessible.
            ``advertisedListeners`` must be enabled in broker side.
        """
        # Validate every argument up front so misconfiguration fails fast,
        # before any native client resources are allocated.
        _check_type(str, service_url, 'service_url')
        _check_type_or_none(Authentication, authentication, 'authentication')
        _check_type(int, operation_timeout_seconds, 'operation_timeout_seconds')
        _check_type(int, connection_timeout_ms, 'connection_timeout_ms')
        _check_type(int, io_threads, 'io_threads')
        _check_type(int, message_listener_threads, 'message_listener_threads')
        _check_type(int, concurrent_lookup_requests, 'concurrent_lookup_requests')
        _check_type_or_none(str, log_conf_file_path, 'log_conf_file_path')
        _check_type(bool, use_tls, 'use_tls')
        _check_type_or_none(str, tls_trust_certs_file_path, 'tls_trust_certs_file_path')
        _check_type(bool, tls_allow_insecure_connection, 'tls_allow_insecure_connection')
        _check_type(bool, tls_validate_hostname, 'tls_validate_hostname')
        _check_type_or_none(str, listener_name, 'listener_name')
        # Translate the keyword arguments into the native ClientConfiguration.
        conf = _pulsar.ClientConfiguration()
        if authentication:
            conf.authentication(authentication.auth)
        conf.operation_timeout_seconds(operation_timeout_seconds)
        conf.connection_timeout(connection_timeout_ms)
        conf.io_threads(io_threads)
        conf.message_listener_threads(message_listener_threads)
        conf.concurrent_lookup_requests(concurrent_lookup_requests)
        if log_conf_file_path:
            conf.log_conf_file_path(log_conf_file_path)
        # Dispatch on the logger kind: a Python logging.Logger is wrapped in a
        # forwarding callback; ConsoleLogger/FileLogger map to native sinks.
        if isinstance(logger, logging.Logger):
            conf.set_logger(self._prepare_logger(logger))
        elif isinstance(logger, ConsoleLogger):
            conf.set_console_logger(logger.log_level)
        elif isinstance(logger, FileLogger):
            conf.set_file_logger(logger.log_level, logger.log_file)
        elif logger is not None:
            raise ValueError("Logger is expected to be either None, logger.Logger, pulsar.ConsoleLogger or pulsar.FileLogger")
        if listener_name:
            conf.listener_name(listener_name)
        # TLS is turned on either explicitly (deprecated flag) or implied by
        # the URL scheme; without a user-supplied trust store, certifi's CA
        # bundle is used.
        if use_tls or service_url.startswith('pulsar+ssl://') or service_url.startswith('https://'):
            conf.use_tls(True)
        if tls_trust_certs_file_path:
            conf.tls_trust_certs_file_path(tls_trust_certs_file_path)
        else:
            import certifi
            conf.tls_trust_certs_file_path(certifi.where())
        conf.tls_allow_insecure_connection(tls_allow_insecure_connection)
        conf.tls_validate_hostname(tls_validate_hostname)
        # Create the native client and track consumers for later cleanup.
        self._client = _pulsar.Client(service_url, conf)
        self._consumers = []
@staticmethod
def _prepare_logger(logger):
import logging
def log(level, message):
old_threads = logging.logThreads
logging.logThreads = False
logger.log(logging.getLevelName(level), message)
logging.logThreads = old_threads
return log
def create_producer(self, topic,
producer_name=None,
schema=schema.BytesSchema(),
initial_sequence_id=None,
send_timeout_millis=30000,
compression_type=CompressionType.NONE,
max_pending_messages=1000,
max_pending_messages_across_partitions=50000,
block_if_queue_full=False,
batching_enabled=False,
batching_max_messages=1000,
batching_max_allowed_size_in_bytes=128*1024,
batching_max_publish_delay_ms=10,
chunking_enabled=False,
message_routing_mode=PartitionsRoutingMode.RoundRobinDistribution,
lazy_start_partitioned_producers=False,
properties=None,
batching_type=BatchingType.Default,
encryption_key=None,
crypto_key_reader=None
):
"""
Create a new producer on a given topic.
Parameters
----------
topic: str
The topic name
producer_name: str, optional
Specify a name for the producer. If not assigned, the system will generate a globally unique name
which can be accessed with `Producer.producer_name()`. When specifying a name, it is app to the user
to ensure that, for a given topic, the producer name is unique across all Pulsar's clusters.
schema: pulsar.schema.Schema, default=pulsar.schema.BytesSchema
Define the schema of the data that will be published by this producer, e.g,
``schema=JsonSchema(MyRecordClass)``.
The schema will be used for two purposes:
* Validate the data format against the topic defined schema
* Perform serialization/deserialization between data and objects
initial_sequence_id: int, optional
Set the baseline for the sequence ids for messages published by the producer. First message will be
using ``(initialSequenceId + 1)`` as its sequence id and subsequent messages will be assigned
incremental sequence ids, if not otherwise specified.
send_timeout_millis: int, default=30000
If a message is not acknowledged by the server before the ``send_timeout`` expires, an error will be reported.
compression_type: CompressionType, default=CompressionType.NONE
Set the compression type for the producer. By default, message payloads are not compressed.
Supported compression types:
* CompressionType.LZ4
* CompressionType.ZLib
* CompressionType.ZSTD
* CompressionType.SNAPPY
ZSTD is supported since Pulsar 2.3. Consumers will need to be at least at that release in order to
be able to receive messages compressed with ZSTD.
SNAPPY is supported since Pulsar 2.4. Consumers will need to be at least at that release in order to
be able to receive messages compressed with SNAPPY.
max_pending_messages: int, default=1000
Set the max size of the queue holding the messages pending to receive an acknowledgment from the broker.
max_pending_messages_across_partitions: int, default=50000
Set the max size of the queue holding the messages pending to receive an acknowledgment across partitions
from the broker.
block_if_queue_full: bool, default=False
Set whether `send_async` operations should block when the outgoing message queue is full.
message_routing_mode: PartitionsRoutingMode, default=PartitionsRoutingMode.RoundRobinDistribution
Set the message routing mode for the partitioned producer.
Supported modes:
* ``PartitionsRoutingMode.RoundRobinDistribution``
* ``PartitionsRoutingMode.UseSinglePartition``
lazy_start_partitioned_producers: bool, default=False
This config affects producers of partitioned topics only. It controls whether producers register
and connect immediately to the owner broker of each partition or start lazily on demand. The internal
producer of one partition is always started eagerly, chosen by the routing policy, but the internal
producers of any additional partitions are started on demand, upon receiving their first message.
Using this mode can reduce the strain on brokers for topics with large numbers of partitions and when
the SinglePartition routing policy is used without keyed messages. Because producer connection can be
on demand, this can produce extra send latency for the first messages of a given partition.
properties: dict, optional
Sets the properties for the producer. The properties associated with a producer can be used for identify
a producer at broker side.
batching_type: BatchingType, default=BatchingType.Default
Sets the batching type for the producer.
There are two batching type: DefaultBatching and KeyBasedBatching.
DefaultBatching will batch single messages:
(k1, v1), (k2, v1), (k3, v1), (k1, v2), (k2, v2), (k3, v2), (k1, v3), (k2, v3), (k3, v3)
... into single batch message:
[(k1, v1), (k2, v1), (k3, v1), (k1, v2), (k2, v2), (k3, v2), (k1, v3), (k2, v3), (k3, v3)]
KeyBasedBatching will batch incoming single messages:
(k1, v1), (k2, v1), (k3, v1), (k1, v2), (k2, v2), (k3, v2), (k1, v3), (k2, v3), (k3, v3)
... into single batch message:
[(k1, v1), (k1, v2), (k1, v3)], [(k2, v1), (k2, v2), (k2, v3)], [(k3, v1), (k3, v2), (k3, v3)]
chunking_enabled: bool, default=False
If message size is higher than allowed max publish-payload size by broker then chunking_enabled helps
producer to split message into multiple chunks and publish them to broker separately and in order.
So, it allows client to successfully publish large size of messages in pulsar.
encryption_key: str, optional
The key used for symmetric encryption, configured on the producer side
crypto_key_reader: CryptoKeyReader, optional
Symmetric encryption class implementation, configuring public key encryption messages for the producer
and private key decryption messages for the consumer
"""
_check_type(str, topic, 'topic')
_check_type_or_none(str, producer_name, 'producer_name')
_check_type(_schema.Schema, schema, 'schema')
_check_type_or_none(int, initial_sequence_id, 'initial_sequence_id')
_check_type(int, send_timeout_millis, 'send_timeout_millis')
_check_type(CompressionType, compression_type, 'compression_type')
_check_type(int, max_pending_messages, 'max_pending_messages')
_check_type(int, max_pending_messages_across_partitions, 'max_pending_messages_across_partitions')
_check_type(bool, block_if_queue_full, 'block_if_queue_full')
_check_type(bool, batching_enabled, 'batching_enabled')
_check_type(int, batching_max_messages, 'batching_max_messages')
_check_type(int, batching_max_allowed_size_in_bytes, 'batching_max_allowed_size_in_bytes')
_check_type(int, batching_max_publish_delay_ms, 'batching_max_publish_delay_ms')
_check_type(bool, chunking_enabled, 'chunking_enabled')
_check_type_or_none(dict, properties, 'properties')
_check_type(BatchingType, batching_type, 'batching_type')
_check_type_or_none(str, encryption_key, 'encryption_key')
_check_type_or_none(CryptoKeyReader, crypto_key_reader, 'crypto_key_reader')
_check_type(bool, lazy_start_partitioned_producers, 'lazy_start_partitioned_producers')
conf = _pulsar.ProducerConfiguration()
conf.send_timeout_millis(send_timeout_millis)
conf.compression_type(compression_type)
conf.max_pending_messages(max_pending_messages)
conf.max_pending_messages_across_partitions(max_pending_messages_across_partitions)
conf.block_if_queue_full(block_if_queue_full)
conf.batching_enabled(batching_enabled)
conf.batching_max_messages(batching_max_messages)
conf.batching_max_allowed_size_in_bytes(batching_max_allowed_size_in_bytes)
conf.batching_max_publish_delay_ms(batching_max_publish_delay_ms)
conf.partitions_routing_mode(message_routing_mode)
conf.batching_type(batching_type)
conf.chunking_enabled(chunking_enabled)
conf.lazy_start_partitioned_producers(lazy_start_partitioned_producers)
if producer_name:
conf.producer_name(producer_name)
if initial_sequence_id:
conf.initial_sequence_id(initial_sequence_id)
if properties:
for k, v in properties.items():
conf.property(k, v)
conf.schema(schema.schema_info())
if encryption_key:
conf.encryption_key(encryption_key)
if crypto_key_reader:
conf.crypto_key_reader(crypto_key_reader.cryptoKeyReader)
if batching_enabled and chunking_enabled:
raise ValueError("Batching and chunking of messages can't be enabled together.")
p = Producer()
p._producer = self._client.create_producer(topic, conf)
p._schema = schema
p._client = self._client
return p
def subscribe(self, topic, subscription_name,
consumer_type=ConsumerType.Exclusive,
schema=schema.BytesSchema(),
message_listener=None,
receiver_queue_size=1000,
max_total_receiver_queue_size_across_partitions=50000,
consumer_name=None,
unacked_messages_timeout_ms=None,
broker_consumer_stats_cache_time_ms=30000,
negative_ack_redelivery_delay_ms=60000,
is_read_compacted=False,
properties=None,
pattern_auto_discovery_period=60,
initial_position=InitialPosition.Latest,
crypto_key_reader=None,
replicate_subscription_state_enabled=False,
max_pending_chunked_message=10,
auto_ack_oldest_chunked_message_on_queue_full=False,
start_message_id_inclusive=False,
batch_receive_policy=None
):
"""
Subscribe to the given topic and subscription combination.
Parameters
----------
topic:
The name of the topic, list of topics or regex pattern. This method will accept these forms:
* ``topic='my-topic'``
* ``topic=['topic-1', 'topic-2', 'topic-3']``
* ``topic=re.compile('persistent://public/default/topic-*')``
subscription_name: str
The name of the subscription.
consumer_type: ConsumerType, default=ConsumerType.Exclusive
Select the subscription type to be used when subscribing to the topic.
schema: pulsar.schema.Schema, default=pulsar.schema.BytesSchema
Define the schema of the data that will be received by this consumer.
message_listener: optional
Sets a message listener for the consumer. When the listener is set, the application will
receive messages through it. Calls to ``consumer.receive()`` will not be allowed.
The listener function needs to accept (consumer, message), for example:
.. code-block:: python
def my_listener(consumer, message):
# process message
consumer.acknowledge(message)
receiver_queue_size: int, default=1000
Sets the size of the consumer receive queue. The consumer receive queue controls how many messages can be
accumulated by the consumer before the application calls `receive()`. Using a higher value could potentially
increase the consumer throughput at the expense of higher memory utilization. Setting the consumer queue
size to zero decreases the throughput of the consumer by disabling pre-fetching of messages.
This approach improves the message distribution on shared subscription by pushing messages only to those
consumers that are ready to process them. Neither receive with timeout nor partitioned topics can be used
if the consumer queue size is zero. The `receive()` function call should not be interrupted when the
consumer queue size is zero. The default value is 1000 messages and should work well for most use cases.
max_total_receiver_queue_size_across_partitions: int, default=50000
Set the max total receiver queue size across partitions. This setting will be used to reduce the
receiver queue size for individual partitions
consumer_name: str, optional
Sets the consumer name.
unacked_messages_timeout_ms: int, optional
Sets the timeout in milliseconds for unacknowledged messages. The timeout needs to be greater than
10 seconds. An exception is thrown if the given value is less than 10 seconds. If a successful
acknowledgement is not sent within the timeout, all the unacknowledged messages are redelivered.
negative_ack_redelivery_delay_ms: int, default=60000
The delay after which to redeliver the messages that failed to be processed
(with the ``consumer.negative_acknowledge()``)
broker_consumer_stats_cache_time_ms: int, default=30000
Sets the time duration for which the broker-side consumer stats will be cached in the client.
is_read_compacted: bool, default=False
Selects whether to read the compacted version of the topic
properties: dict, optional
Sets the properties for the consumer. The properties associated with a consumer can be used for
identify a consumer at broker side.
pattern_auto_discovery_period: int, default=60
Periods of seconds for consumer to auto discover match topics.
initial_position: InitialPosition, default=InitialPosition.Latest
Set the initial position of a consumer when subscribing to the topic.
It could be either: ``InitialPosition.Earliest`` or ``InitialPosition.Latest``.
crypto_key_reader: CryptoKeyReader, optional
Symmetric encryption class implementation, configuring public key encryption messages for the producer
and private key decryption messages for the consumer
replicate_subscription_state_enabled: bool, default=False
Set whether the subscription status should be replicated.
max_pending_chunked_message: int, default=10
Consumer buffers chunk messages into memory until it receives all the chunks of the original message.
While consuming chunk-messages, chunks from same message might not be contiguous in the stream, and they
might be mixed with other messages' chunks. so, consumer has to maintain multiple buffers to manage
chunks coming from different messages. This mainly happens when multiple publishers are publishing
messages on the topic concurrently or publisher failed to publish all chunks of the messages.
If it's zero, the pending chunked messages will not be limited.
auto_ack_oldest_chunked_message_on_queue_full: bool, default=False
Buffering large number of outstanding uncompleted chunked messages can create memory pressure, and it
can be guarded by providing the maxPendingChunkedMessage threshold. See setMaxPendingChunkedMessage.
Once, consumer reaches this threshold, it drops the outstanding unchunked-messages by silently acking
if autoAckOldestChunkedMessageOnQueueFull is true else it marks them for redelivery.
start_message_id_inclusive: bool, default=False
Set the consumer to include the given position of any reset operation like Consumer::seek.
batch_receive_policy: class ConsumerBatchReceivePolicy
Set the batch collection policy for batch receiving.
"""
_check_type(str, subscription_name, 'subscription_name')
_check_type(ConsumerType, consumer_type, 'consumer_type')
_check_type(_schema.Schema, schema, 'schema')
_check_type(int, receiver_queue_size, 'receiver_queue_size')
_check_type(int, max_total_receiver_queue_size_across_partitions,
'max_total_receiver_queue_size_across_partitions')
_check_type_or_none(str, consumer_name, 'consumer_name')
_check_type_or_none(int, unacked_messages_timeout_ms, 'unacked_messages_timeout_ms')
_check_type(int, broker_consumer_stats_cache_time_ms, 'broker_consumer_stats_cache_time_ms')
_check_type(int, negative_ack_redelivery_delay_ms, 'negative_ack_redelivery_delay_ms')
_check_type(int, pattern_auto_discovery_period, 'pattern_auto_discovery_period')
_check_type(bool, is_read_compacted, 'is_read_compacted')
_check_type_or_none(dict, properties, 'properties')
_check_type(InitialPosition, initial_position, 'initial_position')
_check_type_or_none(CryptoKeyReader, crypto_key_reader, 'crypto_key_reader')
_check_type(int, max_pending_chunked_message, 'max_pending_chunked_message')
_check_type(bool, auto_ack_oldest_chunked_message_on_queue_full, 'auto_ack_oldest_chunked_message_on_queue_full')
_check_type(bool, start_message_id_inclusive, 'start_message_id_inclusive')
_check_type_or_none(ConsumerBatchReceivePolicy, batch_receive_policy, 'batch_receive_policy')
conf = _pulsar.ConsumerConfiguration()
conf.consumer_type(consumer_type)
conf.read_compacted(is_read_compacted)
if message_listener:
conf.message_listener(_listener_wrapper(message_listener, schema))
conf.receiver_queue_size(receiver_queue_size)
conf.max_total_receiver_queue_size_across_partitions(max_total_receiver_queue_size_across_partitions)
if consumer_name:
conf.consumer_name(consumer_name)
if unacked_messages_timeout_ms:
conf.unacked_messages_timeout_ms(unacked_messages_timeout_ms)
conf.negative_ack_redelivery_delay_ms(negative_ack_redelivery_delay_ms)
conf.broker_consumer_stats_cache_time_ms(broker_consumer_stats_cache_time_ms)
if properties:
for k, v in properties.items():
conf.property(k, v)
conf.subscription_initial_position(initial_position)
conf.schema(schema.schema_info())
if crypto_key_reader:
conf.crypto_key_reader(crypto_key_reader.cryptoKeyReader)
conf.replicate_subscription_state_enabled(replicate_subscription_state_enabled)
conf.max_pending_chunked_message(max_pending_chunked_message)
conf.auto_ack_oldest_chunked_message_on_queue_full(auto_ack_oldest_chunked_message_on_queue_full)
conf.start_message_id_inclusive(start_message_id_inclusive)
if batch_receive_policy:
conf.batch_receive_policy(batch_receive_policy.policy())
c = Consumer()
if isinstance(topic, str):
# Single topic
c._consumer = self._client.subscribe(topic, subscription_name, conf)
elif isinstance(topic, list):
# List of topics
c._consumer = self._client.subscribe_topics(topic, subscription_name, conf)
elif isinstance(topic, _retype):
# Regex pattern
c._consumer = self._client.subscribe_pattern(topic.pattern, subscription_name, conf)
else:
raise ValueError("Argument 'topic' is expected to be of a type between (str, list, re.pattern)")
c._client = self
c._schema = schema
c._schema.attach_client(self._client)
self._consumers.append(c)
return c
def create_reader(self, topic, start_message_id,
schema=schema.BytesSchema(),
reader_listener=None,
receiver_queue_size=1000,
reader_name=None,
subscription_role_prefix=None,
is_read_compacted=False,
crypto_key_reader=None
):
"""
Create a reader on a particular topic
Parameters
----------
topic:
The name of the topic.
start_message_id:
The initial reader positioning is done by specifying a message id. The options are:
* ``MessageId.earliest``:
Start reading from the earliest message available in the topic
* ``MessageId.latest``:
Start reading from the end topic, only getting messages published after the reader was created
* ``MessageId``:
When passing a particular message id, the reader will position itself on that specific position.
The first message to be read will be the message next to the specified messageId.
Message id can be serialized into a string and deserialized back into a `MessageId` object:
.. code-block:: python
# Serialize to string
s = msg.message_id().serialize()
# Deserialize from string
msg_id = MessageId.deserialize(s)
schema: pulsar.schema.Schema, default=pulsar.schema.BytesSchema
Define the schema of the data that will be received by this reader.
reader_listener: optional
Sets a message listener for the reader. When the listener is set, the application will receive messages
through it. Calls to ``reader.read_next()`` will not be allowed. The listener function needs to accept
(reader, message), for example:
.. code-block:: python
def my_listener(reader, message):
# process message
pass
receiver_queue_size: int, default=1000
Sets the size of the reader receive queue. The reader receive queue controls how many messages can be
accumulated by the reader before the application calls `read_next()`. Using a higher value could
potentially increase the reader throughput at the expense of higher memory utilization.
reader_name: str, optional
Sets the reader name.
subscription_role_prefix: str, optional
Sets the subscription role prefix.
is_read_compacted: bool, default=False
Selects whether to read the compacted version of the topic
crypto_key_reader: CryptoKeyReader, optional
Symmetric encryption class implementation, configuring public key encryption messages for the producer
and private key decryption messages for the consumer
"""
# If a pulsar.MessageId object is passed, access the _pulsar.MessageId object
if isinstance(start_message_id, MessageId):
start_message_id = start_message_id._msg_id
_check_type(str, topic, 'topic')
_check_type(_pulsar.MessageId, start_message_id, 'start_message_id')
_check_type(_schema.Schema, schema, 'schema')
_check_type(int, receiver_queue_size, 'receiver_queue_size')
_check_type_or_none(str, reader_name, 'reader_name')
_check_type_or_none(str, subscription_role_prefix, 'subscription_role_prefix')
_check_type(bool, is_read_compacted, 'is_read_compacted')
_check_type_or_none(CryptoKeyReader, crypto_key_reader, 'crypto_key_reader')
conf = _pulsar.ReaderConfiguration()
if reader_listener:
conf.reader_listener(_listener_wrapper(reader_listener, schema))
conf.receiver_queue_size(receiver_queue_size)
if reader_name:
conf.reader_name(reader_name)
if subscription_role_prefix:
conf.subscription_role_prefix(subscription_role_prefix)
conf.schema(schema.schema_info())
conf.read_compacted(is_read_compacted)
if crypto_key_reader:
conf.crypto_key_reader(crypto_key_reader.cryptoKeyReader)
c = Reader()
c._reader = self._client.create_reader(topic, start_message_id, conf)
c._client = self
c._schema = schema
c._schema.attach_client(self._client)
self._consumers.append(c)
return c
def get_topic_partitions(self, topic):
"""
Get the list of partitions for a given topic.
If the topic is partitioned, this will return a list of partition names. If the topic is not
partitioned, the returned list will contain the topic name itself.
This can be used to discover the partitions and create Reader, Consumer or Producer
instances directly on a particular partition.
Parameters
----------
topic: str
the topic name to lookup
Returns
-------
list
a list of partition name
"""
_check_type(str, topic, 'topic')
return self._client.get_topic_partitions(topic)
def shutdown(self):
"""
Perform immediate shutdown of Pulsar client.
Release all resources and close all producer, consumer, and readers without waiting
for ongoing operations to complete.
"""
self._client.shutdown()
def close(self):
"""
Close the client and all the associated producers and consumers
"""
self._client.close()
class Producer:
"""
The Pulsar message producer, used to publish messages on a topic.
Examples
--------
.. code-block:: python
import pulsar