From 3a32907cbd0e4d6324ca15341c1d20fea5d61fe9 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 21 Aug 2019 19:17:00 -0700 Subject: [PATCH 01/39] Draft EventProcessor Loadbalancing --- .../eventprocessor/_ownership_manager.py | 120 ++++++++++ .../eventprocessor/event_processor.py | 207 +++++++++--------- .../eventprocessor/partition_processor.py | 15 +- .../sqlite3_partition_manager.py | 95 ++++---- 4 files changed, 287 insertions(+), 150 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py new file mode 100644 index 000000000000..710e71ab8439 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -0,0 +1,120 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from typing import List, Iterable, Any, Dict +import time +import random +import math +from collections import Counter +from azure.eventhub.aio import EventHubClient + + +class OwnershipManager(object): + """Increases or decreases the number of partitions owned by an EventProcessor + so the number of owned partitions are balanced among multiple EventProcessors + + An EventProcessor calls claim_ownership() of this class every x seconds, + where x is set by keyword argument "polling_interval" in EventProcessor, + to claim the ownership of partitions, create tasks for the claimed ownership, and cancel tasks that no longer belong + to the claimed ownership. 
+ + """ + def __init__(self, event_processor, eventhub_client: EventHubClient, ownership_timeout: int): + self.all_parition_ids = [] + self.eventhub_client = eventhub_client + self.eventhub_name = eventhub_client.eh_name + self.consumer_group_name = event_processor._consumer_group_name + self.owner_id = event_processor._id + self.partition_manager = event_processor._partition_manager + self.ownership_timeout = ownership_timeout + + async def claim_ownership(self): + """Claims ownership for this EventProcessor + 1. Retrieves all partition ids of an event hub from azure event hub service + 2. Retrieves current ownership list via this EventProcessor's PartitionManager. + 3. Searches claimable partitions for this EventProcessor. Refer to claim_ownership() for details. + 4. Claims the ownership for the claimable partitions + + :return: List[Dict[Any]] + """ + if not self.all_parition_ids: + await self._retrieve_partition_ids() + to_claim = await self._balance_ownership() + claimed_list = await self._claim_ownership(to_claim) + return claimed_list + + async def _retrieve_partition_ids(self): + """List all partition ids of the event hub that the EventProcessor is working on. 
+ + :return: List[str] + """ + self.all_parition_ids = await self.eventhub_client.get_partition_ids() + + async def _balance_ownership(self): + ownership_list = await self.partition_manager.list_ownership(self.eventhub_client.eh_name, self.consumer_group_name) + ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup + ''' + now = time.time() + partition_ids_no_ownership = list(filter(lambda x: x not in ownership_dict, self.all_parition_ids)) + inactive_ownership = filter(lambda x: x["last_modified_time"] + self.ownership_timeout < now, ownership_list) + claimable_partition_ids = partition_ids_no_ownership + [x["partition_id"] for x in inactive_ownership] + active_ownership = list(filter(lambda x: x["last_modified_time"] + self.ownership_timeout >= now, ownership_list)) + active_ownership_count_group_by_owner = Counter([x["owner_id"] for x in active_ownership]) + active_ownership_self = list(filter(lambda x: x["owner_id"] == self.owner_id, active_ownership)) + ''' + claimable_partition_ids = [] + active_ownership_self = [] + active_ownership_count_group_by_owner = Counter() + for partition_id in self.all_parition_ids: + ownership = ownership_dict.get(partition_id) + if not ownership: # no ownership found for this partition. So it is claimable + claimable_partition_ids.append(partition_id) + else: + last_modified_time = ownership["last_modified_time"] + owner_id = ownership["owner_id"] + now = time.time() + if now > self.ownership_timeout + last_modified_time: # ownership timed out. 
So it is claimable + claimable_partition_ids.append(partition_id) + else: # the ownership is still active + if owner_id == self.owner_id: # partition is actively owned by this running EventProcessor + active_ownership_self.append(ownership) + active_ownership_count_group_by_owner[owner_id] = active_ownership_count_group_by_owner.get(owner_id, 0) + 1 # all active owners + + # calculate expected count per owner + all_partition_count = len(self.all_parition_ids) + owners_count = len(active_ownership_count_group_by_owner) + (1 if self.owner_id not in active_ownership_count_group_by_owner else 0) + expected_count_per_owner = all_partition_count // owners_count + most_count_allowed_per_owner = math.ceil(all_partition_count / owners_count) + # end of calculating expected count per owner + + to_claim = active_ownership_self + if len(active_ownership_self) > most_count_allowed_per_owner: # needs to abandon a partition + to_claim.pop() # abandon one partition if owned too many + # TODO: Release a ownership immediately so other EventProcessors won't need to wait it to timeout + elif len(active_ownership_self) < expected_count_per_owner: # Either claims an inactive partition, or steals from other owners + if claimable_partition_ids: # claim an inactive partition if there is + random_partition_id = random.choice(claimable_partition_ids) + random_chosen_to_claim = ownership_dict.get(random_partition_id, + {"partition_id": random_partition_id, + "eventhub_name": self.eventhub_client.eh_name, + "consumer_group_name": self.consumer_group_name, + "owner_level": 0}) + random_chosen_to_claim["owner_id"] = self.owner_id + to_claim.append(random_chosen_to_claim) + else: # steal from another owner that has the most count + most_frequent_owner_id = active_ownership_count_group_by_owner.most_common(1)[0][0] + # randomly choose a partition to steal from the most_frequent_owner + to_steal_partition = random.choice(list(filter(lambda x: x["owner_id"] == most_frequent_owner_id, + 
ownership_list))) + to_steal_partition["owner_id"] = self.owner_id + to_claim.append(to_steal_partition) + return to_claim + + async def _claim_ownership(self, ownership_list): + if ownership_list: + claimed_list = await self.partition_manager.claim_ownership(ownership_list) + return claimed_list + else: + return None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 85020257df46..c675afd165a4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -3,15 +3,17 @@ # Licensed under the MIT License. See License.txt in the project root for license information. # ----------------------------------------------------------------------------------- -from typing import Callable, List +from typing import Callable, Dict import uuid import asyncio import logging +from enum import Enum from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient from .checkpoint_manager import CheckpointManager from .partition_manager import PartitionManager +from ._ownership_manager import OwnershipManager from .partition_processor import PartitionProcessor, CloseReason from .utils import get_running_loop @@ -53,7 +55,7 @@ async def process_events(self, events): """ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory: Callable[[CheckpointManager], PartitionProcessor], + partition_processor_factory, partition_manager: PartitionManager, **kwargs): """ Instantiate an EventProcessor. 
@@ -73,16 +75,22 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, :type initial_event_position: int or str """ + self._consumer_group_name = consumer_group_name self._eventhub_client = eventhub_client self._eventhub_name = eventhub_client.eh_name self._partition_processor_factory = partition_processor_factory self._partition_manager = partition_manager self._initial_event_position = kwargs.get("initial_event_position", "-1") + # TODO: initial position provider will be a callable + # so users can create initial event position for every partition self._max_batch_size = eventhub_client.config.max_batch_size self._receive_timeout = eventhub_client.config.receive_timeout - self._tasks = [] # type: List[asyncio.Task] + self._polling_interval = kwargs.get("polling_interval", 10) + self._ownership_timeout = self._polling_interval * 2 # TODO: Team haven't decided if this is a separate argument + self._tasks = {} # type: Dict[str, asyncio.Task] self._id = str(uuid.uuid4()) + self._running = False def __repr__(self): return 'EventProcessor: id {}'.format(self._id) @@ -98,9 +106,22 @@ async def start(self): """ log.info("EventProcessor %r is being started", self._id) - partition_ids = await self._eventhub_client.get_partition_ids() - claimed_list = await self._claim_partitions(partition_ids) - await self._start_claimed_partitions(claimed_list) + ownership_manager = OwnershipManager(self, self._eventhub_client, self._ownership_timeout) + if not self._running: + self._running = True + while self._running: + claimed_ownership_list = await ownership_manager.claim_ownership() + claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] + to_cancel_list = self._tasks.keys() - claimed_partition_ids + if to_cancel_list: + self._cancel_tasks_for_partitions(to_cancel_list) + log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) + + if claimed_partition_ids: + 
self._create_tasks_for_claimed_ownership(claimed_ownership_list) + else: + log.warning("EventProcessor %r hasn't claimed an ownership. It keeps claiming.", self._id) + await asyncio.sleep(self._polling_interval) async def stop(self): """Stop all the partition consumer @@ -110,109 +131,85 @@ async def stop(self): :return: None """ + self._running = False for i in range(len(self._tasks)): - task = self._tasks.pop() + task = self._tasks.popitem()[1] task.cancel() log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled - async def _claim_partitions(self, partition_ids): - partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) - partitions_ownership_dict = dict() - for ownership in partitions_ownership: - partitions_ownership_dict[ownership["partition_id"]] = ownership - - to_claim_list = [] - for pid in partition_ids: - p_ownership = partitions_ownership_dict.get(pid) - if p_ownership: - to_claim_list.append(p_ownership) - else: - new_ownership = {"eventhub_name": self._eventhub_name, "consumer_group_name": self._consumer_group_name, - "owner_id": self._id, "partition_id": pid, "owner_level": OWNER_LEVEL} - to_claim_list.append(new_ownership) - claimed_list = await self._partition_manager.claim_ownership(to_claim_list) - return claimed_list - - async def _start_claimed_partitions(self, claimed_partitions): - for partition in claimed_partitions: - partition_id = partition["partition_id"] - offset = partition.get("offset", self._initial_event_position) - consumer = self._eventhub_client.create_consumer(self._consumer_group_name, partition_id, - EventPosition(str(offset))) - partition_processor = self._partition_processor_factory( - checkpoint_manager=CheckpointManager(partition_id, self._eventhub_name, self._consumer_group_name, - self._id, self._partition_manager) - ) - loop = get_running_loop() - task = loop.create_task( - _receive(consumer, 
partition_processor, self._receive_timeout)) - self._tasks.append(task) + def _cancel_tasks_for_partitions(self, to_cancel_partitions): + for partition_id in to_cancel_partitions: + if partition_id in self._tasks: + task = self._tasks.pop(partition_id) + task.cancel() + + def _create_tasks_for_claimed_ownership(self, to_claim_ownership_list): + for ownership in to_claim_ownership_list: + partition_id = ownership["partition_id"] + if partition_id not in self._tasks: + self._tasks[partition_id] = asyncio.create_task(self._receive(ownership)) + + async def _receive(self, ownership): + log.info("start ownership, %r", ownership) + partition_consumer = self._eventhub_client.create_consumer(ownership["consumer_group_name"], + ownership["partition_id"], + EventPosition(ownership.get("offset", self._initial_event_position)) + ) + checkpoint_manager = CheckpointManager(ownership["partition_id"], + ownership["eventhub_name"], + ownership["consumer_group_name"], + ownership["owner_id"], + self._partition_manager) + partition_processor = self._partition_processor_factory() + + async def initialize(): + if hasattr(partition_processor, "initialize"): + await partition_processor.initialize(checkpoint_manager) + + async def process_error(err): + if hasattr(partition_processor, "process_error"): + await partition_processor.process_error(err, checkpoint_manager) + + async def close(close_reason): + if hasattr(partition_processor, "close"): + await partition_processor.close(close_reason, checkpoint_manager) + try: - await asyncio.gather(*self._tasks) + while True: + try: + await initialize() + events = await partition_consumer.receive(timeout=self._receive_timeout) + await partition_processor.process_events(events, checkpoint_manager) + except asyncio.CancelledError as cancelled_error: + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "is cancelled", + ownership["owner_id"], + ownership["eventhub_name"], + 
ownership["partition_id"], + ownership["consumer_group_name"] + ) + await process_error(cancelled_error) + await close(CloseReason.SHUTDOWN) + break + except EventHubError as eh_err: + reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION + log.warning( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "has met an exception receiving events. It's being closed. The exception is %r.", + ownership["owner_id"], + ownership["eventhub_name"], + ownership["partition_id"], + ownership["consumer_group_name"], + eh_err + ) + await process_error(eh_err) + await close(reason) + break + except Exception as exp: + log.warning(exp) + # TODO: will review whether to break and close partition processor after user's code has an exception + # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? finally: - log.info("EventProcessor %r has stopped", self._id) - - -async def _receive(partition_consumer, partition_processor, receive_timeout): - try: - while True: - try: - events = await partition_consumer.receive(timeout=receive_timeout) - except asyncio.CancelledError as cancelled_error: - log.info( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "is cancelled", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name - ) - await partition_processor.process_error(cancelled_error) - await partition_processor.close(reason=CloseReason.SHUTDOWN) - break - except EventHubError as eh_err: - reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION - log.warning( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has met an 
exception receiving events. It's being closed. The exception is %r.", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name, - eh_err - ) - await partition_processor.process_error(eh_err) - await partition_processor.close(reason=reason) - break - try: - await partition_processor.process_events(events) - except asyncio.CancelledError as cancelled_error: - log.info( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "is cancelled.", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name - ) - await partition_processor.process_error(cancelled_error) - await partition_processor.close(reason=CloseReason.SHUTDOWN) - break - except Exception as exp: # user code has caused an error - log.warning( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has met an exception from user code process_events. It's being closed. The exception is %r.", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name, - exp - ) - await partition_processor.process_error(exp) - await partition_processor.close(reason=CloseReason.EVENTHUB_EXCEPTION) - break - # TODO: will review whether to break and close partition processor after user's code has an exception - # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? 
- finally: - await partition_consumer.close() + await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 10aafc79c492..94729aae1269 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -4,7 +4,7 @@ # ----------------------------------------------------------------------------------- from typing import List -from abc import ABC, abstractmethod +from typing_extensions import Protocol from enum import Enum from .checkpoint_manager import CheckpointManager @@ -17,16 +17,16 @@ class CloseReason(Enum): EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events -class PartitionProcessor(ABC): +class PartitionProcessor(Protocol): """ PartitionProcessor processes events received from the Azure Event Hubs service. A single instance of a class implementing this abstract class will be created for every partition the associated ~azure.eventhub.eventprocessor.EventProcessor owns. """ - def __init__(self, checkpoint_manager: CheckpointManager): - self._checkpoint_manager = checkpoint_manager + async def initialize(self, checkpoint_manager: CheckpointManager): + pass - async def close(self, reason): + async def close(self, reason, checkpoint_manager: CheckpointManager): """Called when EventProcessor stops processing this PartitionProcessor. There are different reasons to trigger the PartitionProcessor to close. @@ -38,8 +38,7 @@ async def close(self, reason): """ pass - @abstractmethod - async def process_events(self, events: List[EventData]): + async def process_events(self, events: List[EventData], checkpoint_manager: CheckpointManager): """Called when a batch of events have been received. :param events: Received events. 
@@ -48,7 +47,7 @@ async def process_events(self, events: List[EventData]): """ pass - async def process_error(self, error): + async def process_error(self, error, checkpoint_manager: CheckpointManager): """Called when an error happens :param error: The error that happens. diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index eb08e970fa89..5e2c3dd45748 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -6,8 +6,11 @@ import time import uuid import sqlite3 +import logging from .partition_manager import PartitionManager +logger = logging.getLogger(__name__) + def _check_table_name(table_name: str): for c in table_name: @@ -22,6 +25,15 @@ class Sqlite3PartitionManager(PartitionManager): """ + primary_keys_dict = {"eventhub_name": "text", "consumer_group_name": "text", "partition_id": "text"} + other_fields_dict = {"owner_id": "text", "owner_level": "integer", "sequence_number": "integer", "offset": "text", + "last_modified_time": "real", "etag": "text"} + checkpoint_fields = ["sequence_number", "offset"] + fields_dict = {**primary_keys_dict, **other_fields_dict} + primary_keys = list(primary_keys_dict.keys()) + other_fields = list(other_fields_dict.keys()) + fields = primary_keys + other_fields + def __init__(self, db_filename: str = ":memory:", ownership_table: str = "ownership"): """ @@ -34,17 +46,15 @@ def __init__(self, db_filename: str = ":memory:", ownership_table: str = "owners conn = sqlite3.connect(db_filename) c = conn.cursor() try: - c.execute("create table " + ownership_table + - "(eventhub_name text," - "consumer_group_name text," - "owner_id text," - "partition_id text," - "owner_level integer," - "sequence_number integer," - "offset text," - "last_modified_time integer," 
- "etag text)") + sql = "create table if not exists " + _check_table_name(ownership_table)\ + + "("\ + + ",".join([x[0]+" "+x[1] for x in self.fields_dict.items()])\ + + ", constraint pk_ownership PRIMARY KEY ("\ + + ",".join(self.primary_keys)\ + + "))" + c.execute(sql) except sqlite3.OperationalError: + raise pass finally: c.close() @@ -53,44 +63,55 @@ def __init__(self, db_filename: str = ":memory:", ownership_table: str = "owners async def list_ownership(self, eventhub_name, consumer_group_name): cursor = self.conn.cursor() try: - fields = ["eventhub_name", "consumer_group_name", "owner_id", "partition_id", "owner_level", - "sequence_number", - "offset", "last_modified_time", "etag"] - cursor.execute("select " + ",".join(fields) + + cursor.execute("select " + ",".join(self.fields) + " from "+_check_table_name(self.ownership_table)+" where eventhub_name=? " "and consumer_group_name=?", (eventhub_name, consumer_group_name)) - result_list = [] - - for row in cursor.fetchall(): - d = dict(zip(fields, row)) - result_list.append(d) - return result_list + return [dict(zip(self.fields, row)) for row in cursor.fetchall()] finally: cursor.close() async def claim_ownership(self, partitions): + result = [] cursor = self.conn.cursor() try: for p in partitions: - cursor.execute("select * from " + _check_table_name(self.ownership_table) + - " where eventhub_name=? " - "and consumer_group_name=? " - "and partition_id =?", - (p["eventhub_name"], p["consumer_group_name"], - p["partition_id"])) - if not cursor.fetchall(): - cursor.execute("insert into " + _check_table_name(self.ownership_table) + - " (eventhub_name,consumer_group_name,partition_id,owner_id,owner_level,last_modified_time,etag) " - "values (?,?,?,?,?,?,?)", - (p["eventhub_name"], p["consumer_group_name"], p["partition_id"], p["owner_id"], p["owner_level"], - time.time(), str(uuid.uuid4()) - )) + cursor.execute("select etag from " + _check_table_name(self.ownership_table) + + " where "+ " and ".join([field+"=?" 
for field in self.primary_keys]), + tuple(p.get(field) for field in self.primary_keys)) + cursor_fetch = cursor.fetchall() + if not cursor_fetch: + p["last_modified_time"] = time.time() + p["etag"] = str(uuid.uuid4()) + try: + fields_without_checkpoint = list(filter(lambda x: x not in self.checkpoint_fields, self.fields)) + sql = "insert into " + _check_table_name(self.ownership_table) + " (" \ + + ",".join(fields_without_checkpoint) \ + + ") values (?,?,?,?,?,?,?)" + cursor.execute(sql, tuple(p.get(field) for field in fields_without_checkpoint)) + except sqlite3.OperationalError as op_err: + logger.info("EventProcessor %r failed to claim partition %r " + "because it was claimed by another EventProcessor at the same time. " + "The Sqlite3 exception is %r", p["owner_id"], p["partition_id"], op_err) + break + else: + result.append(p) else: - cursor.execute("update " + _check_table_name(self.ownership_table) + " set owner_id=?, owner_level=?, last_modified_time=?, etag=? " - "where eventhub_name=? and consumer_group_name=? and partition_id=?", - (p["owner_id"], p["owner_level"], time.time(), str(uuid.uuid4()), - p["eventhub_name"], p["consumer_group_name"], p["partition_id"])) + if p.get("etag") == cursor_fetch[0][0]: + p["last_modified_time"] = time.time() + p["etag"] = str(uuid.uuid4()) + other_fields_without_checkpoint = list(filter(lambda x: x not in self.checkpoint_fields, self.other_fields)) + sql = "update " + _check_table_name(self.ownership_table) + " set "\ + + ','.join([field+"=?" for field in other_fields_without_checkpoint])\ + + " where "\ + + " and ".join([field+"=?" 
for field in self.primary_keys]) + + cursor.execute(sql, tuple(p.get(field) for field in other_fields_without_checkpoint) + tuple(p.get(field) for field in self.primary_keys)) + result.append(p) + else: + logger.info("EventProcessor %r failed to claim partition %r " + "because it was claimed by another EventProcessor at the same time", p["owner_id"], + p["partition_id"]) self.conn.commit() return partitions finally: From 39b1b86853f5f4a4ca52afc80ef6a20e8fe51615 Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 22 Aug 2019 01:01:35 -0700 Subject: [PATCH 02/39] EventProcessor Load balancing --- .../eventprocessor/_ownership_manager.py | 45 ++++++++----- .../eventprocessor/event_processor.py | 66 +++++++++++-------- .../eventprocessor/partition_processor.py | 4 +- .../sqlite3_partition_manager.py | 13 +++- .../eventprocessor/event_processor_example.py | 39 ++++------- 5 files changed, 94 insertions(+), 73 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py index 710e71ab8439..40e3419a7c5a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. See License.txt in the project root for license information. # ----------------------------------------------------------------------------------- -from typing import List, Iterable, Any, Dict import time import random import math @@ -34,8 +33,8 @@ async def claim_ownership(self): """Claims ownership for this EventProcessor 1. Retrieves all partition ids of an event hub from azure event hub service 2. Retrieves current ownership list via this EventProcessor's PartitionManager. - 3. Searches claimable partitions for this EventProcessor. Refer to claim_ownership() for details. - 4. 
Claims the ownership for the claimable partitions + 3. Balances number of ownership. Refer to _balance_ownership() for details. + 4. Claims the ownership for the balanced number of partitions. :return: List[Dict[Any]] """ @@ -53,19 +52,35 @@ async def _retrieve_partition_ids(self): self.all_parition_ids = await self.eventhub_client.get_partition_ids() async def _balance_ownership(self): + """Balances and claims ownership of partitions for this EventProcessor. + The balancing algorithm is: + 1. Find partitions with inactive ownership and partitions that haven never been claimed before + 2. Find the number of active owners, including this EventProcessor, for all partitions. + 3. Calculate the average count of partitions that an owner should own. + (number of partitions // number of active owners) + 4. Calculate the largest allowed count of partitions that an owner can own. + math.ceil(number of partitions / number of active owners). This should be equal or 1 greater than the average count + 5. Adjust the number of partitions owned by this EventProcessor (owner) + a. if this EventProcessor owns more than largest allowed count, abandon one partition + b. if this EventProcessor owns less than average count, add one from the inactive or unclaimed partitions, + or steal one from another owner that has the largest number of ownership among all owners (EventProcessors) + c. Otherwise, no change to the ownership + + The balancing algorithm adjust one partition at a time to gradually build the balanced ownership. + Ownership must be renewed to keep it active. So the returned result includes both existing ownership and + the newly adjusted ownership. + This method balances but doesn't claim ownership. The caller of this method tries to claim the result ownership + list. But it may not successfully claim all of them because of concurrency. Other EventProcessors may happen to + claim a partition at that time. 
Since balancing and claiming are run in infinite repeatedly, + it achieves balancing among all EventProcessors after some time of running. + + :return: List[Dict[str, Any]], A list of ownership. + """ ownership_list = await self.partition_manager.list_ownership(self.eventhub_client.eh_name, self.consumer_group_name) ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup - ''' - now = time.time() - partition_ids_no_ownership = list(filter(lambda x: x not in ownership_dict, self.all_parition_ids)) - inactive_ownership = filter(lambda x: x["last_modified_time"] + self.ownership_timeout < now, ownership_list) - claimable_partition_ids = partition_ids_no_ownership + [x["partition_id"] for x in inactive_ownership] - active_ownership = list(filter(lambda x: x["last_modified_time"] + self.ownership_timeout >= now, ownership_list)) - active_ownership_count_group_by_owner = Counter([x["owner_id"] for x in active_ownership]) - active_ownership_self = list(filter(lambda x: x["owner_id"] == self.owner_id, active_ownership)) - ''' - claimable_partition_ids = [] - active_ownership_self = [] + + claimable_partition_ids = [] # partitions with inactive ownership and partitions that have never been claimed yet + active_ownership_self = [] # active ownership of this EventProcessor active_ownership_count_group_by_owner = Counter() for partition_id in self.all_parition_ids: ownership = ownership_dict.get(partition_id) @@ -100,7 +115,7 @@ async def _balance_ownership(self): {"partition_id": random_partition_id, "eventhub_name": self.eventhub_client.eh_name, "consumer_group_name": self.consumer_group_name, - "owner_level": 0}) + "owner_level": 0}) # TODO: consider removing owner_level random_chosen_to_claim["owner_id"] = self.owner_id to_claim.append(random_chosen_to_claim) else: # steal from another owner that has the most count diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py 
b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index c675afd165a4..7e342a369d0d 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -7,15 +7,13 @@ import uuid import asyncio import logging -from enum import Enum from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient from .checkpoint_manager import CheckpointManager from .partition_manager import PartitionManager from ._ownership_manager import OwnershipManager -from .partition_processor import PartitionProcessor, CloseReason -from .utils import get_running_loop +from .partition_processor import CloseReason, PartitionProcessor log = logging.getLogger(__name__) @@ -29,20 +27,21 @@ class EventProcessor(object): It provides the user a convenient way to receive events from multiple partitions and save checkpoints. If multiple EventProcessors are running for an event hub, they will automatically balance load. - This load balancing won't be available until preview 3. Example: .. 
code-block:: python - class MyPartitionProcessor(PartitionProcessor): - async def process_events(self, events): - if events: - # do something sync or async to process the events - await self._checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) - import asyncio from azure.eventhub.aio import EventHubClient from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor, Sqlite3PartitionManager + + class MyPartitionProcessor(object): + async def process_events(self, events, checkpoint_manager): + if events: + # do something sync or async to process the events + await checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + + client = EventHubClient.from_connection_string("", receive_timeout=5, retry_total=3) partition_manager = Sqlite3PartitionManager() try: @@ -55,7 +54,7 @@ async def process_events(self, events): """ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory, + partition_processor_factory: Callable[..., PartitionProcessor], partition_manager: PartitionManager, **kwargs): """ Instantiate an EventProcessor. @@ -73,6 +72,8 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, :type partition_manager: Class implementing the ~azure.eventhub.eventprocessor.PartitionManager. :param initial_event_position: The offset to start a partition consumer if the partition has no checkpoint yet. :type initial_event_position: int or str + :param polling_interval: The interval between any two pollings of balancing and claiming + :type float """ @@ -98,9 +99,12 @@ def __repr__(self): async def start(self): """Start the EventProcessor. - 1. retrieve the partition ids from eventhubs. - 2. claim partition ownership of these partitions. - 3. repeatedly call EvenHubConsumer.receive() to retrieve events and call user defined PartitionProcessor.process_events(). + 1. 
Calls the OwnershipManager to keep claiming and balancing ownership of partitions in an + infinitely loop until self.stop() is called. + 2. Cancels tasks for partitions that are no longer owned by this EventProcessor + 3. Creates tasks for partitions that are newly claimed by this EventProcessor + 4. Keeps tasks running for partitions that haven't changed ownership + 5. Each task repeatedly calls EvenHubConsumer.receive() to retrieve events and call user defined partition processor :return: None @@ -111,22 +115,23 @@ async def start(self): self._running = True while self._running: claimed_ownership_list = await ownership_manager.claim_ownership() - claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] - to_cancel_list = self._tasks.keys() - claimed_partition_ids - if to_cancel_list: - self._cancel_tasks_for_partitions(to_cancel_list) - log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) - - if claimed_partition_ids: + if claimed_ownership_list: + claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] + to_cancel_list = self._tasks.keys() - claimed_partition_ids self._create_tasks_for_claimed_ownership(claimed_ownership_list) else: log.warning("EventProcessor %r hasn't claimed an ownership. It keeps claiming.", self._id) + to_cancel_list = self._tasks.keys() + if to_cancel_list: + self._cancel_tasks_for_partitions(to_cancel_list) + log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) await asyncio.sleep(self._polling_interval) async def stop(self): - """Stop all the partition consumer + """Stop claiming ownership and all the partition consumers owned by this EventProcessor - This method cancels tasks that are running EventHubConsumer.receive() for the partitions owned by this EventProcessor. + This method stops claiming ownership of owned partitions and cancels tasks that are running + EventHubConsumer.receive() for the partitions owned by this EventProcessor. 
:return: None @@ -152,6 +157,13 @@ def _create_tasks_for_claimed_ownership(self, to_claim_ownership_list): async def _receive(self, ownership): log.info("start ownership, %r", ownership) + partition_processor = self._partition_processor_factory() + if not hasattr(partition_processor, "process_events"): + log.error( + "Fatal error: a partition processor should at least have method process_events(events, checkpoint_manager). EventProcessor will stop.") + await self.stop() + raise TypeError("Partition processor must has method process_events(events, checkpoint_manager") + partition_consumer = self._eventhub_client.create_consumer(ownership["consumer_group_name"], ownership["partition_id"], EventPosition(ownership.get("offset", self._initial_event_position)) @@ -161,8 +173,6 @@ async def _receive(self, ownership): ownership["consumer_group_name"], ownership["owner_id"], self._partition_manager) - partition_processor = self._partition_processor_factory() - async def initialize(): if hasattr(partition_processor, "initialize"): await partition_processor.initialize(checkpoint_manager) @@ -192,6 +202,7 @@ async def close(close_reason): ) await process_error(cancelled_error) await close(CloseReason.SHUTDOWN) + # TODO: release the ownership immediately via partition manager break except EventHubError as eh_err: reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION @@ -205,11 +216,10 @@ async def close(close_reason): eh_err ) await process_error(eh_err) - await close(reason) + await close(reason) # An EventProcessor will pick up this partition again after the ownership is released + # TODO: release the ownership immediately via partition manager break except Exception as exp: log.warning(exp) - # TODO: will review whether to break and close partition processor after user's code has an exception - # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? 
finally: await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 94729aae1269..60a95b99e628 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -23,7 +23,7 @@ class PartitionProcessor(Protocol): implementing this abstract class will be created for every partition the associated ~azure.eventhub.eventprocessor.EventProcessor owns. """ - async def initialize(self, checkpoint_manager: CheckpointManager): + async def initialize(self): pass async def close(self, reason, checkpoint_manager: CheckpointManager): @@ -45,7 +45,7 @@ async def process_events(self, events: List[EventData], checkpoint_manager: Chec :type events: list[~azure.eventhub.common.EventData] """ - pass + raise NotImplementedError async def process_error(self, error, checkpoint_manager: CheckpointManager): """Called when an error happens diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 5e2c3dd45748..bf4c46c95eb9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -121,9 +121,16 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ offset, sequence_number): cursor = self.conn.cursor() try: - cursor.execute("update " + _check_table_name(self.ownership_table) + " set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? 
and partition_id=?", - (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) - self.conn.commit() + cursor.execute("select owner_id from " + _check_table_name(self.ownership_table) + " where eventhub_name=? and consumer_group_name=? and partition_id=?", + (eventhub_name, consumer_group_name, partition_id)) + cursor_fetch = cursor.fetchall() + if cursor_fetch and owner_id == cursor_fetch[0][0]: + cursor.execute("update " + _check_table_name(self.ownership_table) + " set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? and partition_id=?", + (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) + self.conn.commit() + else: + logger.info("EventProcessor couldn't checkpoint to partition %r because it no longer has the ownership", partition_id) + finally: cursor.close() diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py index 8c4c9ced7d29..a2165eb81110 100644 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -3,7 +3,6 @@ import os from azure.eventhub.aio import EventHubClient from azure.eventhub.eventprocessor import EventProcessor -from azure.eventhub.eventprocessor import PartitionProcessor from azure.eventhub.eventprocessor import Sqlite3PartitionManager RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 
0 or None means no timeout @@ -18,33 +17,23 @@ async def do_operation(event): print(event) -class MyPartitionProcessor(PartitionProcessor): - def __init__(self, checkpoint_manager): - super(MyPartitionProcessor, self).__init__(checkpoint_manager) - - async def process_events(self, events): +class MyPartitionProcessor(object): + async def process_events(self, events, checkpoint_manager): if events: await asyncio.gather(*[do_operation(event) for event in events]) - await self._checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) - - -def partition_processor_factory(checkpoint_manager): - return MyPartitionProcessor(checkpoint_manager) - - -async def run_awhile(duration): - client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, - retry_total=RETRY_TOTAL) - partition_manager = Sqlite3PartitionManager() - event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager) - try: - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(duration) - await event_processor.stop() - finally: - await partition_manager.close() + await checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + else: + print("empty events received", "partition:", checkpoint_manager.partition_id) if __name__ == '__main__': loop = asyncio.get_event_loop() - loop.run_until_complete(run_awhile(60)) + client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, retry_total=RETRY_TOTAL) + partition_manager = Sqlite3PartitionManager(db_filename="eventprocessor_test_db") + event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager, polling_interval=1) + try: + loop.run_until_complete(event_processor.start()) + except KeyboardInterrupt: + loop.run_until_complete(event_processor.stop()) + finally: + loop.stop() From 17f5153ed3762667cf5aa8d330dcec9d339512b3 Mon Sep 17 00:00:00 2001 From: yijxie Date: 
Thu, 22 Aug 2019 18:10:35 -0700 Subject: [PATCH 03/39] small changes from bryan's review --- .../azure/eventhub/eventprocessor/event_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 7e342a369d0d..2db6feb4bbf1 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -137,8 +137,8 @@ async def stop(self): """ self._running = False - for i in range(len(self._tasks)): - task = self._tasks.popitem()[1] + for _ in range(len(self._tasks)): + _, task = self._tasks.popitem() task.cancel() log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled @@ -205,7 +205,7 @@ async def close(close_reason): # TODO: release the ownership immediately via partition manager break except EventHubError as eh_err: - reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION + reason = CloseReason.OWNERSHIP_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. 
The exception is %r.", From 04ef5484e03ac1789e2f9f2e8c6723e85a392c0f Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 22 Aug 2019 18:21:05 -0700 Subject: [PATCH 04/39] remove checkpoint manager from initialize --- .../azure/eventhub/eventprocessor/event_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 2db6feb4bbf1..e808d603c457 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -175,7 +175,7 @@ async def _receive(self, ownership): self._partition_manager) async def initialize(): if hasattr(partition_processor, "initialize"): - await partition_processor.initialize(checkpoint_manager) + await partition_processor.initialize() async def process_error(err): if hasattr(partition_processor, "process_error"): From 9be17413316d4bf556472e950b3bfbeb56df166b Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 22 Aug 2019 18:41:18 -0700 Subject: [PATCH 05/39] small changes --- .../eventprocessor/event_processor.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index e808d603c457..23178deba76a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -164,15 +164,19 @@ async def _receive(self, ownership): await self.stop() raise TypeError("Partition processor must has method process_events(events, checkpoint_manager") - partition_consumer = self._eventhub_client.create_consumer(ownership["consumer_group_name"], - ownership["partition_id"], - 
EventPosition(ownership.get("offset", self._initial_event_position)) - ) - checkpoint_manager = CheckpointManager(ownership["partition_id"], - ownership["eventhub_name"], - ownership["consumer_group_name"], - ownership["owner_id"], - self._partition_manager) + partition_consumer = self._eventhub_client.create_consumer( + ownership["consumer_group_name"], + ownership["partition_id"], + EventPosition(ownership.get("offset", self._initial_event_position)) + ) + checkpoint_manager = CheckpointManager( + ownership["partition_id"], + ownership["eventhub_name"], + ownership["consumer_group_name"], + ownership["owner_id"], + self._partition_manager + ) + async def initialize(): if hasattr(partition_processor, "initialize"): await partition_processor.initialize() From 1b5753c14c970b388756dad6aaa174faf10840a8 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 21 Aug 2019 19:17:00 -0700 Subject: [PATCH 06/39] Draft EventProcessor Loadbalancing --- .../eventprocessor/_ownership_manager.py | 120 ++++++++++ .../eventprocessor/event_processor.py | 207 +++++++++--------- .../eventprocessor/partition_processor.py | 15 +- .../sqlite3_partition_manager.py | 95 ++++---- 4 files changed, 287 insertions(+), 150 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py new file mode 100644 index 000000000000..710e71ab8439 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -0,0 +1,120 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# ----------------------------------------------------------------------------------- + +from typing import List, Iterable, Any, Dict +import time +import random +import math +from collections import Counter +from azure.eventhub.aio import EventHubClient + + +class OwnershipManager(object): + """Increases or decreases the number of partitions owned by an EventProcessor + so the number of owned partitions are balanced among multiple EventProcessors + + An EventProcessor calls claim_ownership() of this class every x seconds, + where x is set by keyword argument "polling_interval" in EventProcessor, + to claim the ownership of partitions, create tasks for the claimed ownership, and cancel tasks that no longer belong + to the claimed ownership. + + """ + def __init__(self, event_processor, eventhub_client: EventHubClient, ownership_timeout: int): + self.all_parition_ids = [] + self.eventhub_client = eventhub_client + self.eventhub_name = eventhub_client.eh_name + self.consumer_group_name = event_processor._consumer_group_name + self.owner_id = event_processor._id + self.partition_manager = event_processor._partition_manager + self.ownership_timeout = ownership_timeout + + async def claim_ownership(self): + """Claims ownership for this EventProcessor + 1. Retrieves all partition ids of an event hub from azure event hub service + 2. Retrieves current ownership list via this EventProcessor's PartitionManager. + 3. Searches claimable partitions for this EventProcessor. Refer to claim_ownership() for details. + 4. Claims the ownership for the claimable partitions + + :return: List[Dict[Any]] + """ + if not self.all_parition_ids: + await self._retrieve_partition_ids() + to_claim = await self._balance_ownership() + claimed_list = await self._claim_ownership(to_claim) + return claimed_list + + async def _retrieve_partition_ids(self): + """List all partition ids of the event hub that the EventProcessor is working on. 
+ + :return: List[str] + """ + self.all_parition_ids = await self.eventhub_client.get_partition_ids() + + async def _balance_ownership(self): + ownership_list = await self.partition_manager.list_ownership(self.eventhub_client.eh_name, self.consumer_group_name) + ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup + ''' + now = time.time() + partition_ids_no_ownership = list(filter(lambda x: x not in ownership_dict, self.all_parition_ids)) + inactive_ownership = filter(lambda x: x["last_modified_time"] + self.ownership_timeout < now, ownership_list) + claimable_partition_ids = partition_ids_no_ownership + [x["partition_id"] for x in inactive_ownership] + active_ownership = list(filter(lambda x: x["last_modified_time"] + self.ownership_timeout >= now, ownership_list)) + active_ownership_count_group_by_owner = Counter([x["owner_id"] for x in active_ownership]) + active_ownership_self = list(filter(lambda x: x["owner_id"] == self.owner_id, active_ownership)) + ''' + claimable_partition_ids = [] + active_ownership_self = [] + active_ownership_count_group_by_owner = Counter() + for partition_id in self.all_parition_ids: + ownership = ownership_dict.get(partition_id) + if not ownership: # no ownership found for this partition. So it is claimable + claimable_partition_ids.append(partition_id) + else: + last_modified_time = ownership["last_modified_time"] + owner_id = ownership["owner_id"] + now = time.time() + if now > self.ownership_timeout + last_modified_time: # ownership timed out. 
So it is claimable + claimable_partition_ids.append(partition_id) + else: # the ownership is still active + if owner_id == self.owner_id: # partition is actively owned by this running EventProcessor + active_ownership_self.append(ownership) + active_ownership_count_group_by_owner[owner_id] = active_ownership_count_group_by_owner.get(owner_id, 0) + 1 # all active owners + + # calculate expected count per owner + all_partition_count = len(self.all_parition_ids) + owners_count = len(active_ownership_count_group_by_owner) + (1 if self.owner_id not in active_ownership_count_group_by_owner else 0) + expected_count_per_owner = all_partition_count // owners_count + most_count_allowed_per_owner = math.ceil(all_partition_count / owners_count) + # end of calculating expected count per owner + + to_claim = active_ownership_self + if len(active_ownership_self) > most_count_allowed_per_owner: # needs to abandon a partition + to_claim.pop() # abandon one partition if owned too many + # TODO: Release a ownership immediately so other EventProcessors won't need to wait it to timeout + elif len(active_ownership_self) < expected_count_per_owner: # Either claims an inactive partition, or steals from other owners + if claimable_partition_ids: # claim an inactive partition if there is + random_partition_id = random.choice(claimable_partition_ids) + random_chosen_to_claim = ownership_dict.get(random_partition_id, + {"partition_id": random_partition_id, + "eventhub_name": self.eventhub_client.eh_name, + "consumer_group_name": self.consumer_group_name, + "owner_level": 0}) + random_chosen_to_claim["owner_id"] = self.owner_id + to_claim.append(random_chosen_to_claim) + else: # steal from another owner that has the most count + most_frequent_owner_id = active_ownership_count_group_by_owner.most_common(1)[0][0] + # randomly choose a partition to steal from the most_frequent_owner + to_steal_partition = random.choice(list(filter(lambda x: x["owner_id"] == most_frequent_owner_id, + 
ownership_list))) + to_steal_partition["owner_id"] = self.owner_id + to_claim.append(to_steal_partition) + return to_claim + + async def _claim_ownership(self, ownership_list): + if ownership_list: + claimed_list = await self.partition_manager.claim_ownership(ownership_list) + return claimed_list + else: + return None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 85020257df46..c675afd165a4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -3,15 +3,17 @@ # Licensed under the MIT License. See License.txt in the project root for license information. # ----------------------------------------------------------------------------------- -from typing import Callable, List +from typing import Callable, Dict import uuid import asyncio import logging +from enum import Enum from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient from .checkpoint_manager import CheckpointManager from .partition_manager import PartitionManager +from ._ownership_manager import OwnershipManager from .partition_processor import PartitionProcessor, CloseReason from .utils import get_running_loop @@ -53,7 +55,7 @@ async def process_events(self, events): """ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory: Callable[[CheckpointManager], PartitionProcessor], + partition_processor_factory, partition_manager: PartitionManager, **kwargs): """ Instantiate an EventProcessor. 
@@ -73,16 +75,22 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, :type initial_event_position: int or str """ + self._consumer_group_name = consumer_group_name self._eventhub_client = eventhub_client self._eventhub_name = eventhub_client.eh_name self._partition_processor_factory = partition_processor_factory self._partition_manager = partition_manager self._initial_event_position = kwargs.get("initial_event_position", "-1") + # TODO: initial position provider will be a callable + # so users can create initial event position for every partition self._max_batch_size = eventhub_client.config.max_batch_size self._receive_timeout = eventhub_client.config.receive_timeout - self._tasks = [] # type: List[asyncio.Task] + self._polling_interval = kwargs.get("polling_interval", 10) + self._ownership_timeout = self._polling_interval * 2 # TODO: Team haven't decided if this is a separate argument + self._tasks = {} # type: Dict[str, asyncio.Task] self._id = str(uuid.uuid4()) + self._running = False def __repr__(self): return 'EventProcessor: id {}'.format(self._id) @@ -98,9 +106,22 @@ async def start(self): """ log.info("EventProcessor %r is being started", self._id) - partition_ids = await self._eventhub_client.get_partition_ids() - claimed_list = await self._claim_partitions(partition_ids) - await self._start_claimed_partitions(claimed_list) + ownership_manager = OwnershipManager(self, self._eventhub_client, self._ownership_timeout) + if not self._running: + self._running = True + while self._running: + claimed_ownership_list = await ownership_manager.claim_ownership() + claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] + to_cancel_list = self._tasks.keys() - claimed_partition_ids + if to_cancel_list: + self._cancel_tasks_for_partitions(to_cancel_list) + log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) + + if claimed_partition_ids: + 
self._create_tasks_for_claimed_ownership(claimed_ownership_list) + else: + log.warning("EventProcessor %r hasn't claimed an ownership. It keeps claiming.", self._id) + await asyncio.sleep(self._polling_interval) async def stop(self): """Stop all the partition consumer @@ -110,109 +131,85 @@ async def stop(self): :return: None """ + self._running = False for i in range(len(self._tasks)): - task = self._tasks.pop() + task = self._tasks.popitem()[1] task.cancel() log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled - async def _claim_partitions(self, partition_ids): - partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) - partitions_ownership_dict = dict() - for ownership in partitions_ownership: - partitions_ownership_dict[ownership["partition_id"]] = ownership - - to_claim_list = [] - for pid in partition_ids: - p_ownership = partitions_ownership_dict.get(pid) - if p_ownership: - to_claim_list.append(p_ownership) - else: - new_ownership = {"eventhub_name": self._eventhub_name, "consumer_group_name": self._consumer_group_name, - "owner_id": self._id, "partition_id": pid, "owner_level": OWNER_LEVEL} - to_claim_list.append(new_ownership) - claimed_list = await self._partition_manager.claim_ownership(to_claim_list) - return claimed_list - - async def _start_claimed_partitions(self, claimed_partitions): - for partition in claimed_partitions: - partition_id = partition["partition_id"] - offset = partition.get("offset", self._initial_event_position) - consumer = self._eventhub_client.create_consumer(self._consumer_group_name, partition_id, - EventPosition(str(offset))) - partition_processor = self._partition_processor_factory( - checkpoint_manager=CheckpointManager(partition_id, self._eventhub_name, self._consumer_group_name, - self._id, self._partition_manager) - ) - loop = get_running_loop() - task = loop.create_task( - _receive(consumer, 
partition_processor, self._receive_timeout)) - self._tasks.append(task) + def _cancel_tasks_for_partitions(self, to_cancel_partitions): + for partition_id in to_cancel_partitions: + if partition_id in self._tasks: + task = self._tasks.pop(partition_id) + task.cancel() + + def _create_tasks_for_claimed_ownership(self, to_claim_ownership_list): + for ownership in to_claim_ownership_list: + partition_id = ownership["partition_id"] + if partition_id not in self._tasks: + self._tasks[partition_id] = asyncio.create_task(self._receive(ownership)) + + async def _receive(self, ownership): + log.info("start ownership, %r", ownership) + partition_consumer = self._eventhub_client.create_consumer(ownership["consumer_group_name"], + ownership["partition_id"], + EventPosition(ownership.get("offset", self._initial_event_position)) + ) + checkpoint_manager = CheckpointManager(ownership["partition_id"], + ownership["eventhub_name"], + ownership["consumer_group_name"], + ownership["owner_id"], + self._partition_manager) + partition_processor = self._partition_processor_factory() + + async def initialize(): + if hasattr(partition_processor, "initialize"): + await partition_processor.initialize(checkpoint_manager) + + async def process_error(err): + if hasattr(partition_processor, "process_error"): + await partition_processor.process_error(err, checkpoint_manager) + + async def close(close_reason): + if hasattr(partition_processor, "close"): + await partition_processor.close(close_reason, checkpoint_manager) + try: - await asyncio.gather(*self._tasks) + while True: + try: + await initialize() + events = await partition_consumer.receive(timeout=self._receive_timeout) + await partition_processor.process_events(events, checkpoint_manager) + except asyncio.CancelledError as cancelled_error: + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "is cancelled", + ownership["owner_id"], + ownership["eventhub_name"], + 
ownership["partition_id"], + ownership["consumer_group_name"] + ) + await process_error(cancelled_error) + await close(CloseReason.SHUTDOWN) + break + except EventHubError as eh_err: + reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION + log.warning( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "has met an exception receiving events. It's being closed. The exception is %r.", + ownership["owner_id"], + ownership["eventhub_name"], + ownership["partition_id"], + ownership["consumer_group_name"], + eh_err + ) + await process_error(eh_err) + await close(reason) + break + except Exception as exp: + log.warning(exp) + # TODO: will review whether to break and close partition processor after user's code has an exception + # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? finally: - log.info("EventProcessor %r has stopped", self._id) - - -async def _receive(partition_consumer, partition_processor, receive_timeout): - try: - while True: - try: - events = await partition_consumer.receive(timeout=receive_timeout) - except asyncio.CancelledError as cancelled_error: - log.info( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "is cancelled", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name - ) - await partition_processor.process_error(cancelled_error) - await partition_processor.close(reason=CloseReason.SHUTDOWN) - break - except EventHubError as eh_err: - reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION - log.warning( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has met an 
exception receiving events. It's being closed. The exception is %r.", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name, - eh_err - ) - await partition_processor.process_error(eh_err) - await partition_processor.close(reason=reason) - break - try: - await partition_processor.process_events(events) - except asyncio.CancelledError as cancelled_error: - log.info( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "is cancelled.", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name - ) - await partition_processor.process_error(cancelled_error) - await partition_processor.close(reason=CloseReason.SHUTDOWN) - break - except Exception as exp: # user code has caused an error - log.warning( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has met an exception from user code process_events. It's being closed. The exception is %r.", - partition_processor._checkpoint_manager.owner_id, - partition_processor._checkpoint_manager.eventhub_name, - partition_processor._checkpoint_manager.partition_id, - partition_processor._checkpoint_manager.consumer_group_name, - exp - ) - await partition_processor.process_error(exp) - await partition_processor.close(reason=CloseReason.EVENTHUB_EXCEPTION) - break - # TODO: will review whether to break and close partition processor after user's code has an exception - # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? 
- finally: - await partition_consumer.close() + await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 10aafc79c492..94729aae1269 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -4,7 +4,7 @@ # ----------------------------------------------------------------------------------- from typing import List -from abc import ABC, abstractmethod +from typing_extensions import Protocol from enum import Enum from .checkpoint_manager import CheckpointManager @@ -17,16 +17,16 @@ class CloseReason(Enum): EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events -class PartitionProcessor(ABC): +class PartitionProcessor(Protocol): """ PartitionProcessor processes events received from the Azure Event Hubs service. A single instance of a class implementing this abstract class will be created for every partition the associated ~azure.eventhub.eventprocessor.EventProcessor owns. """ - def __init__(self, checkpoint_manager: CheckpointManager): - self._checkpoint_manager = checkpoint_manager + async def initialize(self, checkpoint_manager: CheckpointManager): + pass - async def close(self, reason): + async def close(self, reason, checkpoint_manager: CheckpointManager): """Called when EventProcessor stops processing this PartitionProcessor. There are different reasons to trigger the PartitionProcessor to close. @@ -38,8 +38,7 @@ async def close(self, reason): """ pass - @abstractmethod - async def process_events(self, events: List[EventData]): + async def process_events(self, events: List[EventData], checkpoint_manager: CheckpointManager): """Called when a batch of events have been received. :param events: Received events. 
@@ -48,7 +47,7 @@ async def process_events(self, events: List[EventData]): """ pass - async def process_error(self, error): + async def process_error(self, error, checkpoint_manager: CheckpointManager): """Called when an error happens :param error: The error that happens. diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index eb08e970fa89..5e2c3dd45748 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -6,8 +6,11 @@ import time import uuid import sqlite3 +import logging from .partition_manager import PartitionManager +logger = logging.getLogger(__name__) + def _check_table_name(table_name: str): for c in table_name: @@ -22,6 +25,15 @@ class Sqlite3PartitionManager(PartitionManager): """ + primary_keys_dict = {"eventhub_name": "text", "consumer_group_name": "text", "partition_id": "text"} + other_fields_dict = {"owner_id": "text", "owner_level": "integer", "sequence_number": "integer", "offset": "text", + "last_modified_time": "real", "etag": "text"} + checkpoint_fields = ["sequence_number", "offset"] + fields_dict = {**primary_keys_dict, **other_fields_dict} + primary_keys = list(primary_keys_dict.keys()) + other_fields = list(other_fields_dict.keys()) + fields = primary_keys + other_fields + def __init__(self, db_filename: str = ":memory:", ownership_table: str = "ownership"): """ @@ -34,17 +46,15 @@ def __init__(self, db_filename: str = ":memory:", ownership_table: str = "owners conn = sqlite3.connect(db_filename) c = conn.cursor() try: - c.execute("create table " + ownership_table + - "(eventhub_name text," - "consumer_group_name text," - "owner_id text," - "partition_id text," - "owner_level integer," - "sequence_number integer," - "offset text," - "last_modified_time integer," 
- "etag text)") + sql = "create table if not exists " + _check_table_name(ownership_table)\ + + "("\ + + ",".join([x[0]+" "+x[1] for x in self.fields_dict.items()])\ + + ", constraint pk_ownership PRIMARY KEY ("\ + + ",".join(self.primary_keys)\ + + "))" + c.execute(sql) except sqlite3.OperationalError: + raise pass finally: c.close() @@ -53,44 +63,55 @@ def __init__(self, db_filename: str = ":memory:", ownership_table: str = "owners async def list_ownership(self, eventhub_name, consumer_group_name): cursor = self.conn.cursor() try: - fields = ["eventhub_name", "consumer_group_name", "owner_id", "partition_id", "owner_level", - "sequence_number", - "offset", "last_modified_time", "etag"] - cursor.execute("select " + ",".join(fields) + + cursor.execute("select " + ",".join(self.fields) + " from "+_check_table_name(self.ownership_table)+" where eventhub_name=? " "and consumer_group_name=?", (eventhub_name, consumer_group_name)) - result_list = [] - - for row in cursor.fetchall(): - d = dict(zip(fields, row)) - result_list.append(d) - return result_list + return [dict(zip(self.fields, row)) for row in cursor.fetchall()] finally: cursor.close() async def claim_ownership(self, partitions): + result = [] cursor = self.conn.cursor() try: for p in partitions: - cursor.execute("select * from " + _check_table_name(self.ownership_table) + - " where eventhub_name=? " - "and consumer_group_name=? " - "and partition_id =?", - (p["eventhub_name"], p["consumer_group_name"], - p["partition_id"])) - if not cursor.fetchall(): - cursor.execute("insert into " + _check_table_name(self.ownership_table) + - " (eventhub_name,consumer_group_name,partition_id,owner_id,owner_level,last_modified_time,etag) " - "values (?,?,?,?,?,?,?)", - (p["eventhub_name"], p["consumer_group_name"], p["partition_id"], p["owner_id"], p["owner_level"], - time.time(), str(uuid.uuid4()) - )) + cursor.execute("select etag from " + _check_table_name(self.ownership_table) + + " where "+ " and ".join([field+"=?" 
for field in self.primary_keys]), + tuple(p.get(field) for field in self.primary_keys)) + cursor_fetch = cursor.fetchall() + if not cursor_fetch: + p["last_modified_time"] = time.time() + p["etag"] = str(uuid.uuid4()) + try: + fields_without_checkpoint = list(filter(lambda x: x not in self.checkpoint_fields, self.fields)) + sql = "insert into " + _check_table_name(self.ownership_table) + " (" \ + + ",".join(fields_without_checkpoint) \ + + ") values (?,?,?,?,?,?,?)" + cursor.execute(sql, tuple(p.get(field) for field in fields_without_checkpoint)) + except sqlite3.OperationalError as op_err: + logger.info("EventProcessor %r failed to claim partition %r " + "because it was claimed by another EventProcessor at the same time. " + "The Sqlite3 exception is %r", p["owner_id"], p["partition_id"], op_err) + break + else: + result.append(p) else: - cursor.execute("update " + _check_table_name(self.ownership_table) + " set owner_id=?, owner_level=?, last_modified_time=?, etag=? " - "where eventhub_name=? and consumer_group_name=? and partition_id=?", - (p["owner_id"], p["owner_level"], time.time(), str(uuid.uuid4()), - p["eventhub_name"], p["consumer_group_name"], p["partition_id"])) + if p.get("etag") == cursor_fetch[0][0]: + p["last_modified_time"] = time.time() + p["etag"] = str(uuid.uuid4()) + other_fields_without_checkpoint = list(filter(lambda x: x not in self.checkpoint_fields, self.other_fields)) + sql = "update " + _check_table_name(self.ownership_table) + " set "\ + + ','.join([field+"=?" for field in other_fields_without_checkpoint])\ + + " where "\ + + " and ".join([field+"=?" 
for field in self.primary_keys]) + + cursor.execute(sql, tuple(p.get(field) for field in other_fields_without_checkpoint) + tuple(p.get(field) for field in self.primary_keys)) + result.append(p) + else: + logger.info("EventProcessor %r failed to claim partition %r " + "because it was claimed by another EventProcessor at the same time", p["owner_id"], + p["partition_id"]) self.conn.commit() return partitions finally: From b4b77f9d6bb19fb2f4f7e6fa957c25fe572a4fec Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 22 Aug 2019 01:01:35 -0700 Subject: [PATCH 07/39] EventProcessor Load balancing --- .../eventprocessor/_ownership_manager.py | 45 ++++++++----- .../eventprocessor/event_processor.py | 66 +++++++++++-------- .../eventprocessor/partition_processor.py | 4 +- .../sqlite3_partition_manager.py | 13 +++- .../eventprocessor/event_processor_example.py | 39 ++++------- 5 files changed, 94 insertions(+), 73 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py index 710e71ab8439..40e3419a7c5a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. See License.txt in the project root for license information. # ----------------------------------------------------------------------------------- -from typing import List, Iterable, Any, Dict import time import random import math @@ -34,8 +33,8 @@ async def claim_ownership(self): """Claims ownership for this EventProcessor 1. Retrieves all partition ids of an event hub from azure event hub service 2. Retrieves current ownership list via this EventProcessor's PartitionManager. - 3. Searches claimable partitions for this EventProcessor. Refer to claim_ownership() for details. - 4. 
Claims the ownership for the claimable partitions + 3. Balances number of ownership. Refer to _balance_ownership() for details. + 4. Claims the ownership for the balanced number of partitions. :return: List[Dict[Any]] """ @@ -53,19 +52,35 @@ async def _retrieve_partition_ids(self): self.all_parition_ids = await self.eventhub_client.get_partition_ids() async def _balance_ownership(self): + """Balances and claims ownership of partitions for this EventProcessor. + The balancing algorithm is: + 1. Find partitions with inactive ownership and partitions that haven never been claimed before + 2. Find the number of active owners, including this EventProcessor, for all partitions. + 3. Calculate the average count of partitions that an owner should own. + (number of partitions // number of active owners) + 4. Calculate the largest allowed count of partitions that an owner can own. + math.ceil(number of partitions / number of active owners). This should be equal or 1 greater than the average count + 5. Adjust the number of partitions owned by this EventProcessor (owner) + a. if this EventProcessor owns more than largest allowed count, abandon one partition + b. if this EventProcessor owns less than average count, add one from the inactive or unclaimed partitions, + or steal one from another owner that has the largest number of ownership among all owners (EventProcessors) + c. Otherwise, no change to the ownership + + The balancing algorithm adjust one partition at a time to gradually build the balanced ownership. + Ownership must be renewed to keep it active. So the returned result includes both existing ownership and + the newly adjusted ownership. + This method balances but doesn't claim ownership. The caller of this method tries to claim the result ownership + list. But it may not successfully claim all of them because of concurrency. Other EventProcessors may happen to + claim a partition at that time. 
Since balancing and claiming are run in infinite repeatedly, + it achieves balancing among all EventProcessors after some time of running. + + :return: List[Dict[str, Any]], A list of ownership. + """ ownership_list = await self.partition_manager.list_ownership(self.eventhub_client.eh_name, self.consumer_group_name) ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup - ''' - now = time.time() - partition_ids_no_ownership = list(filter(lambda x: x not in ownership_dict, self.all_parition_ids)) - inactive_ownership = filter(lambda x: x["last_modified_time"] + self.ownership_timeout < now, ownership_list) - claimable_partition_ids = partition_ids_no_ownership + [x["partition_id"] for x in inactive_ownership] - active_ownership = list(filter(lambda x: x["last_modified_time"] + self.ownership_timeout >= now, ownership_list)) - active_ownership_count_group_by_owner = Counter([x["owner_id"] for x in active_ownership]) - active_ownership_self = list(filter(lambda x: x["owner_id"] == self.owner_id, active_ownership)) - ''' - claimable_partition_ids = [] - active_ownership_self = [] + + claimable_partition_ids = [] # partitions with inactive ownership and partitions that have never been claimed yet + active_ownership_self = [] # active ownership of this EventProcessor active_ownership_count_group_by_owner = Counter() for partition_id in self.all_parition_ids: ownership = ownership_dict.get(partition_id) @@ -100,7 +115,7 @@ async def _balance_ownership(self): {"partition_id": random_partition_id, "eventhub_name": self.eventhub_client.eh_name, "consumer_group_name": self.consumer_group_name, - "owner_level": 0}) + "owner_level": 0}) # TODO: consider removing owner_level random_chosen_to_claim["owner_id"] = self.owner_id to_claim.append(random_chosen_to_claim) else: # steal from another owner that has the most count diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py 
b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index c675afd165a4..7e342a369d0d 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -7,15 +7,13 @@ import uuid import asyncio import logging -from enum import Enum from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient from .checkpoint_manager import CheckpointManager from .partition_manager import PartitionManager from ._ownership_manager import OwnershipManager -from .partition_processor import PartitionProcessor, CloseReason -from .utils import get_running_loop +from .partition_processor import CloseReason, PartitionProcessor log = logging.getLogger(__name__) @@ -29,20 +27,21 @@ class EventProcessor(object): It provides the user a convenient way to receive events from multiple partitions and save checkpoints. If multiple EventProcessors are running for an event hub, they will automatically balance load. - This load balancing won't be available until preview 3. Example: .. 
code-block:: python - class MyPartitionProcessor(PartitionProcessor): - async def process_events(self, events): - if events: - # do something sync or async to process the events - await self._checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) - import asyncio from azure.eventhub.aio import EventHubClient from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor, Sqlite3PartitionManager + + class MyPartitionProcessor(object): + async def process_events(self, events, checkpoint_manager): + if events: + # do something sync or async to process the events + await checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + + client = EventHubClient.from_connection_string("", receive_timeout=5, retry_total=3) partition_manager = Sqlite3PartitionManager() try: @@ -55,7 +54,7 @@ async def process_events(self, events): """ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory, + partition_processor_factory: Callable[..., PartitionProcessor], partition_manager: PartitionManager, **kwargs): """ Instantiate an EventProcessor. @@ -73,6 +72,8 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, :type partition_manager: Class implementing the ~azure.eventhub.eventprocessor.PartitionManager. :param initial_event_position: The offset to start a partition consumer if the partition has no checkpoint yet. :type initial_event_position: int or str + :param polling_interval: The interval between any two pollings of balancing and claiming + :type float """ @@ -98,9 +99,12 @@ def __repr__(self): async def start(self): """Start the EventProcessor. - 1. retrieve the partition ids from eventhubs. - 2. claim partition ownership of these partitions. - 3. repeatedly call EvenHubConsumer.receive() to retrieve events and call user defined PartitionProcessor.process_events(). + 1. 
Calls the OwnershipManager to keep claiming and balancing ownership of partitions in an + infinitely loop until self.stop() is called. + 2. Cancels tasks for partitions that are no longer owned by this EventProcessor + 3. Creates tasks for partitions that are newly claimed by this EventProcessor + 4. Keeps tasks running for partitions that haven't changed ownership + 5. Each task repeatedly calls EvenHubConsumer.receive() to retrieve events and call user defined partition processor :return: None @@ -111,22 +115,23 @@ async def start(self): self._running = True while self._running: claimed_ownership_list = await ownership_manager.claim_ownership() - claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] - to_cancel_list = self._tasks.keys() - claimed_partition_ids - if to_cancel_list: - self._cancel_tasks_for_partitions(to_cancel_list) - log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) - - if claimed_partition_ids: + if claimed_ownership_list: + claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] + to_cancel_list = self._tasks.keys() - claimed_partition_ids self._create_tasks_for_claimed_ownership(claimed_ownership_list) else: log.warning("EventProcessor %r hasn't claimed an ownership. It keeps claiming.", self._id) + to_cancel_list = self._tasks.keys() + if to_cancel_list: + self._cancel_tasks_for_partitions(to_cancel_list) + log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) await asyncio.sleep(self._polling_interval) async def stop(self): - """Stop all the partition consumer + """Stop claiming ownership and all the partition consumers owned by this EventProcessor - This method cancels tasks that are running EventHubConsumer.receive() for the partitions owned by this EventProcessor. + This method stops claiming ownership of owned partitions and cancels tasks that are running + EventHubConsumer.receive() for the partitions owned by this EventProcessor. 
:return: None @@ -152,6 +157,13 @@ def _create_tasks_for_claimed_ownership(self, to_claim_ownership_list): async def _receive(self, ownership): log.info("start ownership, %r", ownership) + partition_processor = self._partition_processor_factory() + if not hasattr(partition_processor, "process_events"): + log.error( + "Fatal error: a partition processor should at least have method process_events(events, checkpoint_manager). EventProcessor will stop.") + await self.stop() + raise TypeError("Partition processor must has method process_events(events, checkpoint_manager") + partition_consumer = self._eventhub_client.create_consumer(ownership["consumer_group_name"], ownership["partition_id"], EventPosition(ownership.get("offset", self._initial_event_position)) @@ -161,8 +173,6 @@ async def _receive(self, ownership): ownership["consumer_group_name"], ownership["owner_id"], self._partition_manager) - partition_processor = self._partition_processor_factory() - async def initialize(): if hasattr(partition_processor, "initialize"): await partition_processor.initialize(checkpoint_manager) @@ -192,6 +202,7 @@ async def close(close_reason): ) await process_error(cancelled_error) await close(CloseReason.SHUTDOWN) + # TODO: release the ownership immediately via partition manager break except EventHubError as eh_err: reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION @@ -205,11 +216,10 @@ async def close(close_reason): eh_err ) await process_error(eh_err) - await close(reason) + await close(reason) # An EventProcessor will pick up this partition again after the ownership is released + # TODO: release the ownership immediately via partition manager break except Exception as exp: log.warning(exp) - # TODO: will review whether to break and close partition processor after user's code has an exception - # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? 
finally: await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 94729aae1269..60a95b99e628 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -23,7 +23,7 @@ class PartitionProcessor(Protocol): implementing this abstract class will be created for every partition the associated ~azure.eventhub.eventprocessor.EventProcessor owns. """ - async def initialize(self, checkpoint_manager: CheckpointManager): + async def initialize(self): pass async def close(self, reason, checkpoint_manager: CheckpointManager): @@ -45,7 +45,7 @@ async def process_events(self, events: List[EventData], checkpoint_manager: Chec :type events: list[~azure.eventhub.common.EventData] """ - pass + raise NotImplementedError async def process_error(self, error, checkpoint_manager: CheckpointManager): """Called when an error happens diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 5e2c3dd45748..bf4c46c95eb9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -121,9 +121,16 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ offset, sequence_number): cursor = self.conn.cursor() try: - cursor.execute("update " + _check_table_name(self.ownership_table) + " set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? 
and partition_id=?", - (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) - self.conn.commit() + cursor.execute("select owner_id from " + _check_table_name(self.ownership_table) + " where eventhub_name=? and consumer_group_name=? and partition_id=?", + (eventhub_name, consumer_group_name, partition_id)) + cursor_fetch = cursor.fetchall() + if cursor_fetch and owner_id == cursor_fetch[0][0]: + cursor.execute("update " + _check_table_name(self.ownership_table) + " set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? and partition_id=?", + (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) + self.conn.commit() + else: + logger.info("EventProcessor couldn't checkpoint to partition %r because it no longer has the ownership", partition_id) + finally: cursor.close() diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py index 8c4c9ced7d29..a2165eb81110 100644 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -3,7 +3,6 @@ import os from azure.eventhub.aio import EventHubClient from azure.eventhub.eventprocessor import EventProcessor -from azure.eventhub.eventprocessor import PartitionProcessor from azure.eventhub.eventprocessor import Sqlite3PartitionManager RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 
0 or None means no timeout @@ -18,33 +17,23 @@ async def do_operation(event): print(event) -class MyPartitionProcessor(PartitionProcessor): - def __init__(self, checkpoint_manager): - super(MyPartitionProcessor, self).__init__(checkpoint_manager) - - async def process_events(self, events): +class MyPartitionProcessor(object): + async def process_events(self, events, checkpoint_manager): if events: await asyncio.gather(*[do_operation(event) for event in events]) - await self._checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) - - -def partition_processor_factory(checkpoint_manager): - return MyPartitionProcessor(checkpoint_manager) - - -async def run_awhile(duration): - client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, - retry_total=RETRY_TOTAL) - partition_manager = Sqlite3PartitionManager() - event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager) - try: - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(duration) - await event_processor.stop() - finally: - await partition_manager.close() + await checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + else: + print("empty events received", "partition:", checkpoint_manager.partition_id) if __name__ == '__main__': loop = asyncio.get_event_loop() - loop.run_until_complete(run_awhile(60)) + client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, retry_total=RETRY_TOTAL) + partition_manager = Sqlite3PartitionManager(db_filename="eventprocessor_test_db") + event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager, polling_interval=1) + try: + loop.run_until_complete(event_processor.start()) + except KeyboardInterrupt: + loop.run_until_complete(event_processor.stop()) + finally: + loop.stop() From 1787fdd6ff1c9339e67a6bf76c5aa4513506f2ff Mon Sep 17 00:00:00 2001 From: yijxie Date: 
Thu, 22 Aug 2019 18:10:35 -0700 Subject: [PATCH 08/39] small changes from bryan's review --- .../azure/eventhub/eventprocessor/event_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 7e342a369d0d..2db6feb4bbf1 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -137,8 +137,8 @@ async def stop(self): """ self._running = False - for i in range(len(self._tasks)): - task = self._tasks.popitem()[1] + for _ in range(len(self._tasks)): + _, task = self._tasks.popitem() task.cancel() log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled @@ -205,7 +205,7 @@ async def close(close_reason): # TODO: release the ownership immediately via partition manager break except EventHubError as eh_err: - reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION + reason = CloseReason.OWNERSHIP_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. 
The exception is %r.", From c2d0155c58d3e74f28931538c44d4e9f7af4d25d Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 22 Aug 2019 18:21:05 -0700 Subject: [PATCH 09/39] remove checkpoint manager from initialize --- .../azure/eventhub/eventprocessor/event_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 2db6feb4bbf1..e808d603c457 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -175,7 +175,7 @@ async def _receive(self, ownership): self._partition_manager) async def initialize(): if hasattr(partition_processor, "initialize"): - await partition_processor.initialize(checkpoint_manager) + await partition_processor.initialize() async def process_error(err): if hasattr(partition_processor, "process_error"): From 10743856d30facb239bc4b14e54e6a5ec38a934b Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 22 Aug 2019 18:41:18 -0700 Subject: [PATCH 10/39] small changes --- .../eventprocessor/event_processor.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index e808d603c457..23178deba76a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -164,15 +164,19 @@ async def _receive(self, ownership): await self.stop() raise TypeError("Partition processor must has method process_events(events, checkpoint_manager") - partition_consumer = self._eventhub_client.create_consumer(ownership["consumer_group_name"], - ownership["partition_id"], - 
EventPosition(ownership.get("offset", self._initial_event_position)) - ) - checkpoint_manager = CheckpointManager(ownership["partition_id"], - ownership["eventhub_name"], - ownership["consumer_group_name"], - ownership["owner_id"], - self._partition_manager) + partition_consumer = self._eventhub_client.create_consumer( + ownership["consumer_group_name"], + ownership["partition_id"], + EventPosition(ownership.get("offset", self._initial_event_position)) + ) + checkpoint_manager = CheckpointManager( + ownership["partition_id"], + ownership["eventhub_name"], + ownership["consumer_group_name"], + ownership["owner_id"], + self._partition_manager + ) + async def initialize(): if hasattr(partition_processor, "initialize"): await partition_processor.initialize() From 386baf011acce33839479ad8ecbf1278e641a993 Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 29 Aug 2019 12:14:28 -0700 Subject: [PATCH 11/39] Fix code review feedback --- .../eventprocessor/_ownership_manager.py | 76 +++++++++---------- .../eventprocessor/checkpoint_manager.py | 9 ++- .../eventprocessor/event_processor.py | 17 +++-- .../azure/eventhub/eventprocessor/utils.py | 2 +- 4 files changed, 54 insertions(+), 50 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py index 40e3419a7c5a..5a49a1d093e9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -6,8 +6,10 @@ import time import random import math +import collections from collections import Counter from azure.eventhub.aio import EventHubClient +from .partition_manager import PartitionManager class OwnershipManager(object): @@ -20,13 +22,16 @@ class OwnershipManager(object): to the claimed ownership. 
""" - def __init__(self, event_processor, eventhub_client: EventHubClient, ownership_timeout: int): - self.all_parition_ids = [] + def __init__( + self, eventhub_client: EventHubClient, consumer_group_name: str, owner_id: str, + partition_manager: PartitionManager, ownership_timeout: int + ): + self.cached_parition_ids = [] self.eventhub_client = eventhub_client self.eventhub_name = eventhub_client.eh_name - self.consumer_group_name = event_processor._consumer_group_name - self.owner_id = event_processor._id - self.partition_manager = event_processor._partition_manager + self.consumer_group_name = consumer_group_name + self.owner_id = owner_id + self.partition_manager = partition_manager self.ownership_timeout = ownership_timeout async def claim_ownership(self): @@ -38,10 +43,10 @@ async def claim_ownership(self): :return: List[Dict[Any]] """ - if not self.all_parition_ids: + if not self.cached_parition_ids: await self._retrieve_partition_ids() to_claim = await self._balance_ownership() - claimed_list = await self._claim_ownership(to_claim) + claimed_list = await self.partition_manager.claim_ownership(to_claim) if to_claim else None return claimed_list async def _retrieve_partition_ids(self): @@ -49,7 +54,7 @@ async def _retrieve_partition_ids(self): :return: List[str] """ - self.all_parition_ids = await self.eventhub_client.get_partition_ids() + self.cached_parition_ids = await self.eventhub_client.get_partition_ids() async def _balance_ownership(self): """Balances and claims ownership of partitions for this EventProcessor. @@ -59,11 +64,12 @@ async def _balance_ownership(self): 3. Calculate the average count of partitions that an owner should own. (number of partitions // number of active owners) 4. Calculate the largest allowed count of partitions that an owner can own. - math.ceil(number of partitions / number of active owners). This should be equal or 1 greater than the average count + math.ceil(number of partitions / number of active owners). 
+ This should be equal or 1 greater than the average count 5. Adjust the number of partitions owned by this EventProcessor (owner) a. if this EventProcessor owns more than largest allowed count, abandon one partition b. if this EventProcessor owns less than average count, add one from the inactive or unclaimed partitions, - or steal one from another owner that has the largest number of ownership among all owners (EventProcessors) + or steal one from another owner that has the largest number of ownership among all owners (EventProcessors) c. Otherwise, no change to the ownership The balancing algorithm adjust one partition at a time to gradually build the balanced ownership. @@ -76,30 +82,26 @@ async def _balance_ownership(self): :return: List[Dict[str, Any]], A list of ownership. """ - ownership_list = await self.partition_manager.list_ownership(self.eventhub_client.eh_name, self.consumer_group_name) + ownership_list = await self.partition_manager.list_ownership( + self.eventhub_client.eh_name, self.consumer_group_name + ) + now = time.time() ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup - - claimable_partition_ids = [] # partitions with inactive ownership and partitions that have never been claimed yet - active_ownership_self = [] # active ownership of this EventProcessor - active_ownership_count_group_by_owner = Counter() - for partition_id in self.all_parition_ids: - ownership = ownership_dict.get(partition_id) - if not ownership: # no ownership found for this partition. So it is claimable - claimable_partition_ids.append(partition_id) - else: - last_modified_time = ownership["last_modified_time"] - owner_id = ownership["owner_id"] - now = time.time() - if now > self.ownership_timeout + last_modified_time: # ownership timed out. 
So it is claimable - claimable_partition_ids.append(partition_id) - else: # the ownership is still active - if owner_id == self.owner_id: # partition is actively owned by this running EventProcessor - active_ownership_self.append(ownership) - active_ownership_count_group_by_owner[owner_id] = active_ownership_count_group_by_owner.get(owner_id, 0) + 1 # all active owners + not_owned_partition_ids = [pid for pid in self.cached_parition_ids if pid not in ownership_dict] + timed_out_partition_ids = [ownership["partition_id"] for ownership in ownership_list + if ownership["last_modified_time"] + self.ownership_timeout < now] + claimable_partition_ids = not_owned_partition_ids + timed_out_partition_ids + active_ownership = [ownership for ownership in ownership_list + if ownership["last_modified_time"] + self.ownership_timeout >= now] + active_ownership_by_owner = collections.defaultdict(list) + for ownership in active_ownership: + active_ownership_by_owner[ownership["owner_id"]].append(ownership) + active_ownership_self = active_ownership_by_owner[self.owner_id] # calculate expected count per owner - all_partition_count = len(self.all_parition_ids) - owners_count = len(active_ownership_count_group_by_owner) + (1 if self.owner_id not in active_ownership_count_group_by_owner else 0) + all_partition_count = len(self.cached_parition_ids) + owners_count = len(active_ownership_by_owner) + \ + (0 if self.owner_id in active_ownership_by_owner else 1) expected_count_per_owner = all_partition_count // owners_count most_count_allowed_per_owner = math.ceil(all_partition_count / owners_count) # end of calculating expected count per owner @@ -119,17 +121,11 @@ async def _balance_ownership(self): random_chosen_to_claim["owner_id"] = self.owner_id to_claim.append(random_chosen_to_claim) else: # steal from another owner that has the most count + active_ownership_count_group_by_owner = Counter( + (x, len(y)) for x, y in active_ownership_by_owner.items()) most_frequent_owner_id = 
active_ownership_count_group_by_owner.most_common(1)[0][0] # randomly choose a partition to steal from the most_frequent_owner - to_steal_partition = random.choice(list(filter(lambda x: x["owner_id"] == most_frequent_owner_id, - ownership_list))) + to_steal_partition = random.choice(active_ownership_by_owner[most_frequent_owner_id]) to_steal_partition["owner_id"] = self.owner_id to_claim.append(to_steal_partition) return to_claim - - async def _claim_ownership(self, ownership_list): - if ownership_list: - claimed_list = await self.partition_manager.claim_ownership(ownership_list) - return claimed_list - else: - return None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py index 2714f675b28c..f9d9e46e7ed8 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -13,7 +13,8 @@ class CheckpointManager(object): The interaction with the chosen storage service is done via ~azure.eventhub.eventprocessor.PartitionManager. """ - def __init__(self, partition_id: str, eventhub_name: str, consumer_group_name: str, owner_id: str, partition_manager: PartitionManager): + def __init__(self, partition_id: str, eventhub_name: str, consumer_group_name: str, + owner_id: str, partition_manager: PartitionManager): self.partition_id = partition_id self.eventhub_name = eventhub_name self.consumer_group_name = consumer_group_name @@ -22,11 +23,13 @@ def __init__(self, partition_id: str, eventhub_name: str, consumer_group_name: s async def update_checkpoint(self, offset, sequence_number=None): """ - Updates the checkpoint using the given information for the associated partition and consumer group in the chosen storage service. 
+ Updates the checkpoint using the given information for the associated partition and consumer group in the + chosen storage service. :param offset: The offset of the ~azure.eventhub.EventData the new checkpoint will be associated with. :type offset: str - :param sequence_number: The sequence_number of the ~azure.eventhub.EventData the new checkpoint will be associated with. + :param sequence_number: The sequence_number of the ~azure.eventhub.EventData the new checkpoint will be + associated with. :type sequence_number: int :return: None """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 23178deba76a..dfed927015c1 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -53,9 +53,11 @@ async def process_events(self, events, checkpoint_manager): await partition_manager.close() """ - def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory: Callable[..., PartitionProcessor], - partition_manager: PartitionManager, **kwargs): + def __init__( + self, eventhub_client: EventHubClient, consumer_group_name: str, + partition_processor_factory: Callable[..., PartitionProcessor], + partition_manager: PartitionManager, **kwargs + ): """ Instantiate an EventProcessor. 
@@ -88,7 +90,8 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, self._max_batch_size = eventhub_client.config.max_batch_size self._receive_timeout = eventhub_client.config.receive_timeout self._polling_interval = kwargs.get("polling_interval", 10) - self._ownership_timeout = self._polling_interval * 2 # TODO: Team haven't decided if this is a separate argument + self._ownership_timeout = self._polling_interval * 2 + # TODO: Team haven't decided if this is a separate argument self._tasks = {} # type: Dict[str, asyncio.Task] self._id = str(uuid.uuid4()) self._running = False @@ -104,13 +107,15 @@ async def start(self): 2. Cancels tasks for partitions that are no longer owned by this EventProcessor 3. Creates tasks for partitions that are newly claimed by this EventProcessor 4. Keeps tasks running for partitions that haven't changed ownership - 5. Each task repeatedly calls EvenHubConsumer.receive() to retrieve events and call user defined partition processor + 5. 
Each task repeatedly calls EvenHubConsumer.receive() to retrieve events and + call user defined partition processor :return: None """ log.info("EventProcessor %r is being started", self._id) - ownership_manager = OwnershipManager(self, self._eventhub_client, self._ownership_timeout) + ownership_manager = OwnershipManager(self._eventhub_client, self._consumer_group_name, self._id, + self._partition_manager, self._ownership_timeout) if not self._running: self._running = True while self._running: diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py index 368cd8469f10..1d8add0f49a0 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py @@ -10,7 +10,7 @@ def get_running_loop(): try: return asyncio.get_running_loop() except AttributeError: # 3.5 / 3.6 - loop = asyncio._get_running_loop() # pylint: disable=protected-access + loop = asyncio._get_running_loop() # pylint: disable=protected-access, no-member if loop is None: raise RuntimeError('No running event loop') return loop From c126bea5d4053c7c720dea12e8826e0f36f0a5e5 Mon Sep 17 00:00:00 2001 From: Azure SDK for Python bot Date: Fri, 30 Aug 2019 00:06:32 +0000 Subject: [PATCH 12/39] Packaging update of azure-mgmt-datalake-analytics --- .../azure-mgmt-datalake-analytics/MANIFEST.in | 5 +++++ .../azure-mgmt-datalake-analytics/README.rst | 21 +------------------ .../azure-mgmt-datalake-analytics/setup.py | 2 +- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in b/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in index bb37a2723dae..c0cf549166ee 100644 --- a/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in +++ b/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in @@ -1 +1,6 @@ +recursive-include tests *.py *.yaml include *.rst +include 
azure/__init__.py +include azure/mgmt/__init__.py +include azure/mgmt/datalake/__init__.py + diff --git a/sdk/datalake/azure-mgmt-datalake-analytics/README.rst b/sdk/datalake/azure-mgmt-datalake-analytics/README.rst index 756ded7f07a9..fb18ab8d347d 100644 --- a/sdk/datalake/azure-mgmt-datalake-analytics/README.rst +++ b/sdk/datalake/azure-mgmt-datalake-analytics/README.rst @@ -6,7 +6,7 @@ This is the Microsoft Azure Data Lake Analytics Management Client Library. Azure Resource Manager (ARM) is the next generation of management APIs that replace the old Azure Service Management (ASM). -This package has been tested with Python 2.7, 3.4, 3.5, 3.6 and 3.7. +This package has been tested with Python 2.7, 3.5, 3.6 and 3.7. For the older Azure Service Management (ASM) libraries, see `azure-servicemanagement-legacy `__ library. @@ -14,25 +14,6 @@ For the older Azure Service Management (ASM) libraries, see For a more complete set of Azure libraries, see the `azure `__ bundle package. -Compatibility -============= - -**IMPORTANT**: If you have an earlier version of the azure package -(version < 1.0), you should uninstall it before installing this package. - -You can check the version using pip: - -.. code:: shell - - pip freeze - -If you see azure==0.11.0 (or any version below 1.0), uninstall it first: - -.. 
code:: shell - - pip uninstall azure - - Usage ===== diff --git a/sdk/datalake/azure-mgmt-datalake-analytics/setup.py b/sdk/datalake/azure-mgmt-datalake-analytics/setup.py index 68f340388091..4ba2e4e83639 100644 --- a/sdk/datalake/azure-mgmt-datalake-analytics/setup.py +++ b/sdk/datalake/azure-mgmt-datalake-analytics/setup.py @@ -53,6 +53,7 @@ version=version, description='Microsoft Azure {} Client Library for Python'.format(PACKAGE_PPRINT_NAME), long_description=readme + '\n\n' + history, + long_description_content_type='text/x-rst', license='MIT License', author='Microsoft Corporation', author_email='azpysdkhelp@microsoft.com', @@ -63,7 +64,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', From 40c7f03b3940069479340effe10afde00c41cbb4 Mon Sep 17 00:00:00 2001 From: Azure SDK for Python bot Date: Fri, 30 Aug 2019 00:06:33 +0000 Subject: [PATCH 13/39] Packaging update of azure-loganalytics --- .../azure-loganalytics/MANIFEST.in | 3 ++- .../azure-loganalytics/README.rst | 21 +------------------ sdk/loganalytics/azure-loganalytics/setup.py | 3 ++- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/sdk/loganalytics/azure-loganalytics/MANIFEST.in b/sdk/loganalytics/azure-loganalytics/MANIFEST.in index 88830671bc4d..74c0ac7e42a1 100644 --- a/sdk/loganalytics/azure-loganalytics/MANIFEST.in +++ b/sdk/loganalytics/azure-loganalytics/MANIFEST.in @@ -1,3 +1,4 @@ +recursive-include tests *.py *.yaml include *.rst +include azure/__init__.py -include azure/__init__.py \ No newline at end of file diff --git a/sdk/loganalytics/azure-loganalytics/README.rst b/sdk/loganalytics/azure-loganalytics/README.rst index f2da77db1eca..78bf6021ad9e 100644 --- a/sdk/loganalytics/azure-loganalytics/README.rst +++ 
b/sdk/loganalytics/azure-loganalytics/README.rst @@ -3,30 +3,11 @@ Microsoft Azure SDK for Python This is the Microsoft Azure Log Analytics Client Library. -This package has been tested with Python 2.7, 3.4, 3.5, 3.6 and 3.7. +This package has been tested with Python 2.7, 3.5, 3.6 and 3.7. For a more complete set of Azure libraries, see the `azure `__ bundle package. -Compatibility -============= - -**IMPORTANT**: If you have an earlier version of the azure package -(version < 1.0), you should uninstall it before installing this package. - -You can check the version using pip: - -.. code:: shell - - pip freeze - -If you see azure==0.11.0 (or any version below 1.0), uninstall it first: - -.. code:: shell - - pip uninstall azure - - Usage ===== diff --git a/sdk/loganalytics/azure-loganalytics/setup.py b/sdk/loganalytics/azure-loganalytics/setup.py index 87ff80807710..52e41c6df814 100644 --- a/sdk/loganalytics/azure-loganalytics/setup.py +++ b/sdk/loganalytics/azure-loganalytics/setup.py @@ -53,6 +53,7 @@ version=version, description='Microsoft Azure {} Client Library for Python'.format(PACKAGE_PPRINT_NAME), long_description=readme + '\n\n' + history, + long_description_content_type='text/x-rst', license='MIT License', author='Microsoft Corporation', author_email='azpysdkhelp@microsoft.com', @@ -63,7 +64,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', @@ -77,6 +77,7 @@ ]), install_requires=[ 'msrest>=0.5.0', + 'msrestazure>=0.4.32,<2.0.0', 'azure-common~=1.1', ], extras_require={ From cf22c7c2e6f83d4e31a741c190ab63e19d417cc2 Mon Sep 17 00:00:00 2001 From: Azure SDK for Python bot Date: Fri, 30 Aug 2019 00:06:34 +0000 Subject: [PATCH 14/39] Packaging update of azure-mgmt-storage --- sdk/storage/azure-mgmt-storage/README.rst | 2 +- 
sdk/storage/azure-mgmt-storage/setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/storage/azure-mgmt-storage/README.rst b/sdk/storage/azure-mgmt-storage/README.rst index e8a573239aa7..5b9e52dc5b2d 100644 --- a/sdk/storage/azure-mgmt-storage/README.rst +++ b/sdk/storage/azure-mgmt-storage/README.rst @@ -6,7 +6,7 @@ This is the Microsoft Azure Storage Management Client Library. Azure Resource Manager (ARM) is the next generation of management APIs that replace the old Azure Service Management (ASM). -This package has been tested with Python 2.7, 3.4, 3.5, 3.6 and 3.7. +This package has been tested with Python 2.7, 3.5, 3.6 and 3.7. For the older Azure Service Management (ASM) libraries, see `azure-servicemanagement-legacy `__ library. diff --git a/sdk/storage/azure-mgmt-storage/setup.py b/sdk/storage/azure-mgmt-storage/setup.py index 8bef269c261e..8609a1e171a7 100644 --- a/sdk/storage/azure-mgmt-storage/setup.py +++ b/sdk/storage/azure-mgmt-storage/setup.py @@ -64,7 +64,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', From fa804f4d087823e82e118358578d4050fa9c8b40 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 30 Aug 2019 09:18:25 -0700 Subject: [PATCH 15/39] code review fixes and pylint error --- .../azure/eventhub/eventprocessor/__init__.py | 4 +- .../eventprocessor/_ownership_manager.py | 27 ++++---- .../eventprocessor/checkpoint_manager.py | 4 +- .../eventprocessor/event_processor.py | 68 ++++++++----------- .../eventprocessor/partition_manager.py | 11 +-- .../eventprocessor/partition_processor.py | 19 ++++-- .../sqlite3_partition_manager.py | 20 +++--- .../eventprocessor/event_processor_example.py | 4 +- 8 files changed, 80 insertions(+), 77 deletions(-) diff --git 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py index f4b48afac6f3..6280a03fce12 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py @@ -4,6 +4,7 @@ # ----------------------------------------------------------------------------------- from .event_processor import EventProcessor +from .checkpoint_manager import CheckpointManager from .partition_processor import PartitionProcessor, CloseReason from .partition_manager import PartitionManager from .sqlite3_partition_manager import Sqlite3PartitionManager @@ -13,5 +14,6 @@ 'EventProcessor', 'PartitionProcessor', 'PartitionManager', + 'CheckpointManager', 'Sqlite3PartitionManager', -] \ No newline at end of file +] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py index 5a49a1d093e9..e0c3d213c63a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -6,8 +6,8 @@ import time import random import math -import collections -from collections import Counter +from typing import List +from collections import Counter, defaultdict from azure.eventhub.aio import EventHubClient from .partition_manager import PartitionManager @@ -26,7 +26,7 @@ def __init__( self, eventhub_client: EventHubClient, consumer_group_name: str, owner_id: str, partition_manager: PartitionManager, ownership_timeout: int ): - self.cached_parition_ids = [] + self.cached_parition_ids = [] # type: List[str] self.eventhub_client = eventhub_client self.eventhub_name = eventhub_client.eh_name self.consumer_group_name = consumer_group_name @@ -45,7 +45,7 @@ async def claim_ownership(self): """ if not 
self.cached_parition_ids: await self._retrieve_partition_ids() - to_claim = await self._balance_ownership() + to_claim = await self._balance_ownership(self.cached_parition_ids) claimed_list = await self.partition_manager.claim_ownership(to_claim) if to_claim else None return claimed_list @@ -56,7 +56,7 @@ async def _retrieve_partition_ids(self): """ self.cached_parition_ids = await self.eventhub_client.get_partition_ids() - async def _balance_ownership(self): + async def _balance_ownership(self, all_partition_ids): """Balances and claims ownership of partitions for this EventProcessor. The balancing algorithm is: 1. Find partitions with inactive ownership and partitions that haven never been claimed before @@ -87,19 +87,19 @@ async def _balance_ownership(self): ) now = time.time() ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup - not_owned_partition_ids = [pid for pid in self.cached_parition_ids if pid not in ownership_dict] + not_owned_partition_ids = [pid for pid in all_partition_ids if pid not in ownership_dict] timed_out_partition_ids = [ownership["partition_id"] for ownership in ownership_list if ownership["last_modified_time"] + self.ownership_timeout < now] claimable_partition_ids = not_owned_partition_ids + timed_out_partition_ids active_ownership = [ownership for ownership in ownership_list if ownership["last_modified_time"] + self.ownership_timeout >= now] - active_ownership_by_owner = collections.defaultdict(list) + active_ownership_by_owner = defaultdict(list) for ownership in active_ownership: active_ownership_by_owner[ownership["owner_id"]].append(ownership) active_ownership_self = active_ownership_by_owner[self.owner_id] # calculate expected count per owner - all_partition_count = len(self.cached_parition_ids) + all_partition_count = len(all_partition_ids) owners_count = len(active_ownership_by_owner) + \ (0 if self.owner_id in active_ownership_by_owner else 1) expected_count_per_owner = 
all_partition_count // owners_count @@ -109,20 +109,21 @@ async def _balance_ownership(self): to_claim = active_ownership_self if len(active_ownership_self) > most_count_allowed_per_owner: # needs to abandon a partition to_claim.pop() # abandon one partition if owned too many - # TODO: Release a ownership immediately so other EventProcessors won't need to wait it to timeout - elif len(active_ownership_self) < expected_count_per_owner: # Either claims an inactive partition, or steals from other owners + # TODO: Release an ownership immediately so other EventProcessors won't need to wait it to timeout + elif len(active_ownership_self) < expected_count_per_owner: + # Either claims an inactive partition, or steals from other owners if claimable_partition_ids: # claim an inactive partition if there is random_partition_id = random.choice(claimable_partition_ids) random_chosen_to_claim = ownership_dict.get(random_partition_id, {"partition_id": random_partition_id, "eventhub_name": self.eventhub_client.eh_name, - "consumer_group_name": self.consumer_group_name, - "owner_level": 0}) # TODO: consider removing owner_level + "consumer_group_name": self.consumer_group_name + }) random_chosen_to_claim["owner_id"] = self.owner_id to_claim.append(random_chosen_to_claim) else: # steal from another owner that has the most count active_ownership_count_group_by_owner = Counter( - (x, len(y)) for x, y in active_ownership_by_owner.items()) + dict((x, len(y)) for x, y in active_ownership_by_owner.items())) most_frequent_owner_id = active_ownership_count_group_by_owner.most_common(1)[0][0] # randomly choose a partition to steal from the most_frequent_owner to_steal_partition = random.choice(active_ownership_by_owner[most_frequent_owner_id]) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py index f9d9e46e7ed8..3be0c5d945c3 100644 --- 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -13,8 +13,8 @@ class CheckpointManager(object): The interaction with the chosen storage service is done via ~azure.eventhub.eventprocessor.PartitionManager. """ - def __init__(self, partition_id: str, eventhub_name: str, consumer_group_name: str, - owner_id: str, partition_manager: PartitionManager): + def __init__(self, eventhub_name: str, consumer_group_name: str, + partition_id: str, owner_id: str, partition_manager: PartitionManager): self.partition_id = partition_id self.eventhub_name = eventhub_name self.consumer_group_name = consumer_group_name diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index dfed927015c1..b300b7bd0a1c 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -20,7 +20,7 @@ OWNER_LEVEL = 0 -class EventProcessor(object): +class EventProcessor(object): # pylint:disable=too-many-instance-attributes """ An EventProcessor constantly receives events from all partitions of the Event Hub in the context of a given consumer group. The received data will be sent to PartitionProcessor to be processed. @@ -163,72 +163,62 @@ def _create_tasks_for_claimed_ownership(self, to_claim_ownership_list): async def _receive(self, ownership): log.info("start ownership, %r", ownership) partition_processor = self._partition_processor_factory() - if not hasattr(partition_processor, "process_events"): - log.error( - "Fatal error: a partition processor should at least have method process_events(events, checkpoint_manager). 
EventProcessor will stop.") - await self.stop() - raise TypeError("Partition processor must has method process_events(events, checkpoint_manager") - - partition_consumer = self._eventhub_client.create_consumer( - ownership["consumer_group_name"], - ownership["partition_id"], - EventPosition(ownership.get("offset", self._initial_event_position)) - ) + partition_id = ownership["partition_id"] + eventhub_name = ownership["eventhub_name"] + consumer_group_name = ownership["consumer_group_name"] + owner_id = ownership["owner_id"] checkpoint_manager = CheckpointManager( - ownership["partition_id"], - ownership["eventhub_name"], - ownership["consumer_group_name"], - ownership["owner_id"], + eventhub_name, + consumer_group_name, + partition_id, + owner_id, self._partition_manager ) - - async def initialize(): - if hasattr(partition_processor, "initialize"): - await partition_processor.initialize() - - async def process_error(err): - if hasattr(partition_processor, "process_error"): - await partition_processor.process_error(err, checkpoint_manager) - - async def close(close_reason): - if hasattr(partition_processor, "close"): - await partition_processor.close(close_reason, checkpoint_manager) + partition_processor.eventhub_name = ownership + partition_processor.checkpoint_manager = checkpoint_manager + partition_consumer = self._eventhub_client.create_consumer( + consumer_group_name, + partition_id, + EventPosition(ownership.get("offset", self._initial_event_position)) + ) try: while True: try: - await initialize() + await partition_processor.initialize() events = await partition_consumer.receive(timeout=self._receive_timeout) await partition_processor.process_events(events, checkpoint_manager) except asyncio.CancelledError as cancelled_error: log.info( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "is cancelled", + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " is 
cancelled", ownership["owner_id"], ownership["eventhub_name"], ownership["partition_id"], ownership["consumer_group_name"] ) - await process_error(cancelled_error) - await close(CloseReason.SHUTDOWN) + await partition_processor.process_error(cancelled_error, checkpoint_manager) + await partition_processor.close(CloseReason.SHUTDOWN, checkpoint_manager) # TODO: release the ownership immediately via partition manager break except EventHubError as eh_err: - reason = CloseReason.OWNERSHIP_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION + reason = CloseReason.OWNERSHIP_LOST if eh_err.error == "link:stolen" \ + else CloseReason.EVENTHUB_EXCEPTION log.warning( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has met an exception receiving events. It's being closed. The exception is %r.", + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " has met an exception receiving events. It's being closed. 
The exception is %r.", ownership["owner_id"], ownership["eventhub_name"], ownership["partition_id"], ownership["consumer_group_name"], eh_err ) - await process_error(eh_err) - await close(reason) # An EventProcessor will pick up this partition again after the ownership is released + await partition_processor.process_error(eh_err, checkpoint_manager) + await partition_processor.close(reason, checkpoint_manager) + # An EventProcessor will pick up this partition again after the ownership is released # TODO: release the ownership immediately via partition manager break - except Exception as exp: + except Exception as exp: # pylint:disable=broad-except log.warning(exp) finally: await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py index e4ecb1bec824..e6d3e5df2538 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py @@ -34,7 +34,7 @@ async def list_ownership(self, eventhub_name: str, consumer_group_name: str) -> last_modified_time etag """ - pass + @abstractmethod async def claim_ownership(self, partitions: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]: @@ -54,13 +54,14 @@ async def claim_ownership(self, partitions: Iterable[Dict[str, Any]]) -> Iterabl last_modified_time etag """ - pass + @abstractmethod async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, owner_id, offset, sequence_number) -> None: """ - Updates the checkpoint using the given information for the associated partition and consumer group in the chosen storage service. + Updates the checkpoint using the given information for the associated partition and + consumer group in the chosen storage service. 
:param eventhub_name: The name of the specific Event Hub the ownership are associated with, relative to the Event Hubs namespace that contains it. @@ -73,11 +74,11 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ :type owner_id: str :param offset: The offset of the ~azure.eventhub.EventData the new checkpoint will be associated with. :type offset: str - :param sequence_number: The sequence_number of the ~azure.eventhub.EventData the new checkpoint will be associated with. + :param sequence_number: The sequence_number of the ~azure.eventhub.EventData the new checkpoint + will be associated with. :type sequence_number: int :return: """ - pass async def close(self): pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 60a95b99e628..37be7006e244 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -4,11 +4,10 @@ # ----------------------------------------------------------------------------------- from typing import List -from typing_extensions import Protocol +from abc import ABC from enum import Enum -from .checkpoint_manager import CheckpointManager - from azure.eventhub import EventData +from .checkpoint_manager import CheckpointManager class CloseReason(Enum): @@ -17,12 +16,14 @@ class CloseReason(Enum): EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events -class PartitionProcessor(Protocol): +class PartitionProcessor(ABC): """ PartitionProcessor processes events received from the Azure Event Hubs service. A single instance of a class - implementing this abstract class will be created for every partition the associated ~azure.eventhub.eventprocessor.EventProcessor owns. 
+ implementing this abstract class will be created for every partition the associated + ~azure.eventhub.eventprocessor.EventProcessor owns. """ + async def initialize(self): pass @@ -34,15 +35,18 @@ async def close(self, reason, checkpoint_manager: CheckpointManager): :param reason: Reason for closing the PartitionProcessor. :type reason: ~azure.eventhub.eventprocessor.CloseReason + :param checkpoint_manager: Use its method update_checkpoint to update checkpoint to the data store + :type checkpoint_manager: ~azure.eventhub.CheckpointManager """ - pass async def process_events(self, events: List[EventData], checkpoint_manager: CheckpointManager): """Called when a batch of events have been received. :param events: Received events. :type events: list[~azure.eventhub.common.EventData] + :param checkpoint_manager: Use its method update_checkpoint to update checkpoint to the data store + :type checkpoint_manager: ~azure.eventhub.CheckpointManager """ raise NotImplementedError @@ -52,6 +56,7 @@ async def process_error(self, error, checkpoint_manager: CheckpointManager): :param error: The error that happens. 
:type error: Exception + :param checkpoint_manager: Use its method update_checkpoint to update checkpoint to the data store + :type checkpoint_manager: ~azure.eventhub.CheckpointManager """ - pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index bf4c46c95eb9..50a7d8caf29e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -53,9 +53,6 @@ def __init__(self, db_filename: str = ":memory:", ownership_table: str = "owners + ",".join(self.primary_keys)\ + "))" c.execute(sql) - except sqlite3.OperationalError: - raise - pass finally: c.close() self.conn = conn @@ -100,13 +97,16 @@ async def claim_ownership(self, partitions): if p.get("etag") == cursor_fetch[0][0]: p["last_modified_time"] = time.time() p["etag"] = str(uuid.uuid4()) - other_fields_without_checkpoint = list(filter(lambda x: x not in self.checkpoint_fields, self.other_fields)) + other_fields_without_checkpoint = list( + filter(lambda x: x not in self.checkpoint_fields, self.other_fields) + ) sql = "update " + _check_table_name(self.ownership_table) + " set "\ + ','.join([field+"=?" for field in other_fields_without_checkpoint])\ + " where "\ + " and ".join([field+"=?" 
for field in self.primary_keys]) - cursor.execute(sql, tuple(p.get(field) for field in other_fields_without_checkpoint) + tuple(p.get(field) for field in self.primary_keys)) + cursor.execute(sql, tuple(p.get(field) for field in other_fields_without_checkpoint) + + tuple(p.get(field) for field in self.primary_keys)) result.append(p) else: logger.info("EventProcessor %r failed to claim partition %r " @@ -121,15 +121,19 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ offset, sequence_number): cursor = self.conn.cursor() try: - cursor.execute("select owner_id from " + _check_table_name(self.ownership_table) + " where eventhub_name=? and consumer_group_name=? and partition_id=?", + cursor.execute("select owner_id from " + _check_table_name(self.ownership_table) + + " where eventhub_name=? and consumer_group_name=? and partition_id=?", (eventhub_name, consumer_group_name, partition_id)) cursor_fetch = cursor.fetchall() if cursor_fetch and owner_id == cursor_fetch[0][0]: - cursor.execute("update " + _check_table_name(self.ownership_table) + " set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? and partition_id=?", + cursor.execute("update " + _check_table_name(self.ownership_table) + + " set offset=?, sequence_number=? " + "where eventhub_name=? and consumer_group_name=? 
and partition_id=?", (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) self.conn.commit() else: - logger.info("EventProcessor couldn't checkpoint to partition %r because it no longer has the ownership", partition_id) + logger.info("EventProcessor couldn't checkpoint to partition %r because it no longer has the ownership", + partition_id) finally: cursor.close() diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py index a2165eb81110..c17bb95bd4cf 100644 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -2,7 +2,7 @@ import logging import os from azure.eventhub.aio import EventHubClient -from azure.eventhub.eventprocessor import EventProcessor +from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor from azure.eventhub.eventprocessor import Sqlite3PartitionManager RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 
0 or None means no timeout @@ -17,7 +17,7 @@ async def do_operation(event): print(event) -class MyPartitionProcessor(object): +class MyPartitionProcessor(PartitionProcessor): async def process_events(self, events, checkpoint_manager): if events: await asyncio.gather(*[do_operation(event) for event in events]) From e5f3b5072f790a69379b05988d3f4738ad26a30e Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 30 Aug 2019 10:26:57 -0700 Subject: [PATCH 16/39] reduce dictionary access --- .../eventhub/eventprocessor/event_processor.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index b300b7bd0a1c..aaeb2de92dd0 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -192,10 +192,10 @@ async def _receive(self, ownership): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" " is cancelled", - ownership["owner_id"], - ownership["eventhub_name"], - ownership["partition_id"], - ownership["consumer_group_name"] + owner_id, + eventhub_name, + partition_id, + consumer_group_name ) await partition_processor.process_error(cancelled_error, checkpoint_manager) await partition_processor.close(CloseReason.SHUTDOWN, checkpoint_manager) @@ -207,10 +207,10 @@ async def _receive(self, ownership): log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" " has met an exception receiving events. It's being closed. 
The exception is %r.", - ownership["owner_id"], - ownership["eventhub_name"], - ownership["partition_id"], - ownership["consumer_group_name"], + owner_id, + eventhub_name, + partition_id, + consumer_group_name, eh_err ) await partition_processor.process_error(eh_err, checkpoint_manager) From 8343876f993d67f8c8583c1f8648904561e7b80c Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 2 Sep 2019 12:23:53 -0700 Subject: [PATCH 17/39] Revert "Packaging update of azure-mgmt-storage" This reverts commit cf22c7c2e6f83d4e31a741c190ab63e19d417cc2. --- sdk/storage/azure-mgmt-storage/README.rst | 2 +- sdk/storage/azure-mgmt-storage/setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/storage/azure-mgmt-storage/README.rst b/sdk/storage/azure-mgmt-storage/README.rst index 5b9e52dc5b2d..e8a573239aa7 100644 --- a/sdk/storage/azure-mgmt-storage/README.rst +++ b/sdk/storage/azure-mgmt-storage/README.rst @@ -6,7 +6,7 @@ This is the Microsoft Azure Storage Management Client Library. Azure Resource Manager (ARM) is the next generation of management APIs that replace the old Azure Service Management (ASM). -This package has been tested with Python 2.7, 3.5, 3.6 and 3.7. +This package has been tested with Python 2.7, 3.4, 3.5, 3.6 and 3.7. For the older Azure Service Management (ASM) libraries, see `azure-servicemanagement-legacy `__ library. 
diff --git a/sdk/storage/azure-mgmt-storage/setup.py b/sdk/storage/azure-mgmt-storage/setup.py index 8609a1e171a7..8bef269c261e 100644 --- a/sdk/storage/azure-mgmt-storage/setup.py +++ b/sdk/storage/azure-mgmt-storage/setup.py @@ -64,6 +64,7 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', From 66c5b31e2157c522bbc5396f1640f25d74074f59 Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 2 Sep 2019 12:25:58 -0700 Subject: [PATCH 18/39] Revert "Packaging update of azure-loganalytics" This reverts commit 40c7f03b3940069479340effe10afde00c41cbb4. --- .../azure-loganalytics/MANIFEST.in | 3 +-- .../azure-loganalytics/README.rst | 21 ++++++++++++++++++- sdk/loganalytics/azure-loganalytics/setup.py | 3 +-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/sdk/loganalytics/azure-loganalytics/MANIFEST.in b/sdk/loganalytics/azure-loganalytics/MANIFEST.in index 74c0ac7e42a1..88830671bc4d 100644 --- a/sdk/loganalytics/azure-loganalytics/MANIFEST.in +++ b/sdk/loganalytics/azure-loganalytics/MANIFEST.in @@ -1,4 +1,3 @@ -recursive-include tests *.py *.yaml include *.rst -include azure/__init__.py +include azure/__init__.py \ No newline at end of file diff --git a/sdk/loganalytics/azure-loganalytics/README.rst b/sdk/loganalytics/azure-loganalytics/README.rst index 78bf6021ad9e..f2da77db1eca 100644 --- a/sdk/loganalytics/azure-loganalytics/README.rst +++ b/sdk/loganalytics/azure-loganalytics/README.rst @@ -3,11 +3,30 @@ Microsoft Azure SDK for Python This is the Microsoft Azure Log Analytics Client Library. -This package has been tested with Python 2.7, 3.5, 3.6 and 3.7. +This package has been tested with Python 2.7, 3.4, 3.5, 3.6 and 3.7. For a more complete set of Azure libraries, see the `azure `__ bundle package. 
+Compatibility +============= + +**IMPORTANT**: If you have an earlier version of the azure package +(version < 1.0), you should uninstall it before installing this package. + +You can check the version using pip: + +.. code:: shell + + pip freeze + +If you see azure==0.11.0 (or any version below 1.0), uninstall it first: + +.. code:: shell + + pip uninstall azure + + Usage ===== diff --git a/sdk/loganalytics/azure-loganalytics/setup.py b/sdk/loganalytics/azure-loganalytics/setup.py index 52e41c6df814..87ff80807710 100644 --- a/sdk/loganalytics/azure-loganalytics/setup.py +++ b/sdk/loganalytics/azure-loganalytics/setup.py @@ -53,7 +53,6 @@ version=version, description='Microsoft Azure {} Client Library for Python'.format(PACKAGE_PPRINT_NAME), long_description=readme + '\n\n' + history, - long_description_content_type='text/x-rst', license='MIT License', author='Microsoft Corporation', author_email='azpysdkhelp@microsoft.com', @@ -64,6 +63,7 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', @@ -77,7 +77,6 @@ ]), install_requires=[ 'msrest>=0.5.0', - 'msrestazure>=0.4.32,<2.0.0', 'azure-common~=1.1', ], extras_require={ From bcd851a31259c9f9321ebf187663c1a504bd6a03 Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 2 Sep 2019 12:26:29 -0700 Subject: [PATCH 19/39] Revert "Packaging update of azure-mgmt-datalake-analytics" This reverts commit c126bea5d4053c7c720dea12e8826e0f36f0a5e5. 
--- .../azure-mgmt-datalake-analytics/MANIFEST.in | 5 ----- .../azure-mgmt-datalake-analytics/README.rst | 21 ++++++++++++++++++- .../azure-mgmt-datalake-analytics/setup.py | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in b/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in index c0cf549166ee..bb37a2723dae 100644 --- a/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in +++ b/sdk/datalake/azure-mgmt-datalake-analytics/MANIFEST.in @@ -1,6 +1 @@ -recursive-include tests *.py *.yaml include *.rst -include azure/__init__.py -include azure/mgmt/__init__.py -include azure/mgmt/datalake/__init__.py - diff --git a/sdk/datalake/azure-mgmt-datalake-analytics/README.rst b/sdk/datalake/azure-mgmt-datalake-analytics/README.rst index fb18ab8d347d..756ded7f07a9 100644 --- a/sdk/datalake/azure-mgmt-datalake-analytics/README.rst +++ b/sdk/datalake/azure-mgmt-datalake-analytics/README.rst @@ -6,7 +6,7 @@ This is the Microsoft Azure Data Lake Analytics Management Client Library. Azure Resource Manager (ARM) is the next generation of management APIs that replace the old Azure Service Management (ASM). -This package has been tested with Python 2.7, 3.5, 3.6 and 3.7. +This package has been tested with Python 2.7, 3.4, 3.5, 3.6 and 3.7. For the older Azure Service Management (ASM) libraries, see `azure-servicemanagement-legacy `__ library. @@ -14,6 +14,25 @@ For the older Azure Service Management (ASM) libraries, see For a more complete set of Azure libraries, see the `azure `__ bundle package. +Compatibility +============= + +**IMPORTANT**: If you have an earlier version of the azure package +(version < 1.0), you should uninstall it before installing this package. + +You can check the version using pip: + +.. code:: shell + + pip freeze + +If you see azure==0.11.0 (or any version below 1.0), uninstall it first: + +.. 
code:: shell + + pip uninstall azure + + Usage ===== diff --git a/sdk/datalake/azure-mgmt-datalake-analytics/setup.py b/sdk/datalake/azure-mgmt-datalake-analytics/setup.py index 4ba2e4e83639..68f340388091 100644 --- a/sdk/datalake/azure-mgmt-datalake-analytics/setup.py +++ b/sdk/datalake/azure-mgmt-datalake-analytics/setup.py @@ -53,7 +53,6 @@ version=version, description='Microsoft Azure {} Client Library for Python'.format(PACKAGE_PPRINT_NAME), long_description=readme + '\n\n' + history, - long_description_content_type='text/x-rst', license='MIT License', author='Microsoft Corporation', author_email='azpysdkhelp@microsoft.com', @@ -64,6 +63,7 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', From d740bb02daae73250d5cf141f374b24f6b0b2f31 Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 2 Sep 2019 15:13:16 -0700 Subject: [PATCH 20/39] Trivial code change --- .../azure/eventhub/eventprocessor/_ownership_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py index e0c3d213c63a..f1c0daf63eb9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py @@ -83,7 +83,7 @@ async def _balance_ownership(self, all_partition_ids): :return: List[Dict[str, Any]], A list of ownership. 
""" ownership_list = await self.partition_manager.list_ownership( - self.eventhub_client.eh_name, self.consumer_group_name + self.eventhub_name, self.consumer_group_name ) now = time.time() ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup @@ -116,7 +116,7 @@ async def _balance_ownership(self, all_partition_ids): random_partition_id = random.choice(claimable_partition_ids) random_chosen_to_claim = ownership_dict.get(random_partition_id, {"partition_id": random_partition_id, - "eventhub_name": self.eventhub_client.eh_name, + "eventhub_name": self.eventhub_name, "consumer_group_name": self.consumer_group_name }) random_chosen_to_claim["owner_id"] = self.owner_id From aad6978f934a3f843adb1de3879a0535c0a3ca8a Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 3 Sep 2019 03:20:07 -0700 Subject: [PATCH 21/39] Refine exception handling for eventprocessor --- .../eventprocessor/event_processor.py | 69 +++++++++++++------ .../eventprocessor/partition_manager.py | 9 ++- .../eventprocessor/partition_processor.py | 1 + .../sqlite3_partition_manager.py | 3 +- 4 files changed, 59 insertions(+), 23 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index aaeb2de92dd0..e782f9acea52 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -11,7 +11,7 @@ from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient from .checkpoint_manager import CheckpointManager -from .partition_manager import PartitionManager +from .partition_manager import PartitionManager, OwnershipLostError from ._ownership_manager import OwnershipManager from .partition_processor import CloseReason, PartitionProcessor @@ -182,13 +182,43 @@ async def _receive(self, 
ownership): EventPosition(ownership.get("offset", self._initial_event_position)) ) + async def process_error(err): + log.error( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " has met an error. The exception is %r.", + owner_id, eventhub_name, partition_id, consumer_group_name, err + ) + try: + await partition_processor.process_error(err, checkpoint_manager) + except Exception as err_again: # pylint:disable=broad-except + log.error( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " has another error during running process_error(). The exception is %r.", + owner_id, eventhub_name, partition_id, consumer_group_name, err_again + ) + + async def close(reason): + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " is being closed. Reason is: %r", + owner_id, eventhub_name, partition_id, consumer_group_name, reason + ) + try: + await partition_processor.close(reason, checkpoint_manager) + except Exception as err: # pylint:disable=broad-except + log.error( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " has an error during running close(). 
The exception is %r.", + owner_id, eventhub_name, partition_id, consumer_group_name, err + ) + try: while True: try: await partition_processor.initialize() events = await partition_consumer.receive(timeout=self._receive_timeout) await partition_processor.process_events(events, checkpoint_manager) - except asyncio.CancelledError as cancelled_error: + except asyncio.CancelledError: log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" " is cancelled", @@ -197,28 +227,25 @@ async def _receive(self, ownership): partition_id, consumer_group_name ) - await partition_processor.process_error(cancelled_error, checkpoint_manager) - await partition_processor.close(CloseReason.SHUTDOWN, checkpoint_manager) - # TODO: release the ownership immediately via partition manager + if self._running is False: + await close(CloseReason.SHUTDOWN) + else: + await close(CloseReason.OWNERSHIP_LOST) + # TODO: release the ownership immediately via partition manager in preview 4 break except EventHubError as eh_err: - reason = CloseReason.OWNERSHIP_LOST if eh_err.error == "link:stolen" \ - else CloseReason.EVENTHUB_EXCEPTION - log.warning( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" - " has met an exception receiving events. It's being closed. 
The exception is %r.", - owner_id, - eventhub_name, - partition_id, - consumer_group_name, - eh_err - ) - await partition_processor.process_error(eh_err, checkpoint_manager) - await partition_processor.close(reason, checkpoint_manager) + await process_error(eh_err) + await close(CloseReason.EVENTHUB_EXCEPTION) # An EventProcessor will pick up this partition again after the ownership is released - # TODO: release the ownership immediately via partition manager + # TODO: release the ownership immediately via partition manager in preview 4 + break + except OwnershipLostError: + await close(CloseReason.OWNERSHIP_LOST) + break + except Exception as other_error: # pylint:disable=broad-except + await process_error(other_error) + await close(CloseReason.PROCESS_EVENTS_ERROR) + # TODO: release the ownership immediately via partition manager in preview 4 break - except Exception as exp: # pylint:disable=broad-except - log.warning(exp) finally: await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py index e6d3e5df2538..f82ceed383b1 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py @@ -77,8 +77,15 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ :param sequence_number: The sequence_number of the ~azure.eventhub.EventData the new checkpoint will be associated with. 
:type sequence_number: int - :return: + :return: None + :raise: `OwnershipLostError`, `CheckpointError` """ async def close(self): pass + + +class OwnershipLostError(Exception): + """Raises when update_checkpoint detects the ownership has been lost + + """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 37be7006e244..f33983a6fc03 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -14,6 +14,7 @@ class CloseReason(Enum): SHUTDOWN = 0 # user call EventProcessor.stop() OWNERSHIP_LOST = 1 # lose the ownership of a partition. EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events + PROCESS_EVENTS_ERROR = 3 # Exception happens during process_events class PartitionProcessor(ABC): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 50a7d8caf29e..76894184db51 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -7,7 +7,7 @@ import uuid import sqlite3 import logging -from .partition_manager import PartitionManager +from .partition_manager import PartitionManager, OwnershipLostError logger = logging.getLogger(__name__) @@ -134,6 +134,7 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ else: logger.info("EventProcessor couldn't checkpoint to partition %r because it no longer has the ownership", partition_id) + raise OwnershipLostError() finally: cursor.close() From a55dc13765e36f3bff4aca82d51fb5d5b6ead14c Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 3 Sep 2019 03:20:40 -0700 
Subject: [PATCH 22/39] Enable pylint for eventprocessor --- pylintrc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pylintrc b/pylintrc index c5cddb6d9030..399344e8ea99 100644 --- a/pylintrc +++ b/pylintrc @@ -2,8 +2,8 @@ ignore-patterns=test_*,conftest,setup reports=no -# PYLINT DIRECTORY BLACKLIST. Ignore eventprocessor temporarily until new eventprocessor code is merged to master -ignore=_generated,samples,examples,test,tests,doc,.tox,eventprocessor +# PYLINT DIRECTORY BLACKLIST. +ignore=_generated,samples,examples,test,tests,doc,.tox init-hook='import sys; sys.path.insert(0, os.path.abspath(os.getcwd().rsplit("azure-sdk-for-python", 1)[0] + "azure-sdk-for-python/scripts/pylint_custom_plugin"))' load-plugins=pylint_guidelines_checker From a339985bbee3c3793ff399334b5307b6cc1922f0 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 3 Sep 2019 10:08:52 -0700 Subject: [PATCH 23/39] Expose OwnershipLostError --- .../azure-eventhubs/azure/eventhub/eventprocessor/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py index 6280a03fce12..90fa1f2c2e4f 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py @@ -6,7 +6,7 @@ from .event_processor import EventProcessor from .checkpoint_manager import CheckpointManager from .partition_processor import PartitionProcessor, CloseReason -from .partition_manager import PartitionManager +from .partition_manager import PartitionManager, OwnershipLostError from .sqlite3_partition_manager import Sqlite3PartitionManager __all__ = [ @@ -16,4 +16,5 @@ 'PartitionManager', 'CheckpointManager', 'Sqlite3PartitionManager', + 'OwnershipLostError', ] From 9102713c18db2f5c5799a4d0ef9cfa2d0483b005 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 4 Sep 2019 
12:12:08 -0700 Subject: [PATCH 24/39] Move eventprocessor to aio rename Sqlite3PartitionManager to SamplePartitionManager --- .../eventhub/{ => aio}/eventprocessor/__init__.py | 3 --- .../{ => aio}/eventprocessor/_ownership_manager.py | 0 .../{ => aio}/eventprocessor/checkpoint_manager.py | 0 .../{ => aio}/eventprocessor/event_processor.py | 0 .../{ => aio}/eventprocessor/partition_manager.py | 0 .../{ => aio}/eventprocessor/partition_processor.py | 4 ++-- .../aio/eventprocessor/partitionmanagers/__init__.py | 10 ++++++++++ .../partitionmanagers/sample_partition_manager.py} | 7 ++++--- .../azure/eventhub/{ => aio}/eventprocessor/utils.py | 0 .../examples/eventprocessor/event_processor_example.py | 6 +++--- 10 files changed, 19 insertions(+), 11 deletions(-) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/__init__.py (85%) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/_ownership_manager.py (100%) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/checkpoint_manager.py (100%) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/event_processor.py (100%) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/partition_manager.py (100%) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/partition_processor.py (97%) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py rename sdk/eventhub/azure-eventhubs/azure/eventhub/{eventprocessor/sqlite3_partition_manager.py => aio/eventprocessor/partitionmanagers/sample_partition_manager.py} (96%) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{ => aio}/eventprocessor/utils.py (100%) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py similarity index 85% rename from 
sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py index 90fa1f2c2e4f..86810f6053b4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py @@ -7,14 +7,11 @@ from .checkpoint_manager import CheckpointManager from .partition_processor import PartitionProcessor, CloseReason from .partition_manager import PartitionManager, OwnershipLostError -from .sqlite3_partition_manager import Sqlite3PartitionManager __all__ = [ 'CloseReason', 'EventProcessor', 'PartitionProcessor', 'PartitionManager', - 'CheckpointManager', - 'Sqlite3PartitionManager', 'OwnershipLostError', ] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/_ownership_manager.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_ownership_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/_ownership_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/checkpoint_manager.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/checkpoint_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py diff --git 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py similarity index 97% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py index f33983a6fc03..2f46145bfb76 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py @@ -4,7 +4,7 @@ # ----------------------------------------------------------------------------------- from typing import List -from abc import ABC +from abc import ABC, abstractmethod from enum import Enum from azure.eventhub import EventData from .checkpoint_manager import CheckpointManager @@ -41,6 +41,7 @@ async def close(self, reason, checkpoint_manager: CheckpointManager): """ + @abstractmethod async def process_events(self, events: List[EventData], checkpoint_manager: CheckpointManager): """Called when a batch of events have been received. 
@@ -50,7 +51,6 @@ async def process_events(self, events: List[EventData], checkpoint_manager: Chec :type checkpoint_manager: ~azure.eventhub.CheckpointManager """ - raise NotImplementedError async def process_error(self, error, checkpoint_manager: CheckpointManager): """Called when an error happens diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py new file mode 100644 index 000000000000..d81fee2a97cd --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py @@ -0,0 +1,10 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from .sample_partition_manager import SamplePartitionManager + +__all__ = [ + 'SamplePartitionManager', +] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/sample_partition_manager.py similarity index 96% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/sample_partition_manager.py index 76894184db51..4859d675d5b9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/sample_partition_manager.py @@ -7,7 +7,7 @@ import uuid import sqlite3 import logging -from .partition_manager import PartitionManager, OwnershipLostError +from 
azure.eventhub.aio.eventprocessor import PartitionManager, OwnershipLostError logger = logging.getLogger(__name__) @@ -19,9 +19,10 @@ def _check_table_name(table_name: str): return table_name -class Sqlite3PartitionManager(PartitionManager): +class SamplePartitionManager(PartitionManager): """An implementation of PartitionManager by using the sqlite3 in Python standard library. Sqlite3 is a mini sql database that runs in memory or files. + Please don't use this PartitionManager for production use. """ @@ -41,7 +42,7 @@ def __init__(self, db_filename: str = ":memory:", ownership_table: str = "owners Sqlite3 will run in memory without a file when db_filename is ":memory:". :param ownership_table: The table name of the sqlite3 database. """ - super(Sqlite3PartitionManager, self).__init__() + super(SamplePartitionManager, self).__init__() self.ownership_table = _check_table_name(ownership_table) conn = sqlite3.connect(db_filename) c = conn.cursor() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py index c17bb95bd4cf..de1b88fc4203 100644 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -2,8 +2,8 @@ import logging import os from azure.eventhub.aio import EventHubClient -from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor -from azure.eventhub.eventprocessor import Sqlite3PartitionManager +from azure.eventhub.aio.eventprocessor import EventProcessor, PartitionProcessor 
+from azure.eventhub.aio.eventprocessor.partitionmanagers import SamplePartitionManager RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 0 or None means no timeout RETRY_TOTAL = 3 # max number of retries for receive operations within the receive timeout. Actual number of retries clould be less if RECEIVE_TIMEOUT is too small @@ -29,7 +29,7 @@ async def process_events(self, events, checkpoint_manager): if __name__ == '__main__': loop = asyncio.get_event_loop() client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, retry_total=RETRY_TOTAL) - partition_manager = Sqlite3PartitionManager(db_filename="eventprocessor_test_db") + partition_manager = SamplePartitionManager(db_filename="eventprocessor_test_db") event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager, polling_interval=1) try: loop.run_until_complete(event_processor.start()) From 278592cfdb6d549515ed2b9e869713af4152d52b Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 4 Sep 2019 13:48:06 -0700 Subject: [PATCH 25/39] change checkpoint_manager to partition context --- .../eventhub/aio/eventprocessor/__init__.py | 3 +- .../aio/eventprocessor/event_processor.py | 16 +++++----- ...kpoint_manager.py => partition_context.py} | 12 ++++---- .../aio/eventprocessor/partition_processor.py | 30 ++++++++++++------- .../eventprocessor/event_processor_example.py | 6 ++-- 5 files changed, 38 insertions(+), 29 deletions(-) rename sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/{checkpoint_manager.py => partition_context.py} (78%) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py index 86810f6053b4..c7cd2def61a7 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py @@ -4,9 +4,9 @@ # 
----------------------------------------------------------------------------------- from .event_processor import EventProcessor -from .checkpoint_manager import CheckpointManager from .partition_processor import PartitionProcessor, CloseReason from .partition_manager import PartitionManager, OwnershipLostError +from .partition_context import PartitionContext __all__ = [ 'CloseReason', @@ -14,4 +14,5 @@ 'PartitionProcessor', 'PartitionManager', 'OwnershipLostError', + 'PartitionContext', ] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index e782f9acea52..b028d960e36b 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -10,7 +10,7 @@ from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient -from .checkpoint_manager import CheckpointManager +from .partition_context import PartitionContext from .partition_manager import PartitionManager, OwnershipLostError from ._ownership_manager import OwnershipManager from .partition_processor import CloseReason, PartitionProcessor @@ -36,10 +36,10 @@ class EventProcessor(object): # pylint:disable=too-many-instance-attributes from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor, Sqlite3PartitionManager class MyPartitionProcessor(object): - async def process_events(self, events, checkpoint_manager): + async def process_events(self, events, partition_context): if events: # do something sync or async to process the events - await checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) client = EventHubClient.from_connection_string("", receive_timeout=5, retry_total=3) @@ -167,7 +167,7 @@ 
async def _receive(self, ownership): eventhub_name = ownership["eventhub_name"] consumer_group_name = ownership["consumer_group_name"] owner_id = ownership["owner_id"] - checkpoint_manager = CheckpointManager( + partition_context = PartitionContext( eventhub_name, consumer_group_name, partition_id, @@ -175,7 +175,7 @@ async def _receive(self, ownership): self._partition_manager ) partition_processor.eventhub_name = ownership - partition_processor.checkpoint_manager = checkpoint_manager + partition_processor._partition_context = partition_context partition_consumer = self._eventhub_client.create_consumer( consumer_group_name, partition_id, @@ -189,7 +189,7 @@ async def process_error(err): owner_id, eventhub_name, partition_id, consumer_group_name, err ) try: - await partition_processor.process_error(err, checkpoint_manager) + await partition_processor.process_error(err, partition_context) except Exception as err_again: # pylint:disable=broad-except log.error( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" @@ -204,7 +204,7 @@ async def close(reason): owner_id, eventhub_name, partition_id, consumer_group_name, reason ) try: - await partition_processor.close(reason, checkpoint_manager) + await partition_processor.close(reason, partition_context) except Exception as err: # pylint:disable=broad-except log.error( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" @@ -217,7 +217,7 @@ async def close(reason): try: await partition_processor.initialize() events = await partition_consumer.receive(timeout=self._receive_timeout) - await partition_processor.process_events(events, checkpoint_manager) + await partition_processor.process_events(events, partition_context) except asyncio.CancelledError: log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" diff --git 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_context.py similarity index 78% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/checkpoint_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_context.py index 3be0c5d945c3..6aaf939143a2 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_context.py @@ -7,11 +7,10 @@ from .partition_manager import PartitionManager -class CheckpointManager(object): - """ - CheckpointManager is responsible for the creation of checkpoints. - The interaction with the chosen storage service is done via ~azure.eventhub.eventprocessor.PartitionManager. +class PartitionContext(object): + """Contains partition related context information for a PartitionProcessor instance to use. + Users can use update_checkpoint() of this class to save checkpoint data. 
""" def __init__(self, eventhub_name: str, consumer_group_name: str, partition_id: str, owner_id: str, partition_manager: PartitionManager): @@ -19,7 +18,7 @@ def __init__(self, eventhub_name: str, consumer_group_name: str, self.eventhub_name = eventhub_name self.consumer_group_name = consumer_group_name self.owner_id = owner_id - self.partition_manager = partition_manager + self._partition_manager = partition_manager async def update_checkpoint(self, offset, sequence_number=None): """ @@ -33,7 +32,8 @@ async def update_checkpoint(self, offset, sequence_number=None): :type sequence_number: int :return: None """ - await self.partition_manager.update_checkpoint( + # TODO: whether change this method to accept event_data as well + await self._partition_manager.update_checkpoint( self.eventhub_name, self.consumer_group_name, self.partition_id, self.owner_id, offset, sequence_number ) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py index 2f46145bfb76..ecbbe75d543d 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py @@ -7,7 +7,7 @@ from abc import ABC, abstractmethod from enum import Enum from azure.eventhub import EventData -from .checkpoint_manager import CheckpointManager +from .partition_context import PartitionContext class CloseReason(Enum): @@ -25,10 +25,15 @@ class PartitionProcessor(ABC): """ - async def initialize(self): + async def initialize(self, partition_context: PartitionContext): + """ + + :param partition_context: The context information of this partition. 
+ :type partition_context: ~azure.eventhub.aio.eventprocessor.PartitionContext + """ pass - async def close(self, reason, checkpoint_manager: CheckpointManager): + async def close(self, reason, partition_context: PartitionContext): """Called when EventProcessor stops processing this PartitionProcessor. There are different reasons to trigger the PartitionProcessor to close. @@ -36,28 +41,31 @@ async def close(self, reason, checkpoint_manager: CheckpointManager): :param reason: Reason for closing the PartitionProcessor. :type reason: ~azure.eventhub.eventprocessor.CloseReason - :param checkpoint_manager: Use its method update_checkpoint to update checkpoint to the data store - :type checkpoint_manager: ~azure.eventhub.CheckpointManager + :param partition_context: The context information of this partition. + Use its method update_checkpoint to save checkpoint to the data store. + :type partition_context: ~azure.eventhub.aio.eventprocessor.PartitionContext """ @abstractmethod - async def process_events(self, events: List[EventData], checkpoint_manager: CheckpointManager): + async def process_events(self, events: List[EventData], partition_context: PartitionContext): """Called when a batch of events have been received. :param events: Received events. :type events: list[~azure.eventhub.common.EventData] - :param checkpoint_manager: Use its method update_checkpoint to update checkpoint to the data store - :type checkpoint_manager: ~azure.eventhub.CheckpointManager + :param partition_context: The context information of this partition. + Use its method update_checkpoint to save checkpoint to the data store. + :type partition_context: ~azure.eventhub.aio.eventprocessor.PartitionContext """ - async def process_error(self, error, checkpoint_manager: CheckpointManager): + async def process_error(self, error, partition_context: PartitionContext): """Called when an error happens :param error: The error that happens. 
:type error: Exception - :param checkpoint_manager: Use its method update_checkpoint to update checkpoint to the data store - :type checkpoint_manager: ~azure.eventhub.CheckpointManager + :param partition_context: The context information of this partition. + Use its method update_checkpoint to save checkpoint to the data store. + :type partition_context: ~azure.eventhub.aio.eventprocessor.PartitionContext """ diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py index de1b88fc4203..c991a88d20de 100644 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -18,12 +18,12 @@ async def do_operation(event): class MyPartitionProcessor(PartitionProcessor): - async def process_events(self, events, checkpoint_manager): + async def process_events(self, events, partition_context): if events: await asyncio.gather(*[do_operation(event) for event in events]) - await checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) else: - print("empty events received", "partition:", checkpoint_manager.partition_id) + print("empty events received", "partition:", partition_context.partition_id) if __name__ == '__main__': From 665f28cd7251945f1428b397017ec7c239548fa7 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 4 Sep 2019 13:51:23 -0700 Subject: [PATCH 26/39] fix pylint error --- .../azure/eventhub/aio/eventprocessor/event_processor.py | 1 - .../azure/eventhub/aio/eventprocessor/partition_processor.py | 1 - 2 files changed, 2 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index 
b028d960e36b..0ce70990f85f 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -175,7 +175,6 @@ async def _receive(self, ownership): self._partition_manager ) partition_processor.eventhub_name = ownership - partition_processor._partition_context = partition_context partition_consumer = self._eventhub_client.create_consumer( consumer_group_name, partition_id, diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py index ecbbe75d543d..8b0fb2ca7e5c 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_processor.py @@ -31,7 +31,6 @@ async def initialize(self, partition_context: PartitionContext): :param partition_context: The context information of this partition. :type partition_context: ~azure.eventhub.aio.eventprocessor.PartitionContext """ - pass async def close(self, reason, partition_context: PartitionContext): """Called when EventProcessor stops processing this PartitionProcessor. 
From 0060f9d7c688d7135e6f59c11966c679a0fa5911 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 4 Sep 2019 15:31:44 -0700 Subject: [PATCH 27/39] fix a small issue --- .../azure/eventhub/aio/eventprocessor/event_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index 0ce70990f85f..3b0872327d06 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -214,7 +214,7 @@ async def close(reason): try: while True: try: - await partition_processor.initialize() + await partition_processor.initialize(partition_context) events = await partition_consumer.receive(timeout=self._receive_timeout) await partition_processor.process_events(events, partition_context) except asyncio.CancelledError: From 7b4273ab19ab1b00320365f921ed3a2428b828d4 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 4 Sep 2019 19:26:33 -0700 Subject: [PATCH 28/39] Catch list_ownership/claim_ownership exceptions and retry --- .../azure/eventhub/aio/eventprocessor/event_processor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index 3b0872327d06..b3815a4b6fe7 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -119,7 +119,14 @@ async def start(self): if not self._running: self._running = True while self._running: - claimed_ownership_list = await ownership_manager.claim_ownership() + try: + claimed_ownership_list = await ownership_manager.claim_ownership() + except Exception 
as err: + log.exception("An exception occurred during balancing and claiming ownership for eventhub %r " + "consumer group %r. Retrying after %r seconds", + self._eventhub_name, self._consumer_group_name, self._polling_interval, exc_info=err) + await asyncio.sleep(self._polling_interval) + continue if claimed_ownership_list: claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] to_cancel_list = self._tasks.keys() - claimed_partition_ids From bdf97c85ef720a8e6dec8e6b1dc2bd552c9e535a Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 5 Sep 2019 18:53:51 -0700 Subject: [PATCH 29/39] Fix code review issues --- .../eventhub/aio/eventprocessor/__init__.py | 2 + .../aio/eventprocessor/_ownership_manager.py | 7 ++-- .../aio/eventprocessor/event_processor.py | 40 ++++++++----------- .../aio/eventprocessor/partition_manager.py | 8 ++-- .../partitionmanagers/__init__.py | 10 ----- .../sample_partition_manager.py | 0 .../eventprocessor/event_processor_example.py | 2 +- 7 files changed, 27 insertions(+), 42 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py rename sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/{partitionmanagers => }/sample_partition_manager.py (100%) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py index c7cd2def61a7..e3eefa4774f4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/__init__.py @@ -7,6 +7,7 @@ from .partition_processor import PartitionProcessor, CloseReason from .partition_manager import PartitionManager, OwnershipLostError from .partition_context import PartitionContext +from .sample_partition_manager import SamplePartitionManager __all__ = [ 'CloseReason', @@ -15,4 +16,5 @@ 'PartitionManager', 'OwnershipLostError', 
'PartitionContext', + 'SamplePartitionManager', ] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/_ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/_ownership_manager.py index f1c0daf63eb9..094ca8e0ce39 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/_ownership_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/_ownership_manager.py @@ -24,7 +24,7 @@ class OwnershipManager(object): """ def __init__( self, eventhub_client: EventHubClient, consumer_group_name: str, owner_id: str, - partition_manager: PartitionManager, ownership_timeout: int + partition_manager: PartitionManager, ownership_timeout: float ): self.cached_parition_ids = [] # type: List[str] self.eventhub_client = eventhub_client @@ -86,7 +86,7 @@ async def _balance_ownership(self, all_partition_ids): self.eventhub_name, self.consumer_group_name ) now = time.time() - ownership_dict = dict((x["partition_id"], x) for x in ownership_list) # put the list to dict for fast lookup + ownership_dict = {x["partition_id"]: x for x in ownership_list} # put the list to dict for fast lookup not_owned_partition_ids = [pid for pid in all_partition_ids if pid not in ownership_dict] timed_out_partition_ids = [ownership["partition_id"] for ownership in ownership_list if ownership["last_modified_time"] + self.ownership_timeout < now] @@ -100,6 +100,8 @@ async def _balance_ownership(self, all_partition_ids): # calculate expected count per owner all_partition_count = len(all_partition_ids) + # owners_count is the number of active owners. If self.owner_id is not yet among the active owners, + # then plus 1 to include self. This will make owners_count >= 1. 
owners_count = len(active_ownership_by_owner) + \ (0 if self.owner_id in active_ownership_by_owner else 1) expected_count_per_owner = all_partition_count // owners_count @@ -109,7 +111,6 @@ async def _balance_ownership(self, all_partition_ids): to_claim = active_ownership_self if len(active_ownership_self) > most_count_allowed_per_owner: # needs to abandon a partition to_claim.pop() # abandon one partition if owned too many - # TODO: Release an ownership immediately so other EventProcessors won't need to wait it to timeout elif len(active_ownership_self) < expected_count_per_owner: # Either claims an inactive partition, or steals from other owners if claimable_partition_ids: # claim an inactive partition if there is diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index b3815a4b6fe7..fd5573d3eb92 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -56,7 +56,9 @@ async def process_events(self, events, partition_context): def __init__( self, eventhub_client: EventHubClient, consumer_group_name: str, partition_processor_factory: Callable[..., PartitionProcessor], - partition_manager: PartitionManager, **kwargs + partition_manager: PartitionManager, *, + initial_event_position: EventPosition = EventPosition("-1"), polling_interval: float = 10.0 + ): """ Instantiate an EventProcessor. 
@@ -84,14 +86,9 @@ def __init__( self._eventhub_name = eventhub_client.eh_name self._partition_processor_factory = partition_processor_factory self._partition_manager = partition_manager - self._initial_event_position = kwargs.get("initial_event_position", "-1") - # TODO: initial position provider will be a callable - # so users can create initial event position for every partition - self._max_batch_size = eventhub_client.config.max_batch_size - self._receive_timeout = eventhub_client.config.receive_timeout - self._polling_interval = kwargs.get("polling_interval", 10) + self._initial_event_position = initial_event_position # will be replaced by reset event position in preview 4 + self._polling_interval = polling_interval self._ownership_timeout = self._polling_interval * 2 - # TODO: Team haven't decided if this is a separate argument self._tasks = {} # type: Dict[str, asyncio.Task] self._id = str(uuid.uuid4()) self._running = False @@ -122,18 +119,19 @@ async def start(self): try: claimed_ownership_list = await ownership_manager.claim_ownership() except Exception as err: - log.exception("An exception occurred during balancing and claiming ownership for eventhub %r " - "consumer group %r. Retrying after %r seconds", - self._eventhub_name, self._consumer_group_name, self._polling_interval, exc_info=err) + log.warning("An exception (%r) occurred during balancing and claiming ownership for eventhub %r " + "consumer group %r. Retrying after %r seconds", + err, self._eventhub_name, self._consumer_group_name, self._polling_interval) await asyncio.sleep(self._polling_interval) continue + + to_cancel_list = self._tasks.keys() if claimed_ownership_list: claimed_partition_ids = [x["partition_id"] for x in claimed_ownership_list] to_cancel_list = self._tasks.keys() - claimed_partition_ids self._create_tasks_for_claimed_ownership(claimed_ownership_list) else: - log.warning("EventProcessor %r hasn't claimed an ownership. 
It keeps claiming.", self._id) - to_cancel_list = self._tasks.keys() + log.info("EventProcessor %r hasn't claimed an ownership. It keeps claiming.", self._id) if to_cancel_list: self._cancel_tasks_for_partitions(to_cancel_list) log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_list) @@ -153,7 +151,7 @@ async def stop(self): _, task = self._tasks.popitem() task.cancel() log.info("EventProcessor %r has been cancelled", self._id) - await asyncio.sleep(2) # give some time to finish after cancelled + await asyncio.sleep(2) # give some time to finish after cancelled. def _cancel_tasks_for_partitions(self, to_cancel_partitions): for partition_id in to_cancel_partitions: @@ -181,15 +179,14 @@ async def _receive(self, ownership): owner_id, self._partition_manager ) - partition_processor.eventhub_name = ownership partition_consumer = self._eventhub_client.create_consumer( consumer_group_name, partition_id, - EventPosition(ownership.get("offset", self._initial_event_position)) + EventPosition(ownership.get("offset", self._initial_event_position.value)) ) async def process_error(err): - log.error( + log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" " has met an error. The exception is %r.", owner_id, eventhub_name, partition_id, consumer_group_name, err @@ -197,7 +194,7 @@ async def process_error(err): try: await partition_processor.process_error(err, partition_context) except Exception as err_again: # pylint:disable=broad-except - log.error( + log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" " has another error during running process_error(). 
The exception is %r.", owner_id, eventhub_name, partition_id, consumer_group_name, err_again @@ -212,7 +209,7 @@ async def close(reason): try: await partition_processor.close(reason, partition_context) except Exception as err: # pylint:disable=broad-except - log.error( + log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" " has an error during running close(). The exception is %r.", owner_id, eventhub_name, partition_id, consumer_group_name, err @@ -222,7 +219,7 @@ async def close(reason): while True: try: await partition_processor.initialize(partition_context) - events = await partition_consumer.receive(timeout=self._receive_timeout) + events = await partition_consumer.receive() await partition_processor.process_events(events, partition_context) except asyncio.CancelledError: log.info( @@ -237,13 +234,11 @@ async def close(reason): await close(CloseReason.SHUTDOWN) else: await close(CloseReason.OWNERSHIP_LOST) - # TODO: release the ownership immediately via partition manager in preview 4 break except EventHubError as eh_err: await process_error(eh_err) await close(CloseReason.EVENTHUB_EXCEPTION) # An EventProcessor will pick up this partition again after the ownership is released - # TODO: release the ownership immediately via partition manager in preview 4 break except OwnershipLostError: await close(CloseReason.OWNERSHIP_LOST) @@ -251,7 +246,6 @@ async def close(reason): except Exception as other_error: # pylint:disable=broad-except await process_error(other_error) await close(CloseReason.PROCESS_EVENTS_ERROR) - # TODO: release the ownership immediately via partition manager in preview 4 break finally: await partition_consumer.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py index f82ceed383b1..9014b22a2f79 100644 --- 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py @@ -35,14 +35,13 @@ async def list_ownership(self, eventhub_name: str, consumer_group_name: str) -> etag """ - @abstractmethod - async def claim_ownership(self, partitions: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]: + async def claim_ownership(self, ownership_list: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]: """ Tries to claim a list of specified ownership. - :param partitions: Iterable of dictionaries containing all the ownership to claim. - :type partitions: Iterable of dict + :param ownership_list: Iterable of dictionaries containing all the ownership to claim. + :type ownership_list: Iterable of dict :return: Iterable of dictionaries containing the following partition ownership information: eventhub_name consumer_group_name @@ -55,7 +54,6 @@ async def claim_ownership(self, partitions: Iterable[Dict[str, Any]]) -> Iterabl etag """ - @abstractmethod async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, owner_id, offset, sequence_number) -> None: diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py deleted file mode 100644 index d81fee2a97cd..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------- - -from .sample_partition_manager import SamplePartitionManager - -__all__ = [ - 'SamplePartitionManager', -] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/sample_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/sample_partition_manager.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partitionmanagers/sample_partition_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/sample_partition_manager.py diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py index c991a88d20de..c0826e274704 100644 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -3,7 +3,7 @@ import os from azure.eventhub.aio import EventHubClient from azure.eventhub.aio.eventprocessor import EventProcessor, PartitionProcessor -from azure.eventhub.aio.eventprocessor.partitionmanagers import SamplePartitionManager +from azure.eventhub.aio.eventprocessor import SamplePartitionManager RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 0 or None means no timeout RETRY_TOTAL = 3 # max number of retries for receive operations within the receive timeout. 
Actual number of retries clould be less if RECEIVE_TIMEOUT is too small From 02a4daf2d0c2ae68cce59e371dbec5a887acb4fa Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 10:07:22 -0700 Subject: [PATCH 30/39] fix event processor long running test --- .../test_long_running_eventprocessor.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_long_running_eventprocessor.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_long_running_eventprocessor.py index 741a521d8fef..1e3cae9eefa7 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_long_running_eventprocessor.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_long_running_eventprocessor.py @@ -13,7 +13,7 @@ from logging.handlers import RotatingFileHandler from azure.eventhub.aio import EventHubClient -from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor, Sqlite3PartitionManager +from azure.eventhub.aio.eventprocessor import EventProcessor, PartitionProcessor, SamplePartitionManager from azure.eventhub import EventData @@ -44,23 +44,23 @@ def get_logger(filename, level=logging.INFO): class MyEventProcessor(PartitionProcessor): - async def close(self, reason): + async def close(self, reason, partition_context): logger.info("PartitionProcessor closed (reason {}, id {})".format( reason, - self._checkpoint_manager.partition_id + partition_context.partition_id )) - async def process_events(self, events): + async def process_events(self, events, partition_context): if events: event = events[-1] print("Processing id {}, offset {}, sq_number {})".format( - self._checkpoint_manager.partition_id, + partition_context.partition_id, event.offset, event.sequence_number)) - await self._checkpoint_manager.update_checkpoint(event.offset, event.sequence_number) + await partition_context.update_checkpoint(event.offset, event.sequence_number) - async def process_error(self, error): - 
logger.info("Event Processor Error for partition {}, {!r}".format(self._checkpoint_manager.partition_id, error)) + async def process_error(self, error, partition_context): + logger.info("Event Processor Error for partition {}, {!r}".format(partition_context.partition_id, error)) async def wait_and_close(host, duration): @@ -133,7 +133,7 @@ async def test_long_running_eph(live_eventhub): client, live_eventhub['consumer_group'], MyEventProcessor, - Sqlite3PartitionManager() + SamplePartitionManager() ) tasks = asyncio.gather( @@ -153,4 +153,4 @@ async def test_long_running_eph(live_eventhub): config['consumer_group'] = "$Default" config['partition'] = "0" loop = asyncio.get_event_loop() - loop.run_until_complete(test_long_running_eph(config)) \ No newline at end of file + loop.run_until_complete(test_long_running_eph(config)) From a9446de31a6f33e8c86aeec0410c8fbb182f3188 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 12:57:38 -0700 Subject: [PATCH 31/39] Remove utils.py --- .../azure/eventhub/aio/eventprocessor/utils.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py deleted file mode 100644 index 1d8add0f49a0..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------- - -import asyncio - - -def get_running_loop(): - try: - return asyncio.get_running_loop() - except AttributeError: # 3.5 / 3.6 - loop = asyncio._get_running_loop() # pylint: disable=protected-access, no-member - if loop is None: - raise RuntimeError('No running event loop') - return loop From 8dfdec9c3864e0bafcda25c03bb45ab7003531c0 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 15:32:07 -0700 Subject: [PATCH 32/39] Remove close() method --- .../azure/eventhub/aio/eventprocessor/partition_manager.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py index 9014b22a2f79..4bb84779dd53 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/partition_manager.py @@ -79,9 +79,6 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ :raise: `OwnershipLostError`, `CheckpointError` """ - async def close(self): - pass - class OwnershipLostError(Exception): """Raises when update_checkpoint detects the ownership has been lost From 2aace8280da6e60d965306759e4090c804f464f4 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 16:52:15 -0700 Subject: [PATCH 33/39] Updated docstrings --- .../aio/eventprocessor/event_processor.py | 65 ++++++++++++------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index fd5573d3eb92..030eaadd6c59 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ 
b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. See License.txt in the project root for license information. # ----------------------------------------------------------------------------------- -from typing import Callable, Dict +from typing import Callable, Dict, Type import uuid import asyncio import logging @@ -32,33 +32,52 @@ class EventProcessor(object): # pylint:disable=too-many-instance-attributes .. code-block:: python import asyncio + import logging + import os from azure.eventhub.aio import EventHubClient - from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor, Sqlite3PartitionManager + from azure.eventhub.aio.eventprocessor import EventProcessor, PartitionProcessor + from azure.eventhub.aio.eventprocessor import SamplePartitionManager - class MyPartitionProcessor(object): + RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 0 or None means no timeout + RETRY_TOTAL = 3 # max number of retries for receive operations within the receive timeout. + # Actual number of retries clould be less if RECEIVE_TIMEOUT is too small + CONNECTION_STR = os.environ["EVENT_HUB_CONN_STR"] + + logging.basicConfig(level=logging.INFO) + + async def do_operation(event): + # do some sync or async operations. 
If the operation is i/o bound, async will have better performance + print(event) + + + class MyPartitionProcessor(PartitionProcessor): async def process_events(self, events, partition_context): if events: - # do something sync or async to process the events + await asyncio.gather(*[do_operation(event) for event in events]) await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) + async def main(): + client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, + retry_total=RETRY_TOTAL) + partition_manager = SamplePartitionManager(db_filename=":memory:") # a filename to persist checkpoint + try: + event_processor = EventProcessor(client, "$default", MyPartitionProcessor, + partition_manager, polling_interval=10) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(60) + await event_processor.stop() + finally: + await partition_manager.close() - client = EventHubClient.from_connection_string("", receive_timeout=5, retry_total=3) - partition_manager = Sqlite3PartitionManager() - try: - event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(100) # allow it to run 100 seconds - await event_processor.stop() - finally: - await partition_manager.close() + if __name__ == '__main__': + asyncio.get_event_loop().run_until_complete(main()) """ def __init__( self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory: Callable[..., PartitionProcessor], + partition_processor_type: Type[PartitionProcessor], partition_manager: PartitionManager, *, initial_event_position: EventPosition = EventPosition("-1"), polling_interval: float = 10.0 - ): """ Instantiate an EventProcessor. @@ -68,23 +87,23 @@ def __init__( :param consumer_group_name: The name of the consumer group this event processor is associated with. 
Events will be read only in the context of this group. :type consumer_group_name: str - :param partition_processor_factory: A callable(type or function) object that creates an instance of a class - implementing the ~azure.eventhub.eventprocessor.PartitionProcessor. - :type partition_processor_factory: callable object + :param partition_processor_type: A subclass type of ~azure.eventhub.eventprocessor.PartitionProcessor. + :type partition_processor_type: type :param partition_manager: Interacts with the storage system, dealing with ownership and checkpoints. - For preview 2, sample Sqlite3PartitionManager is provided. + For an easy start, SamplePartitionManager comes with the package. :type partition_manager: Class implementing the ~azure.eventhub.eventprocessor.PartitionManager. - :param initial_event_position: The offset to start a partition consumer if the partition has no checkpoint yet. - :type initial_event_position: int or str + :param initial_event_position: The event position to start a partition consumer + if the partition has no checkpoint yet. This will be replaced by "reset" checkpoint in the near future.
+ :type initial_event_position: EventPosition :param polling_interval: The interval between any two pollings of balancing and claiming - :type float + :type polling_interval: float """ self._consumer_group_name = consumer_group_name self._eventhub_client = eventhub_client self._eventhub_name = eventhub_client.eh_name - self._partition_processor_factory = partition_processor_factory + self._partition_processor_factory = partition_processor_type self._partition_manager = partition_manager self._initial_event_position = initial_event_position # will be replaced by reset event position in preview 4 self._polling_interval = polling_interval From 36ba0a332548852913c203a18f671fb8482c37b9 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 17:01:11 -0700 Subject: [PATCH 34/39] add pytest --- .../test_eventprocessor.py | 311 ++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py diff --git a/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py b/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py new file mode 100644 index 000000000000..733eca3569ab --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py @@ -0,0 +1,311 @@ +#------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+#-------------------------------------------------------------------------- + +import pytest +import asyncio + +from azure.eventhub import EventData, EventHubError +from azure.eventhub.aio import EventHubClient +from azure.eventhub.aio.eventprocessor import EventProcessor, SamplePartitionManager, PartitionProcessor, \ + CloseReason, OwnershipLostError + + +class LoadBalancerPartitionProcessor(PartitionProcessor): + async def process_events(self, events, partition_context): + pass + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_loadbalancer_balance(connstr_senders): + + connection_str, senders = connstr_senders + for sender in senders: + sender.send(EventData("EventProcessor Test")) + eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = SamplePartitionManager() + + event_processor1 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor1.start()) + await asyncio.sleep(5) + assert len(event_processor1._tasks) == 2 # event_processor1 claims two partitions + + event_processor2 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, + partition_manager, polling_interval=1) + + asyncio.create_task(event_processor2.start()) + await asyncio.sleep(5) + assert len(event_processor1._tasks) == 1 # two event processors balance. 
So each has 1 task + assert len(event_processor2._tasks) == 1 + + event_processor3 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor3.start()) + await asyncio.sleep(5) + assert len(event_processor3._tasks) == 0 + await event_processor3.stop() + + await event_processor1.stop() + await asyncio.sleep(5) + assert len(event_processor2._tasks) == 2 # event_processor2 takes another one after event_processor1 stops + await event_processor2.stop() + + +@pytest.mark.asyncio +async def test_load_balancer_abandon(): + class TestPartitionProcessor(PartitionProcessor): + async def process_events(self, events, partition_context): + await asyncio.sleep(0.1) + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def create_consumer(self, consumer_group_name, partition_id, event_position): + return MockEventhubConsumer() + + async def get_partition_ids(self): + return [str(pid) for pid in range(6)] + + class MockEventhubConsumer(object): + async def receive(self): + return [] + + partition_manager = SamplePartitionManager() + + event_processor = EventProcessor(MockEventHubClient(), "$default", TestPartitionProcessor, + partition_manager, polling_interval=0.5) + asyncio.get_running_loop().create_task(event_processor.start()) + await asyncio.sleep(5) + + ep_list = [] + for _ in range(2): + ep = EventProcessor(MockEventHubClient(), "$default", TestPartitionProcessor, + partition_manager, polling_interval=0.5) + asyncio.get_running_loop().create_task(ep.start()) + ep_list.append(ep) + await asyncio.sleep(5) + assert len(event_processor._tasks) == 2 + for ep in ep_list: + await ep.stop() + await event_processor.stop() + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_loadbalancer_list_ownership_error(connstr_senders): + class ErrorPartitionManager(SamplePartitionManager): + async def list_ownership(self, eventhub_name, consumer_group_name): + raise
RuntimeError("Test runtime error") + + connection_str, senders = connstr_senders + for sender in senders: + sender.send(EventData("EventProcessor Test")) + eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = ErrorPartitionManager() + + event_processor = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(5) + assert event_processor._running is True + assert len(event_processor._tasks) == 0 + await event_processor.stop() + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_partition_processor(connstr_senders): + partition_processor1 = None + partition_processor2 = None + + class TestPartitionProcessor(PartitionProcessor): + def __init__(self): + self.initialize_called = False + self.error = None + self.close_reason = None + self.received_events = [] + self.checkpoint = None + + async def initialize(self, partition_context): + nonlocal partition_processor1, partition_processor2 + if partition_context.partition_id == "1": + partition_processor1 = self + else: + partition_processor2 = self + + async def process_events(self, events, partition_context): + self.received_events.extend(events) + if events: + offset, sn = events[-1].offset, events[-1].sequence_number + await partition_context.update_checkpoint(offset, sn) + self.checkpoint = (offset, sn) + + async def process_error(self, error, partition_context): + self.error = error + assert partition_context is not None + + async def close(self, reason, partition_context): + self.close_reason = reason + assert partition_context is not None + + connection_str, senders = connstr_senders + for sender in senders: + sender.send(EventData("EventProcessor Test")) + eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = SamplePartitionManager() + + event_processor = 
EventProcessor(eventhub_client, "$default", TestPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(10) + await event_processor.stop() + assert partition_processor1 is not None and partition_processor2 is not None + assert len(partition_processor1.received_events) == 1 and len(partition_processor2.received_events) == 1 + assert partition_processor1.checkpoint is not None + assert partition_processor1.close_reason == CloseReason.SHUTDOWN + assert partition_processor1.error is None + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_partition_processor_process_events_error(connstr_senders): + class ErrorPartitionProcessor(PartitionProcessor): + async def process_events(self, events, partition_context): + if partition_context.partition_id == "1": + raise RuntimeError("processing events error") + else: + pass + + async def process_error(self, error, partition_context): + if partition_context.partition_id == "1": + assert isinstance(error, RuntimeError) + else: + raise RuntimeError("There shouldn't be an error for partition other than 1") + + async def close(self, reason, partition_context): + if partition_context.partition_id == "1": + assert reason == CloseReason.PROCESS_EVENTS_ERROR + else: + assert reason == CloseReason.SHUTDOWN + + connection_str, senders = connstr_senders + for sender in senders: + sender.send(EventData("EventProcessor Test")) + eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = SamplePartitionManager() + + event_processor = EventProcessor(eventhub_client, "$default", ErrorPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(10) + await event_processor.stop() + + +@pytest.mark.asyncio +async def test_partition_processor_process_eventhub_consumer_error(): + class TestPartitionProcessor(PartitionProcessor): + async def 
process_events(self, events, partition_context): + pass + + async def process_error(self, error, partition_context): + assert isinstance(error, EventHubError) + + async def close(self, reason, partition_context): + assert reason == CloseReason.EVENTHUB_EXCEPTION + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def create_consumer(self, consumer_group_name, partition_id, event_position): + return MockEventhubConsumer() + + async def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + async def receive(self): + raise EventHubError("Mock EventHubConsumer EventHubError") + + eventhub_client = MockEventHubClient() + partition_manager = SamplePartitionManager() + + event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(5) + await event_processor.stop() + + +@pytest.mark.asyncio +async def test_partition_processor_process_error_close_error(): + class TestPartitionProcessor(PartitionProcessor): + async def process_events(self, events, partition_context): + raise RuntimeError("process_error") + + async def process_error(self, error, partition_context): + assert isinstance(error, RuntimeError) + raise RuntimeError("error from process_error") + + async def close(self, reason, partition_context): + assert reason == CloseReason.PROCESS_EVENTS_ERROR + raise RuntimeError("close error") + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def create_consumer(self, consumer_group_name, partition_id, event_position): + return MockEventhubConsumer() + + async def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + async def receive(self): + return [EventData("mock events")] + + eventhub_client = MockEventHubClient() #EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = SamplePartitionManager() + + 
event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(5) + await event_processor.stop() + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_partition_processor_process_update_checkpoint_error(connstr_senders): + class ErrorPartitionManager(SamplePartitionManager): + async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, owner_id, + offset, sequence_number): + if partition_id == "1": + raise OwnershipLostError("Mocked ownership lost") + + class TestPartitionProcessor(PartitionProcessor): + async def process_events(self, events, partition_context): + if events: + await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) + + async def process_error(self, error, partition_context): + assert isinstance(error, OwnershipLostError) + + async def close(self, reason, partition_context): + if partition_context.partition_id == "1": + assert reason == CloseReason.OWNERSHIP_LOST + else: + assert reason == CloseReason.SHUTDOWN + + connection_str, senders = connstr_senders + for sender in senders: + sender.send(EventData("EventProcessor Test")) + eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = ErrorPartitionManager() + + event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, + partition_manager, polling_interval=1) + asyncio.create_task(event_processor.start()) + await asyncio.sleep(10) + await event_processor.stop() From 7f95d9e17161e4e14348dce12c29b6a0cefa3174 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 17:04:32 -0700 Subject: [PATCH 35/39] small fixes --- .../aio/eventprocessor/sample_partition_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/sample_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/sample_partition_manager.py index 4859d675d5b9..82559fc8c274 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/sample_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/sample_partition_manager.py @@ -69,11 +69,11 @@ async def list_ownership(self, eventhub_name, consumer_group_name): finally: cursor.close() - async def claim_ownership(self, partitions): + async def claim_ownership(self, ownership_list): result = [] cursor = self.conn.cursor() try: - for p in partitions: + for p in ownership_list: cursor.execute("select etag from " + _check_table_name(self.ownership_table) + " where "+ " and ".join([field+"=?" for field in self.primary_keys]), tuple(p.get(field) for field in self.primary_keys)) @@ -91,7 +91,7 @@ async def claim_ownership(self, partitions): logger.info("EventProcessor %r failed to claim partition %r " "because it was claimed by another EventProcessor at the same time. " "The Sqlite3 exception is %r", p["owner_id"], p["partition_id"], op_err) - break + continue else: result.append(p) else: @@ -114,7 +114,7 @@ async def claim_ownership(self, partitions): "because it was claimed by another EventProcessor at the same time", p["owner_id"], p["partition_id"]) self.conn.commit() - return partitions + return result finally: cursor.close() From f30d143b700b2927250e9367c2766a1e9fc4f2a7 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 17:27:11 -0700 Subject: [PATCH 36/39] Revert "Remove utils.py" This reverts commit a9446de31a6f33e8c86aeec0410c8fbb182f3188. 
--- .../azure/eventhub/aio/eventprocessor/utils.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py new file mode 100644 index 000000000000..1d8add0f49a0 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/utils.py @@ -0,0 +1,16 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +import asyncio + + +def get_running_loop(): + try: + return asyncio.get_running_loop() + except AttributeError: # 3.5 / 3.6 + loop = asyncio._get_running_loop() # pylint: disable=protected-access, no-member + if loop is None: + raise RuntimeError('No running event loop') + return loop From 893bee041a4914efc22e4d1a8e88946b46b40330 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 17:34:10 -0700 Subject: [PATCH 37/39] change asyncio.create_task to 3.5 friendly code --- .../aio/eventprocessor/event_processor.py | 3 ++- .../test_eventprocessor.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index 030eaadd6c59..6fbe628905ad 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -14,6 +14,7 @@ from .partition_manager import PartitionManager, OwnershipLostError from 
._ownership_manager import OwnershipManager from .partition_processor import CloseReason, PartitionProcessor +from .utils import get_running_loop log = logging.getLogger(__name__) @@ -182,7 +183,7 @@ def _create_tasks_for_claimed_ownership(self, to_claim_ownership_list): for ownership in to_claim_ownership_list: partition_id = ownership["partition_id"] if partition_id not in self._tasks: - self._tasks[partition_id] = asyncio.create_task(self._receive(ownership)) + self._tasks[partition_id] = get_running_loop().create_task(self._receive(ownership)) async def _receive(self, ownership): log.info("start ownership, %r", ownership) diff --git a/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py b/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py index 733eca3569ab..93cf137e1af5 100644 --- a/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py +++ b/sdk/eventhub/azure-eventhubs/tests/eventprocessor_tests/test_eventprocessor.py @@ -29,21 +29,21 @@ async def test_loadbalancer_balance(connstr_senders): event_processor1 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor1.start()) + asyncio.ensure_future(event_processor1.start()) await asyncio.sleep(5) assert len(event_processor1._tasks) == 2 # event_processor1 claims two partitions event_processor2 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor2.start()) + asyncio.ensure_future(event_processor2.start()) await asyncio.sleep(5) assert len(event_processor1._tasks) == 1 # two event processors balance. 
So each has 1 task assert len(event_processor2._tasks) == 1 event_processor3 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor3.start()) + asyncio.ensure_future(event_processor3.start()) await asyncio.sleep(5) assert len(event_processor3._tasks) == 0 await event_processor3.stop() @@ -107,7 +107,7 @@ async def list_ownership(self, eventhub_name, consumer_group_name): event_processor = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor.start()) + asyncio.ensure_future(event_processor.start()) await asyncio.sleep(5) assert event_processor._running is True assert len(event_processor._tasks) == 0 @@ -158,7 +158,7 @@ async def close(self, reason, partition_context): event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor.start()) + asyncio.ensure_future(event_processor.start()) await asyncio.sleep(10) await event_processor.stop() assert partition_processor1 is not None and partition_processor2 is not None @@ -198,7 +198,7 @@ async def close(self, reason, partition_context): event_processor = EventProcessor(eventhub_client, "$default", ErrorPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor.start()) + asyncio.ensure_future(event_processor.start()) await asyncio.sleep(10) await event_processor.stop() @@ -233,7 +233,7 @@ async def receive(self): event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor.start()) + asyncio.ensure_future(event_processor.start()) await asyncio.sleep(5) await event_processor.stop() @@ -270,7 +270,7 @@ async def receive(self): event_processor = EventProcessor(eventhub_client, "$default", 
TestPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor.start()) + asyncio.ensure_future(event_processor.start()) await asyncio.sleep(5) await event_processor.stop() @@ -306,6 +306,6 @@ async def close(self, reason, partition_context): event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, partition_manager, polling_interval=1) - asyncio.create_task(event_processor.start()) + asyncio.ensure_future(event_processor.start()) await asyncio.sleep(10) await event_processor.stop() From 4b41fa573a36c3bc9889cba6d25e10bea644768a Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 21:12:17 -0700 Subject: [PATCH 38/39] Remove Callable --- .../azure/eventhub/aio/eventprocessor/event_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index 6fbe628905ad..6b76734ecfe0 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
# ----------------------------------------------------------------------------------- -from typing import Callable, Dict, Type +from typing import Dict, Type import uuid import asyncio import logging From fef055189dde1f9d1eea20fe7cee1bcb42d1248a Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 6 Sep 2019 21:34:02 -0700 Subject: [PATCH 39/39] raise CancelledError instead of break --- .../azure/eventhub/aio/eventprocessor/event_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py index 6b76734ecfe0..37f9a20d67c5 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/eventprocessor/event_processor.py @@ -254,7 +254,7 @@ async def close(reason): await close(CloseReason.SHUTDOWN) else: await close(CloseReason.OWNERSHIP_LOST) - break + raise except EventHubError as eh_err: await process_error(eh_err) await close(CloseReason.EVENTHUB_EXCEPTION)