Merged
Commits
75 commits
2950e20
merge from main and resolve conflicts
Aug 14, 2024
7a1a1eb
remove async keyword from changeFeed query in aio package
Aug 18, 2024
b6c53fb
refactor
Aug 18, 2024
5f16b14
refactor
Aug 18, 2024
36990ef
fix pylint
Aug 20, 2024
3c569e8
added public surface methods
tvaron3 Aug 20, 2024
7479b0c
pylint fix
Aug 20, 2024
2e76620
fix
Aug 21, 2024
56bbb9e
added functionality for merging session tokens from logical pk
tvaron3 Aug 21, 2024
8c0aa46
fix mypy
Aug 21, 2024
28394b9
added tests for basic merge and split
tvaron3 Aug 21, 2024
25c3363
resolve comments
Aug 27, 2024
cecdfa5
resolve comments
Aug 28, 2024
65ed132
resolve comments
Aug 28, 2024
4bb30d2
resolve comments
Aug 28, 2024
5addcdc
fix pylint
Aug 29, 2024
59814d7
fix mypy
Aug 29, 2024
ec79b94
merge feed range changes
tvaron3 Aug 22, 2024
66c3f7b
fix tests
Sep 4, 2024
1e7a268
merged with feed range branch
tvaron3 Sep 4, 2024
997b6b0
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python …
tvaron3 Sep 4, 2024
7eda72f
Merge branch 'main' into addFeedRangeSupportInChangeFeed
Sep 4, 2024
3a2e4e1
add tests
Sep 5, 2024
0883dac
fix pylint
Sep 5, 2024
b7d1210
Merge branch 'addFeedRangeSupportInChangeFeed' of https://github.com/…
tvaron3 Sep 5, 2024
195c47c
fix and resolve comments
Sep 6, 2024
246b1be
fix and resolve comments
Sep 6, 2024
10fe387
Added isSubsetFeedRange logic
tvaron3 Sep 9, 2024
6498311
Added request context to crud operations, session token helpers
tvaron3 Sep 11, 2024
5a13ddf
Merge branch 'addFeedRangeSupportInChangeFeed' of https://github.com/…
tvaron3 Sep 11, 2024
f5d0d7b
Merge branch 'main' into addFeedRangeSupportInChangeFeed
Sep 13, 2024
5cde59b
revert unnecessary change
Sep 13, 2024
a494346
Added more tests
tvaron3 Sep 20, 2024
0d75607
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python …
tvaron3 Sep 20, 2024
c8c099f
Merge branch 'addFeedRangeSupportInChangeFeed' of https://github.com/…
tvaron3 Sep 20, 2024
ad3ae4f
Added more tests
tvaron3 Oct 5, 2024
8f466a1
merge with main
tvaron3 Oct 6, 2024
5249d0a
Changed tests to use new public feed range and more test coverage for…
tvaron3 Oct 6, 2024
40523f5
Added more tests
tvaron3 Oct 7, 2024
9f88b4e
Fix tests and add changelog
tvaron3 Oct 7, 2024
7c23e87
fix spell checks
tvaron3 Oct 7, 2024
4d0b058
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python …
tvaron3 Oct 7, 2024
d7c598e
Added tests and pushed request context to client level
tvaron3 Oct 8, 2024
8698098
Added async methods and removed feed range from request context
tvaron3 Oct 8, 2024
c252d88
fix tests
tvaron3 Oct 9, 2024
51e721b
fix tests and pylint
tvaron3 Oct 9, 2024
923055b
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python …
tvaron3 Oct 9, 2024
104e341
Reacting to comments
tvaron3 Oct 10, 2024
5552912
Reacting to comments
tvaron3 Oct 10, 2024
1bbbd0f
pylint and added hpk tests
tvaron3 Oct 10, 2024
a9299ab
reacting to comments
tvaron3 Oct 11, 2024
2155016
fix tests and mypy
tvaron3 Oct 11, 2024
0436355
fix mypy
tvaron3 Oct 11, 2024
103eb41
fix mypy
tvaron3 Oct 11, 2024
76451df
reacting to comments
tvaron3 Oct 15, 2024
7b0f4b7
reacting to comments
tvaron3 Oct 15, 2024
5d7b978
reacting to comments
tvaron3 Oct 15, 2024
d54992f
fix cspell
tvaron3 Oct 15, 2024
fa16830
rename method to get_latest_session_token
tvaron3 Oct 16, 2024
b2ac9d8
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python …
tvaron3 Oct 17, 2024
6914a20
reacting to reverted feed range
tvaron3 Oct 17, 2024
ab9723a
change based on the api review
Oct 23, 2024
8a4305d
Reacting to API review and adding samples.
tvaron3 Oct 25, 2024
3a1f160
Reacting to API review and adding samples.
tvaron3 Oct 25, 2024
4bc16b1
Merge branch 'main' into tvaron3/sessionTokenHelper
tvaron3 Oct 25, 2024
900d001
Fixed pylint
tvaron3 Oct 25, 2024
96a165f
Merge branch 'tvaron3/sessionTokenHelper' of https://github.com/tvaro…
tvaron3 Oct 25, 2024
eab1822
Reacting to comments
tvaron3 Oct 28, 2024
97ffec7
Reacting to comments
tvaron3 Oct 28, 2024
2264465
Reacting to comments
tvaron3 Oct 29, 2024
35588fa
Reacting to comments
tvaron3 Oct 29, 2024
c42966f
Fix pydoc
tvaron3 Oct 30, 2024
786e357
Fix pydoc
tvaron3 Oct 31, 2024
0de21b4
reacting to comments
tvaron3 Oct 31, 2024
d32a6f1
reacting to comments
tvaron3 Oct 31, 2024
2 changes: 2 additions & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -10,6 +10,8 @@ This version and all future versions will support Python 3.13.
* Added option to disable write payload on writes. See [PR 37365](https://github.com/Azure/azure-sdk-for-python/pull/37365)
* Added get feed ranges API. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687)
* Added feed range support in `query_items_change_feed`. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687)
* Added **provisional** helper APIs for managing session tokens. See [PR 36971](https://github.com/Azure/azure-sdk-for-python/pull/36971)
* Added ability to get feed range for a partition key. See [PR 36971](https://github.com/Azure/azure-sdk-for-python/pull/36971)

#### Breaking Changes
* Item-level point operations will now return `CosmosDict` and `CosmosList` response types.
28 changes: 26 additions & 2 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py
@@ -186,7 +186,6 @@ def _compare_helper(a, b):

@staticmethod
def overlaps(range1, range2):

if range1 is None or range2 is None:
return False
if range1.isEmpty() or range2.isEmpty():
@@ -195,10 +194,35 @@ def overlaps(range1, range2):
cmp1 = Range._compare_helper(range1.min, range2.max)
cmp2 = Range._compare_helper(range2.min, range1.max)

if cmp1 <= 0 or cmp2 <= 0:
if cmp1 <= 0 and cmp2 <= 0:
if (cmp1 == 0 and not (range1.isMinInclusive and range2.isMaxInclusive)) or (
cmp2 == 0 and not (range2.isMinInclusive and range1.isMaxInclusive)
):
return False
return True
return False

def can_merge(self, other: 'Range') -> bool:
if self.isSingleValue() and other.isSingleValue():
return self.min == other.min
# if the ranges share a boundary that is inclusive on at least one side, they can be merged
overlap_boundary1 = self.max == other.min and (self.isMaxInclusive or other.isMinInclusive)
overlap_boundary2 = other.max == self.min and (other.isMaxInclusive or self.isMinInclusive)
if overlap_boundary1 or overlap_boundary2:
return True
return Range.overlaps(self, other)

def merge(self, other: 'Range') -> 'Range':
if not self.can_merge(other):
raise ValueError("Ranges do not overlap")
min_val = self.min if self.min < other.min else other.min
max_val = self.max if self.max > other.max else other.max
is_min_inclusive = self.isMinInclusive if self.min < other.min else other.isMinInclusive
is_max_inclusive = self.isMaxInclusive if self.max > other.max else other.isMaxInclusive
return Range(min_val, max_val, is_min_inclusive, is_max_inclusive)

def is_subset(self, parent_range: 'Range') -> bool:
normalized_parent_range = parent_range.to_normalized_range()
normalized_child_range = self.to_normalized_range()
return (normalized_parent_range.min <= normalized_child_range.min and
normalized_parent_range.max >= normalized_child_range.max)
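
For orientation, a minimal sketch (not part of this diff) of how the new Range helpers behave. It assumes the internal Range(min, max, isMinInclusive, isMaxInclusive) constructor used by merge() above; the EPK-style string bounds are illustrative placeholders.

# A minimal sketch (not part of this diff) exercising the new Range helpers.
# Assumes the internal Range(min, max, isMinInclusive, isMaxInclusive) constructor
# used by merge() above; the EPK-style string bounds are illustrative placeholders.
from azure.cosmos._routing.routing_range import Range

left = Range("AA", "BB", True, False)   # ["AA", "BB")
right = Range("BB", "DD", True, False)  # ["BB", "DD")

# The ranges share the "BB" boundary, which right includes, so they can be merged.
if left.can_merge(right):
    merged = left.merge(right)
    print(merged.min, merged.max)       # AA DD

# Each original range is a subset of the merged range.
print(left.is_subset(merged))           # True
print(right.is_subset(merged))          # True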
224 changes: 224 additions & 0 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_session_token_helpers.py
@@ -0,0 +1,224 @@
# The MIT License (MIT)
# Copyright (c) 2014 Microsoft Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Internal Helper functions for manipulating session tokens.
"""
from typing import Tuple, List, Dict, Any

from azure.cosmos._routing.routing_range import Range
from azure.cosmos._vector_session_token import VectorSessionToken
from ._change_feed.feed_range_internal import FeedRangeInternalEpk

# pylint: disable=protected-access


# ex inputs:
# 1. "1:1#51", "1:1#55" -> "1:1#55"
# 2. "0:1#57", "1:1#52" -> "0:1#57"
def merge_session_tokens_with_same_range(session_token1: str, session_token2: str) -> str:
pk_range_id1, vector_session_token1 = parse_session_token(session_token1)
pk_range_id2, vector_session_token2 = parse_session_token(session_token2)
pk_range_id = pk_range_id1
# The partition key range id could be different in this scenario
# Ex. get_latest_session_token([(("AA", "BB"), "1:1#51")], ("AA", "DD")) -> "1:1#51"
# Then we feed this back into get_latest_session_token after a merge happened:
# get_latest_session_token([(("AA", "DD"), "1:1#51"), (("AA", "DD"), "0:1#55")], ("AA", "DD")) -> "0:1#55"
if pk_range_id1 != pk_range_id2:
pk_range_id = pk_range_id1 \
if vector_session_token1.global_lsn > vector_session_token2.global_lsn else pk_range_id2
vector_session_token = vector_session_token1.merge(vector_session_token2)
return pk_range_id + ":" + vector_session_token.session_token

def is_compound_session_token(session_token: str) -> bool:
return "," in session_token

def parse_session_token(session_token: str) -> Tuple[str, VectorSessionToken]:
tokens = session_token.split(":")
return tokens[0], VectorSessionToken.create(tokens[1])

def split_compound_session_tokens(compound_session_tokens: List[Tuple[Range, str]]) -> List[str]:
session_tokens = []
for _, session_token in compound_session_tokens:
if is_compound_session_token(session_token):
tokens = session_token.split(",")
for token in tokens:
session_tokens.append(token)
else:
session_tokens.append(session_token)
return session_tokens

# ex inputs:
# ["1:1#51", "1:1#55", "1:1#57"] -> ["1:1#57"]
def merge_session_tokens_for_same_partition(session_tokens: List[str]) -> List[str]:
i = 0
while i < len(session_tokens):
j = i + 1
while j < len(session_tokens):
pk_range_id1, vector_session_token1 = parse_session_token(session_tokens[i])
pk_range_id2, vector_session_token2 = parse_session_token(session_tokens[j])
if pk_range_id1 == pk_range_id2:
vector_session_token = vector_session_token1.merge(vector_session_token2)
session_tokens.append(pk_range_id1 + ":" + vector_session_token.session_token)
remove_session_tokens = [session_tokens[i], session_tokens[j]]
for token in remove_session_tokens:
session_tokens.remove(token)
i = -1
break
j += 1
i += 1

return session_tokens

# ex inputs:
# 1. [(("AA", "BB"), "1:1#51"), (("BB", "DD"), "2:1#51"), (("AA", "DD"), "0:1#55")] ->
#    [(("AA", "DD"), "0:1#55")]
# 2. [(("AA", "BB"), "1:1#57"), (("BB", "DD"), "2:1#58"), (("AA", "DD"), "0:1#55")] ->
#    [(("AA", "DD"), "1:1#57,2:1#58")]
# 3. [(("AA", "BB"), "1:1#57"), (("BB", "DD"), "2:1#52"), (("AA", "DD"), "0:1#55")] ->
#    [(("AA", "DD"), "1:1#57,2:1#52,0:1#55")]
# the goal here is to detect any obvious merges or splits that happened;
# compound session tokens are not considered for merging and are just passed along
def merge_ranges_with_subsets(overlapping_ranges: List[Tuple[Range, str]]) -> List[Tuple[Range, str]]:
processed_ranges = []
while len(overlapping_ranges) != 0: # pylint: disable=too-many-nested-blocks
feed_range_cmp, session_token_cmp = overlapping_ranges[0]
# compound session tokens are not considered for merging
if is_compound_session_token(session_token_cmp):
processed_ranges.append(overlapping_ranges[0])
overlapping_ranges.remove(overlapping_ranges[0])
continue
_, vector_session_token_cmp = parse_session_token(session_token_cmp)
subsets = []
# finding the subset feed ranges of the current feed range
for j in range(1, len(overlapping_ranges)):
feed_range = overlapping_ranges[j][0]
if not is_compound_session_token(overlapping_ranges[j][1]) and \
feed_range.is_subset(feed_range_cmp):
subsets.append(overlapping_ranges[j] + (j,))

# go through subsets to see if the current feed range can be assembled from them
not_found = True
j = 0
while not_found and j < len(subsets):
merged_range = subsets[j][0]
session_tokens = [subsets[j][1]]
merged_indices = [subsets[j][2]]
if len(subsets) == 1:
_, vector_session_token = parse_session_token(session_tokens[0])
if vector_session_token_cmp.global_lsn > vector_session_token.global_lsn:
overlapping_ranges.remove(overlapping_ranges[merged_indices[0]])
else:
for k, subset in enumerate(subsets):
if j == k:
continue
if merged_range.can_merge(subset[0]):
merged_range = merged_range.merge(subset[0])
session_tokens.append(subset[1])
merged_indices.append(subset[2])
if feed_range_cmp == merged_range:
# if feed range can be created from the subsets
# take the subsets if their global lsn is larger
# else take the current feed range
children_more_updated = True
parent_more_updated = True
for session_token in session_tokens:
_, vector_session_token = parse_session_token(session_token)
if vector_session_token_cmp.global_lsn > vector_session_token.global_lsn:
children_more_updated = False
else:
parent_more_updated = False
feed_ranges_to_remove = [overlapping_ranges[i] for i in merged_indices]
for feed_range_to_remove in feed_ranges_to_remove:
overlapping_ranges.remove(feed_range_to_remove)
if children_more_updated:
overlapping_ranges.append((merged_range, ','.join(map(str, session_tokens))))
overlapping_ranges.remove(overlapping_ranges[0])
elif not parent_more_updated and not children_more_updated:
session_tokens.append(session_token_cmp)
overlapping_ranges.append((merged_range, ','.join(map(str, session_tokens))))
not_found = False
break

j += 1

processed_ranges.append(overlapping_ranges[0])
overlapping_ranges.remove(overlapping_ranges[0])
return processed_ranges

def get_latest_session_token(feed_ranges_to_session_tokens: List[Tuple[Dict[str, Any], str]],
target_feed_range: Dict[str, Any]) -> str:

target_feed_range_epk = FeedRangeInternalEpk.from_json(target_feed_range)
target_feed_range_normalized = target_feed_range_epk.get_normalized_range()
# keep only the tuples whose feed range overlaps with target_feed_range, normalizing all the ranges
overlapping_ranges = []
for feed_range_to_session_token in feed_ranges_to_session_tokens:
feed_range_epk = FeedRangeInternalEpk.from_json(feed_range_to_session_token[0])
if Range.overlaps(target_feed_range_normalized,
feed_range_epk.get_normalized_range()):
overlapping_ranges.append((feed_range_epk.get_normalized_range(),
feed_range_to_session_token[1]))

if len(overlapping_ranges) == 0:
raise ValueError('There were no overlapping feed ranges with the target.')

# merge any session tokens that cover the exact same feed range
i = 0
j = 1
while i < len(overlapping_ranges) and j < len(overlapping_ranges):
cur_feed_range = overlapping_ranges[i][0]
session_token = overlapping_ranges[i][1]
session_token_1 = overlapping_ranges[j][1]
if (not is_compound_session_token(session_token) and
not is_compound_session_token(overlapping_ranges[j][1]) and
cur_feed_range == overlapping_ranges[j][0]):
session_token = merge_session_tokens_with_same_range(session_token, session_token_1)
feed_ranges_to_remove = [overlapping_ranges[i], overlapping_ranges[j]]
for feed_range_to_remove in feed_ranges_to_remove:
overlapping_ranges.remove(feed_range_to_remove)
overlapping_ranges.append((cur_feed_range, session_token))
i, j = 0, 1
else:
j += 1
if j == len(overlapping_ranges):
i += 1
j = i + 1

# checking for merging of feed ranges that can be created from other feed ranges
processed_ranges = merge_ranges_with_subsets(overlapping_ranges)

# break up session tokens that are compound
remaining_session_tokens = split_compound_session_tokens(processed_ranges)

if len(remaining_session_tokens) == 1:
return remaining_session_tokens[0]
# merging any session tokens with same physical partition key range id
remaining_session_tokens = merge_session_tokens_for_same_partition(remaining_session_tokens)

updated_session_token = ""
# compound the remaining session tokens
for i, remaining_session_token in enumerate(remaining_session_tokens):
if i == len(remaining_session_tokens) - 1:
updated_session_token += remaining_session_token
else:
updated_session_token += remaining_session_token + ","

return updated_session_token
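
A short sketch (not part of this diff) exercising the helpers above with the example tokens from their comments; the "pkRangeId:version#globalLsn" token strings are illustrative.

# A short sketch (not part of this diff) of the session token helpers above.
# The "pkRangeId:version#globalLsn" strings mirror the examples in the comments.
from azure.cosmos._session_token_helpers import (
    is_compound_session_token,
    merge_session_tokens_for_same_partition,
    merge_session_tokens_with_same_range,
)

# Same physical partition key range id: the merged token keeps the higher global LSN.
print(merge_session_tokens_with_same_range("1:1#51", "1:1#55"))                 # 1:1#55

# Several tokens for the same partition key range id collapse into one.
print(merge_session_tokens_for_same_partition(["1:1#51", "1:1#55", "1:1#57"]))  # ['1:1#57']

# Compound session tokens are detected by the comma separator.
print(is_compound_session_token("1:1#57,2:1#58"))                               # True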
54 changes: 53 additions & 1 deletion sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py
@@ -44,6 +44,7 @@
_set_properties_cache
)
from .._routing.routing_range import Range
from .._session_token_helpers import get_latest_session_token
from ..offer import ThroughputProperties
from ..partition_key import (
NonePartitionKeyValue,
@@ -1321,5 +1322,56 @@ async def read_feed_ranges(

feed_ranges = [FeedRangeInternalEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)).to_dict()
for partitionKeyRange in partition_key_ranges]

return (feed_range for feed_range in feed_ranges)

async def get_latest_session_token(self,
feed_ranges_to_session_tokens: List[Tuple[Dict[str, Any], str]],
target_feed_range: Dict[str, Any]
) -> str:
""" **provisional** Gets the the most up to date session token from the list of session token and feed
range tuples for a specific target feed range. The feed range can be obtained from a logical partition
or by reading the container feed ranges. This should only be used if maintaining own session token or else
the sdk willkeep track of session token. Session tokens and feed ranges are scoped to a container.
Only input session tokens and feed ranges obtained from the same container.
:param feed_ranges_to_session_tokens: List of partition key and session token tuples.
:type feed_ranges_to_session_tokens: List[Tuple[Dict[str, Any], str]]
:param target_feed_range: feed range to get most up to date session token.
:type target_feed_range: Dict[str, Any]
:returns: a session token
:rtype: str
"""
return get_latest_session_token(feed_ranges_to_session_tokens, target_feed_range)

async def feed_range_from_partition_key(self, partition_key: PartitionKeyType) -> Dict[str, Any]:
"""Gets the feed range for a given partition key.
:param partition_key: The partition key to get the feed range for.
:type partition_key: PartitionKey
:returns: a feed range
:rtype: Dict[str, Any]

.. note::
Although a Dict is currently returned for the feed range, its structure may change in the future.
Treat it as opaque and do not take a dependency on its contents.

"""
return FeedRangeInternalEpk(await self._get_epk_range_for_partition_key(partition_key)).to_dict()

async def is_feed_range_subset(self, parent_feed_range: Dict[str, Any],
child_feed_range: Dict[str, Any]) -> bool:
"""Checks if child feed range is a subset of parent feed range.
:param parent_feed_range: left feed range
:type parent_feed_range: Dict[str, Any]
:param child_feed_range: right feed range
:type child_feed_range: Dict[str, Any]
:returns: a boolean indicating if child feed range is a subset of parent feed range
:rtype: bool

.. note::
Although a Dict is currently returned for the feed range, its structure may change in the future.
Treat it as opaque and do not take a dependency on its contents.

"""
parent_feed_range_epk = FeedRangeInternalEpk.from_json(parent_feed_range)
child_feed_range_epk = FeedRangeInternalEpk.from_json(child_feed_range)
return child_feed_range_epk.get_normalized_range().is_subset(
parent_feed_range_epk.get_normalized_range())
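
Taken together, a hedged end-to-end sketch of the new async container surface; the endpoint, credential, database, container, and partition key values are placeholders.

# A hedged usage sketch (not part of this diff) of the new async container APIs.
# The endpoint, credential, database, container, and partition key values are placeholders.
import asyncio
from azure.cosmos.aio import CosmosClient

async def main():
    async with CosmosClient("<account-endpoint>", credential="<account-key>") as client:
        container = client.get_database_client("<database>").get_container_client("<container>")

        # Feed range scoped to a single logical partition key.
        feed_range = await container.feed_range_from_partition_key("<partition-key-value>")

        # A feed range is trivially a subset of itself.
        print(await container.is_feed_range_subset(feed_range, feed_range))  # True

        # Pair previously captured session tokens with the feed ranges they came from,
        # then ask for the most up-to-date token covering the target feed range.
        pairs = [(feed_range, "1:1#55")]  # token captured from an earlier response
        print(await container.get_latest_session_token(pairs, feed_range))   # 1:1#55

asyncio.run(main())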