UNB/ CS/ David Bremner/ teaching/ cs3383/ lectures/ 21.0-demos/ pqdict/ init .py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Priority Queue Dictionary (pqdict)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A Pythonic indexed priority queue.

A dict-like heap queue to prioritize hashable objects while providing random
access and updatable priorities. Inspired by the ``heapq`` standard library
module, which was written by Kevin O'Connor and augmented by Tim Peters and
Raymond Hettinger.

The priority queue is implemented as a binary heap of (key, priority value)
pairs, which supports:

- O(1) search for the item with highest priority

- O(log n) removal of the item with highest priority

- O(log n) insertion of a new item

Additionally, an index maps elements to their location in the heap and is kept
up to date as the heap is manipulated. As a result, pqdict also supports:

- O(1) lookup of any item by key

- O(log n) removal of any item

- O(log n) updating of any item's priority level

Documentation at <http://pqdict.readthedocs.org/en/latest>.

:copyright: (c) 2012-2015 by Nezar Abdennur.
:license: MIT, see LICENSE for more details.

"""
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    # 2.7 compatability
    from collections import MutableMapping as _MutableMapping

from six.moves import range
from operator import lt, gt


__version__ = "1.1.1"
__all__ = ["pqdict", "PQDict", "minpq", "maxpq", "nlargest", "nsmallest"]


class _Node(object):
    __slots__ = ("key", "value", "prio")

    def __init__(self, key, value, prio):
        self.key = key
        self.value = value
        self.prio = prio

    def __repr__(self):
        return self.__class__.__name__ + "(%s, %s, %s)" % (
            repr(self.key),
            repr(self.value),
            repr(self.prio),
        )


class pqdict(_MutableMapping):
    """
    A collection that maps hashable objects (keys) to priority-determining
    values. The mapping is mutable so items may be added, removed and have
    their priority level updated.

    Parameters
    ----------
    data : mapping or iterable, optional
        Input data, e.g. a dictionary or a sequence of items.
    key : callable, optional
        Optional priority key function to transform values into priority keys
        for sorting. By default, the values are not transformed.
    reverse : bool, optional
        If ``True``, *larger* priority keys give items *higher* priority.
        Default is ``False``.
    precedes : callable, optional (overrides ``reverse``)
        Function that determines precedence of a pair of priority keys. The
        default comparator is ``operator.lt``, meaning *smaller* priority keys
        give items *higher* priority.

    """

    def __init__(self, data=None, key=None, reverse=False, precedes=lt):
        if reverse:
            if precedes == lt:
                precedes = gt
            else:
                raise ValueError("Got both `reverse=True` and a custom `precedes`.")

        if key is None or callable(key):
            self._keyfn = key
        else:
            raise ValueError(
                "`key` function must be a callable; got {}".format(type(key))
            )

        if callable(precedes):
            self._precedes = precedes
        else:
            raise ValueError(
                "`precedes` function must be a callable; got {}".format(type(precedes))
            )

        # The heap
        self._heap = []

        # The index
        self._position = {}

        if data is not None:
            self.update(data)

    @property
    def precedes(self):
        """Priority key precedence function"""
        return self._precedes

    @property
    def keyfn(self):
        """Priority key function"""
        return self._keyfn if self._keyfn is not None else lambda x: x

    def __repr__(self):
        things = ", ".join(
            ["%s: %s" % (repr(node.key), repr(node.value)) for node in self._heap]
        )
        return self.__class__.__name__ + "({" + things + "})"

    ############
    # dict API #
    ############
    __marker = object()
    __eq__ = _MutableMapping.__eq__
    __ne__ = _MutableMapping.__ne__
    keys = _MutableMapping.keys
    values = _MutableMapping.values
    items = _MutableMapping.items
    get = _MutableMapping.get
    clear = _MutableMapping.clear
    update = _MutableMapping.update
    setdefault = _MutableMapping.setdefault

    @classmethod
    def fromkeys(cls, iterable, value, **kwargs):
        """
        Return a new pqict mapping keys from an iterable to the same value.

        """
        return cls(((k, value) for k in iterable), **kwargs)

    def __len__(self):
        """
        Return number of items in the pqdict.

        """
        return len(self._heap)

    def __contains__(self, key):
        """
        Return ``True`` if key is in the pqdict.

        """
        return key in self._position

    def __iter__(self):
        """
        Return an iterator over the keys of the pqdict. The order of iteration
        is arbitrary! Use ``popkeys`` to iterate over keys in priority order.

        """
        for node in self._heap:
            yield node.key

    def __getitem__(self, key):
        """
        Return the priority value of ``key``. Raises a ``KeyError`` if not in
        the pqdict.

        """
        return self._heap[self._position[key]].value  # raises KeyError

    def __setitem__(self, key, value):
        """
        Assign a priority value to ``key``.

        """
        heap = self._heap
        position = self._position
        keygen = self._keyfn
        try:
            pos = position[key]
        except KeyError:
            # add
            n = len(heap)
            prio = keygen(value) if keygen is not None else value
            heap.append(_Node(key, value, prio))
            position[key] = n
            self._swim(n)
        else:
            # update
            prio = keygen(value) if keygen is not None else value
            heap[pos].value = value
            heap[pos].prio = prio
            self._reheapify(pos)

    def __delitem__(self, key):
        """
        Remove item. Raises a ``KeyError`` if key is not in the pqdict.

        """
        heap = self._heap
        position = self._position
        pos = position.pop(key)  # raises KeyError
        node_to_delete = heap[pos]
        # Take the very last node and place it in the vacated spot. Let it
        # sink or swim until it reaches its new resting place.
        end = heap.pop(-1)
        if end is not node_to_delete:
            heap[pos] = end
            position[end.key] = pos
            self._reheapify(pos)
        del node_to_delete

    def copy(self):
        """
        Return a shallow copy of a pqdict.

        """
        other = self.__class__(key=self._keyfn, precedes=self._precedes)
        other._position = self._position.copy()
        other._heap = [_Node(node.key, node.value, node.prio) for node in self._heap]
        return other

    def pop(self, key=__marker, default=__marker):
        """
        If ``key`` is in the pqdict, remove it and return its priority value,
        else return ``default``. If ``default`` is not provided and ``key`` is
        not in the pqdict, raise a ``KeyError``.

        If ``key`` is not provided, remove the top item and return its key, or
        raise ``KeyError`` if the pqdict is empty.

        """
        heap = self._heap
        position = self._position
        # pq semantics: remove and return top *key* (value is discarded)
        if key is self.__marker:
            if not heap:
                raise KeyError("pqdict is empty")
            key = heap[0].key
            del self[key]
            return key
        # dict semantics: remove and return *value* mapped from key
        try:
            pos = position.pop(key)  # raises KeyError
        except KeyError:
            if default is self.__marker:
                raise
            return default
        else:
            node_to_delete = heap[pos]
            end = heap.pop()
            if end is not node_to_delete:
                heap[pos] = end
                position[end.key] = pos
                self._reheapify(pos)
            value = node_to_delete.value
            del node_to_delete
            return value

    ######################
    # Priority Queue API #
    ######################
    def top(self):
        """
        Return the key of the item with highest priority. Raises ``KeyError``
        if pqdict is empty.

        """
        try:
            node = self._heap[0]
        except IndexError:
            raise KeyError("pqdict is empty")
        return node.key

    def popitem(self):
        """
        Remove and return the item with highest priority. Raises ``KeyError``
        if pqdict is empty.

        """
        heap = self._heap
        position = self._position

        try:
            end = heap.pop(-1)
        except IndexError:
            raise KeyError("pqdict is empty")

        if heap:
            node = heap[0]
            heap[0] = end
            position[end.key] = 0
            self._sink(0)
        else:
            node = end
        del position[node.key]
        return node.key, node.value

    def topitem(self):
        """
        Return the item with highest priority. Raises ``KeyError`` if pqdict is
        empty.

        """
        try:
            node = self._heap[0]
        except IndexError:
            raise KeyError("pqdict is empty")
        return node.key, node.value

    def additem(self, key, value):
        """
        Add a new item. Raises ``KeyError`` if key is already in the pqdict.

        """
        if key in self._position:
            raise KeyError("%s is already in the queue" % repr(key))
        self[key] = value

    def pushpopitem(self, key, value):
        """
        Equivalent to inserting a new item followed by removing the top
        priority item, but faster. Raises ``KeyError`` if the new key is
        already in the pqdict.

        """
        heap = self._heap
        position = self._position
        precedes = self._precedes
        prio = self._keyfn(value) if self._keyfn else value
        node = _Node(key, value, prio)
        if key in self:
            raise KeyError("%s is already in the queue" % repr(key))
        if heap and precedes(heap[0].prio, node.prio):
            node, heap[0] = heap[0], node
            position[key] = 0
            del position[node.key]
            self._sink(0)
        return node.key, node.value

    def updateitem(self, key, new_val):
        """
        Update the priority value of an existing item. Raises ``KeyError`` if
        key is not in the pqdict.

        """
        if key not in self._position:
            raise KeyError(key)
        self[key] = new_val

    def replace_key(self, key, new_key):
        """
        Replace the key of an existing heap node in place. Raises ``KeyError``
        if the key to replace does not exist or if the new key is already in
        the pqdict.

        """
        heap = self._heap
        position = self._position
        if new_key in self:
            raise KeyError("%s is already in the queue" % repr(new_key))
        pos = position.pop(key)  # raises appropriate KeyError
        position[new_key] = pos
        heap[pos].key = new_key

    def swap_priority(self, key1, key2):
        """
        Fast way to swap the priority level of two items in the pqdict. Raises
        ``KeyError`` if either key does not exist.

        """
        heap = self._heap
        position = self._position
        if key1 not in self or key2 not in self:
            raise KeyError
        pos1, pos2 = position[key1], position[key2]
        heap[pos1].key, heap[pos2].key = key2, key1
        position[key1], position[key2] = pos2, pos1

    def popkeys(self):
        """
        Heapsort iterator over keys in descending order of priority level.

        """
        try:
            while True:
                yield self.popitem()[0]
        except KeyError:
            return

    def popvalues(self):
        """
        Heapsort iterator over values in descending order of priority level.

        """
        try:
            while True:
                yield self.popitem()[1]
        except KeyError:
            return

    def popitems(self):
        """
        Heapsort iterator over items in descending order of priority level.

        """
        try:
            while True:
                yield self.popitem()
        except KeyError:
            return

    def heapify(self, key=__marker):
        """
        Repair a broken heap. If the state of an item's priority value changes
        you can re-sort the relevant item only by providing ``key``.

        """
        if key is self.__marker:
            n = len(self._heap)
            # No need to look at any node without a child.
            for pos in reversed(range(n // 2)):
                self._sink(pos)
        else:
            try:
                pos = self._position[key]
            except KeyError:
                raise KeyError(key)
            self._reheapify(pos)

    # Heap algorithms
    # The names of the heap operations in `heapq` (sift up/down) refer to the
    # motion of the nodes being compared to, rather than the node being
    # operated on as is usually done in textbooks (i.e. bubble down/up,
    # instead). Here I use the sink/swim nomenclature from
    # http://algs4.cs.princeton.edu/24pq/. The way I like to think of it, an
    # item that is too "heavy" (low-priority) should sink down the tree, while
    # one that is too "light" should float or swim up.
    def _reheapify(self, pos):
        # update existing node:
        # bubble up or down depending on values of parent and children
        heap = self._heap
        precedes = self._precedes
        parent_pos = (pos - 1) >> 1
        child_pos = 2 * pos + 1
        if parent_pos > -1 and precedes(heap[pos].prio, heap[parent_pos].prio):
            self._swim(pos)
        elif child_pos < len(heap):
            other_pos = child_pos + 1
            if other_pos < len(heap) and not precedes(
                heap[child_pos].prio, heap[other_pos].prio
            ):
                child_pos = other_pos
            if precedes(heap[child_pos].prio, heap[pos].prio):
                self._sink(pos)

    def _sink(self, top=0):
        # "Sink-to-the-bottom-then-swim" algorithm (Floyd, 1964)
        # Tends to reduce the number of comparisons when inserting "heavy"
        # items at the top, e.g. during a heap pop. See heapq for more details.
        heap = self._heap
        position = self._position
        precedes = self._precedes
        endpos = len(heap)
        # Grab the top node
        pos = top
        node = heap[pos]
        # Sift up a chain of child nodes
        child_pos = 2 * pos + 1
        while child_pos < endpos:
            # Choose the smaller child.
            other_pos = child_pos + 1
            if other_pos < endpos and not precedes(
                heap[child_pos].prio, heap[other_pos].prio
            ):
                child_pos = other_pos
            child_node = heap[child_pos]
            # Move it up one level.
            heap[pos] = child_node
            position[child_node.key] = pos
            # Next level
            pos = child_pos
            child_pos = 2 * pos + 1
        # We are left with a "vacant" leaf. Put our node there and let it swim
        # until it reaches its new resting place.
        heap[pos] = node
        position[node.key] = pos
        self._swim(pos, top)

    def _swim(self, pos, top=0):
        heap = self._heap
        position = self._position
        precedes = self._precedes
        # Grab the node from its place
        node = heap[pos]
        # Sift parents down until we find a place where the node fits.
        while pos > top:
            parent_pos = (pos - 1) >> 1
            parent_node = heap[parent_pos]
            if precedes(node.prio, parent_node.prio):
                heap[pos] = parent_node
                position[parent_node.key] = pos
                pos = parent_pos
                continue
            break
        # Put node in its new place
        heap[pos] = node
        position[node.key] = pos


###########
# Aliases #
###########

PQDict = pqdict  # deprecated


def minpq(*args, **kwargs):
    return pqdict(dict(*args, **kwargs), precedes=lt)


def maxpq(*args, **kwargs):
    return pqdict(dict(*args, **kwargs), precedes=gt)


#############
# Functions #
#############


def nlargest(n, mapping, key=None):
    """
    Takes a mapping and returns the n keys associated with the largest values
    in descending order. If the mapping has fewer than n items, all its keys
    are returned.

    Equivalent to:
        ``next(zip(*heapq.nlargest(mapping.items(), key=lambda x: x[1])))``

    Parameters
    ----------
    n : int
        The number of keys associated with the largest values
        in descending order
    mapping : Mapping
        A mapping object
    key : callable, optional
        Optional priority key function to transform values into priority keys
        for sorting. By default, the values are not transformed.

    Returns
    -------
    list of up to n keys from the mapping

    """
    try:
        it = mapping.iteritems()
    except AttributeError:
        it = iter(mapping.items())
    pq = pqdict(key=key, precedes=lt)
    try:
        for i in range(n):
            pq.additem(*next(it))
    except StopIteration:
        pass
    try:
        while it:
            pq.pushpopitem(*next(it))
    except StopIteration:
        pass
    out = list(pq.popkeys())
    out.reverse()
    return out


def nsmallest(n, mapping, key=None):
    """
    Takes a mapping and returns the n keys associated with the smallest values
    in ascending order. If the mapping has fewer than n items, all its keys are
    returned.

    Equivalent to:
        ``next(zip(*heapq.nsmallest(mapping.items(), key=lambda x: x[1])))``

    Parameters
    ----------
    n : int
        The number of keys associated with the smallest values
        in ascending order
    mapping : Mapping
        A mapping object
    key : callable, optional
        Optional priority key function to transform values into priority keys
        for sorting. By default, the values are not transformed.

    Returns
    -------
    list of up to n keys from the mapping

    """
    try:
        it = mapping.iteritems()
    except AttributeError:
        it = iter(mapping.items())
    pq = pqdict(key=key, precedes=gt)
    try:
        for i in range(n):
            pq.additem(*next(it))
    except StopIteration:
        pass
    try:
        while it:
            pq.pushpopitem(*next(it))
    except StopIteration:
        pass
    out = list(pq.popkeys())
    out.reverse()
    return out