�&ǐk�@'bJ�h�ۊL'}T� :��'2�Z#$��n�a��� �>a��`��_3d�Qpt�/�P -��#5�,�M��� �pA:©�q�����NW��ډ�A���� �9nʺج���� �TSM��{J6?7��r�@�\����D��� �׶���s�f�TJj?"��D��`?��̒� b�#�%�C*v�$�{�$����5Ծ�F�s��y�e/8��h-�f�̰&(����Gj�L:U� 2�� ����v�_k����Y��gp,�k�WF�R������_C�R��N@���R�@�ߔ?A�w9���F("iNa-S���Q�o�3tDMLh*�#4k�T/iQ��Y*�G��m����)��8�hBm/�I�,g�ﯖ���Z��}�Cz�q@´��d.����L�ŕ�,��1�Z�܌�: ̪���F+J-'��c�tvJ8��]Q-��b��y �6;*J`r_�d ��'�G ~p��)'�C,�%F��E(��2�k�����lР�z�!�=t ��_�0��f7��� ;�p�|�U �% heap[0]: item = heapreplace(heap, item) """ returnitem = heap[0] # raises appropriate IndexError if heap is empty heap[0] = item _siftup(heap, 0) return returnitem def heappushpop(heap, item): """Fast version of a heappush followed by a heappop.""" if heap and heap[0] < item: item, heap[0] = heap[0], item _siftup(heap, 0) return item def heapify(x): """Transform list into a heap, in-place, in O(len(x)) time.""" n = len(x) # Transform bottom-up. The largest index there's any point to looking at # is the largest with a child index in-range, so must have 2*i + 1 < n, # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1. for i in reversed(range(n//2)): _siftup(x, i) def _heappop_max(heap): """Maxheap version of a heappop.""" lastelt = heap.pop() # raises appropriate IndexError if heap is empty if heap: returnitem = heap[0] heap[0] = lastelt _siftup_max(heap, 0) return returnitem return lastelt def _heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty heap[0] = item _siftup_max(heap, 0) return returnitem def _heapify_max(x): """Transform list into a maxheap, in-place, in O(len(x)) time.""" n = len(x) for i in reversed(range(n//2)): _siftup_max(x, i) # 'heap' is a heap at all indices >= startpos, except possibly for pos. 
# pos is the index of a leaf with a possibly out-of-order value.  Restore the
# heap invariant.
def _siftdown(heap, startpos, pos):
    """Move the item at heap[pos] toward the root until it fits.

    Walks the parent chain from pos, never going above startpos.  Every
    parent that compares greater than the item (only "<" is used) is shifted
    one level down, and the item is stored in the slot that is left free.

    The list is modified in place; nothing is returned.
    """
    newitem = heap[pos]
    # Follow the path to the root, moving parents down until finding a place
    # newitem fits.
    while pos > startpos:
        parentpos = (pos - 1) >> 1
        parent = heap[parentpos]
        if newitem < parent:
            heap[pos] = parent
            pos = parentpos
            continue
        break
    heap[pos] = newitem

# The child indices of heap index pos are already heaps, and we want to make
# a heap at index pos too.  We do this by bubbling the smaller child of
# pos up (and so on with that child's children, etc) until hitting a leaf,
# then using _siftdown to move the oddball originally at index pos into place.
#
# We *could* break out of the loop as soon as we find a pos where newitem <=
# both its children, but turns out that's not a good idea, and despite that
# many books write the algorithm that way.  During a heap pop, the last array
# element is sifted in, and that tends to be large, so that comparing it
# against values starting from the root usually doesn't pay (= usually doesn't
# get us out of the loop early).  See Knuth, Volume 3, where this is
# explained and quantified in an exercise.
#
# Cutting the # of comparisons is important, since these routines have no
# way to extract "the priority" from an array element, so that intelligence
# is likely to be hiding in custom comparison methods, or in array elements
# storing (priority, record) tuples.  Comparisons are thus potentially
# expensive.
#
# On random arrays of length 1000, making this change cut the number of
# comparisons made by heapify() a little, and those made by exhaustive
# heappop() a lot, in accord with theory.
# Here are typical results from 3 runs (3 just to demonstrate how small the
# variance is):
#
# Compares needed by heapify     Compares needed by 1000 heappops
# --------------------------     --------------------------------
# 1837 cut to 1663               14996 cut to 8680
# 1855 cut to 1659               14966 cut to 8678
# 1847 cut to 1660               15024 cut to 8703
#
# Building the heap by using heappush() 1000 times instead required
# 2198, 2148, and 2219 compares:  heapify() is more efficient, when
# you can use it.
#
# The total compares needed by list.sort() on the same lists were 8627,
# 8627, and 8632 (this should be compared to the sum of heapify() and
# heappop() compares):  list.sort() is (unsurprisingly!) more efficient
# for sorting.

def _siftup(heap, pos):
    """Make the subtree rooted at index pos a min-heap, in place.

    The smaller child is promoted at every level until a leaf is reached;
    the item originally at pos is then written into that leaf and moved
    back up with _siftdown(), bounded by the starting position.
    """
    endpos = len(heap)
    startpos = pos
    newitem = heap[pos]
    # Bubble up the smaller child until hitting a leaf.
    childpos = 2*pos + 1    # leftmost child position
    while childpos < endpos:
        # Set childpos to index of smaller child.
        rightpos = childpos + 1
        if rightpos < endpos and not heap[childpos] < heap[rightpos]:
            childpos = rightpos
        # Move the smaller child up.
        heap[pos] = heap[childpos]
        pos = childpos
        childpos = 2*pos + 1
    # The leaf at pos is empty now.  Put newitem there, and bubble it up
    # to its final resting place (by sifting its parents down).
    heap[pos] = newitem
    _siftdown(heap, startpos, pos)

def _siftdown_max(heap, startpos, pos):
    """Maxheap variant of _siftdown.

    Parents that compare smaller than the item (only "<" is used) are
    shifted down; the walk never goes above startpos.  Works in place.
    """
    newitem = heap[pos]
    # Follow the path to the root, moving parents down until finding a place
    # newitem fits.
    while pos > startpos:
        parentpos = (pos - 1) >> 1
        parent = heap[parentpos]
        if parent < newitem:
            heap[pos] = parent
            pos = parentpos
            continue
        break
    heap[pos] = newitem

def _siftup_max(heap, pos):
    """Maxheap variant of _siftup.

    Promotes the larger child at every level down to a leaf, then places the
    original item there and moves it back up with _siftdown_max().
    """
    endpos = len(heap)
    startpos = pos
    newitem = heap[pos]
    # Bubble up the larger child until hitting a leaf.
    childpos = 2*pos + 1    # leftmost child position
    while childpos < endpos:
        # Set childpos to index of larger child.
        rightpos = childpos + 1
        if rightpos < endpos and not heap[rightpos] < heap[childpos]:
            childpos = rightpos
        # Move the larger child up.
        heap[pos] = heap[childpos]
        pos = childpos
        childpos = 2*pos + 1
    # The leaf at pos is empty now.  Put newitem there, and bubble it up
    # to its final resting place (by sifting its parents down).
    heap[pos] = newitem
    _siftdown_max(heap, startpos, pos)

def merge(*iterables, key=None, reverse=False):
    """Merge multiple sorted inputs into a single sorted output.

    Similar to sorted(itertools.chain(*iterables)) but returns a generator,
    does not pull the data into memory all at once, and assumes that each of
    the input streams is already sorted (smallest to largest).

    >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]

    If *key* is not None, applies a key function to each element to determine
    its sort order.

    >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len))
    ['dog', 'cat', 'fish', 'horse', 'kangaroo']

    If *reverse* is true, the max-heap helpers are used instead, so the
    largest pending value is yielded first; each input is then expected to
    be sorted from largest to smallest.

    Ties between inputs are broken by the position of the input in
    *iterables* (negated when *reverse* is true).

    """

    h = []
    h_append = h.append

    if reverse:
        _heapify = _heapify_max
        _heappop = _heappop_max
        _heapreplace = _heapreplace_max
        direction = -1
    else:
        _heapify = heapify
        _heappop = heappop
        _heapreplace = heapreplace
        direction = 1

    if key is None:
        # Heap entries are mutable lists: [value, tie-breaker, bound __next__].
        for order, it in enumerate(map(iter, iterables)):
            try:
                advance = it.__next__
                h_append([advance(), order * direction, advance])
            except StopIteration:
                # Empty input: it simply contributes nothing.
                pass
        _heapify(h)
        while len(h) > 1:
            try:
                while True:
                    value, order, advance = s = h[0]
                    yield value
                    s[0] = advance()        # raises StopIteration when exhausted
                    _heapreplace(h, s)      # restore heap condition
            except StopIteration:
                _heappop(h)                 # remove empty iterator
        if h:
            # fast case when only a single iterator remains
            value, order, advance = h[0]
            yield value
            # __self__ of the bound __next__ is the iterator itself.
            yield from advance.__self__
        return

    # Keyed variant: entries are [key(value), tie-breaker, value, bound __next__].
    for order, it in enumerate(map(iter, iterables)):
        try:
            advance = it.__next__
            value = advance()
            h_append([key(value), order * direction, value, advance])
        except StopIteration:
            pass
    _heapify(h)
    while len(h) > 1:
        try:
            while True:
                key_value, order, value, advance = s = h[0]
                yield value
                value = advance()
                s[0] = key(value)
                s[2] = value
                _heapreplace(h, s)
        except StopIteration:
            _heappop(h)
    if h:
        key_value, order, value, advance = h[0]
        yield value
        yield from advance.__self__


# Algorithm notes for nlargest() and nsmallest()
# ==============================================
#
# Make a single pass over the data while keeping the k most extreme values
# in a heap.  Memory consumption is limited to keeping k values in a list.
#
# Measured performance for random inputs:
#
#                                   number of comparisons
#    n inputs     k-extreme values  (average of 5 trials)   % more than min()
# -------------   ----------------  ---------------------   -----------------
#      1,000           100                  3,317               231.7%
#     10,000           100                 14,046                40.5%
#    100,000           100                105,749                 5.7%
#  1,000,000           100              1,007,751                 0.8%
# 10,000,000           100             10,009,401                 0.1%
#
# Theoretical number of comparisons for k smallest of n random inputs:
#
# Step   Comparisons                  Action
# ----   --------------------------   ---------------------------
#  1     1.66 * k                     heapify the first k-inputs
#  2     n - k                        compare remaining elements to top of heap
#  3     k * (1 + lg2(k)) * ln(n/k)   replace the topmost value on the heap
#  4     k * lg2(k) - (k/2)           final sort of the k most extreme values
#
# Combining and simplifying for a rough estimate gives:
#
#        comparisons = n + k * (log(k, 2) * log(n/k) + log(k, 2) + log(n/k))
#
# Computing the number of comparisons for step 3:
# -----------------------------------------------
# * For the i-th new value from the iterable, the probability of being in the
#   k most extreme values is k/i.  For example, the probability of the 101st
#   value seen being in the 100 most extreme values is 100/101.
# * If the value is a new extreme value, the cost of inserting it into the
#   heap is 1 + log(k, 2).
# * The probability times the cost gives:
#            (k/i) * (1 + log(k, 2))
# * Summing across the remaining n-k elements gives:
#            sum((k/i) * (1 + log(k, 2)) for i in range(k+1, n+1))
# * This reduces to:
#            (H(n) - H(k)) * k * (1 + log(k, 2))
# * Where H(n) is the n-th harmonic number estimated by:
#            gamma = 0.5772156649
#            H(n) = log(n, e) + gamma + 1 / (2 * n)
#   http://en.wikipedia.org/wiki/Harmonic_series_(mathematics)#Rate_of_divergence
# * Substituting the H(n) formula:
#            comparisons = k * (1 + log(k, 2)) * (log(n/k, e) + (1/n - 1/k) / 2)
#
# Worst-case for step 3:
# ----------------------
# In the worst case, the input data is reversed sorted so that every new element
# must be inserted in the heap:
#
#             comparisons = 1.66 * k + log(k, 2) * (n - k)
#
# Alternative Algorithms
# ----------------------
# Other algorithms were not used because they:
# 1) Took much more auxiliary memory,
# 2) Made multiple passes over the data.
# 3) Made more comparisons in common cases (small k, large n, semi-random input).
# See the more detailed comparison of approach at:
# http://code.activestate.com/recipes/577573-compare-algorithms-for-heapqsmallest

def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]

    Returns a list in ascending order.  Three strategies are used:
    min() when n == 1, sorted() when the input has a length and n covers
    all of it, and otherwise a single pass that keeps the n best candidates
    in a max-heap.  Candidates are decorated with their arrival index so
    that ties resolve in favour of earlier elements.
    """

    # Short-cut for n==1 is to use min()
    if n == 1:
        it = iter(iterable)
        sentinel = object()     # distinguishes "empty input" from any real value
        if key is None:
            result = min(it, default=sentinel)
        else:
            result = min(it, default=sentinel, key=key)
        return [] if result is sentinel else [result]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = iter(iterable)
        # put the range(n) first so that zip() doesn't
        # consume one too many elements from the iterator
        result = [(elem, i) for i, elem in zip(range(n), it)]
        if not result:
            return result
        _heapify_max(result)
        top = result[0][0]      # largest of the current candidates
        order = n
        _heapreplace = _heapreplace_max
        for elem in it:
            if elem < top:
                _heapreplace(result, (elem, order))
                top = result[0][0]
                order += 1
        result.sort()
        return [r[0] for r in result]

    # General case, slowest method
    it = iter(iterable)
    result = [(key(elem), i, elem) for i, elem in zip(range(n), it)]
    if not result:
        return result
    _heapify_max(result)
    top = result[0][0]
    order = n
    _heapreplace = _heapreplace_max
    for elem in it:
        k = key(elem)
        if k < top:
            _heapreplace(result, (k, order, elem))
            top = result[0][0]
            order += 1
    result.sort()
    return [r[2] for r in result]

def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]

    Returns a list in descending order.  Mirrors nsmallest(): max() when
    n == 1, sorted() when the input has a length and n covers all of it,
    and otherwise a single pass that keeps the n best candidates in a
    min-heap.  Arrival indices count downward (0, -1, -2, ...) so that,
    after the final reverse sort, ties resolve in favour of earlier elements.
    """

    # Short-cut for n==1 is to use max()
    if n == 1:
        it = iter(iterable)
        sentinel = object()     # distinguishes "empty input" from any real value
        if key is None:
            result = max(it, default=sentinel)
        else:
            result = max(it, default=sentinel, key=key)
        return [] if result is sentinel else [result]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key, reverse=True)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = iter(iterable)
        result = [(elem, i) for i, elem in zip(range(0, -n, -1), it)]
        if not result:
            return result
        heapify(result)
        top = result[0][0]      # smallest of the current candidates
        order = -n
        _heapreplace = heapreplace
        for elem in it:
            if top < elem:
                _heapreplace(result, (elem, order))
                top = result[0][0]
                order -= 1
        result.sort(reverse=True)
        return [r[0] for r in result]

    # General case, slowest method
    it = iter(iterable)
    result = [(key(elem), i, elem) for i, elem in zip(range(0, -n, -1), it)]
    if not result:
        return result
    heapify(result)
    top = result[0][0]
    order = -n
    _heapreplace = heapreplace
    for elem in it:
        k = key(elem)
        if top < k:
            _heapreplace(result, (k, order, elem))
            top = result[0][0]
            order -= 1
    result.sort(reverse=True)
    return [r[2] for r in result]

# If available, use C implementation
try:
    from _heapq import *
except ImportError:
    pass
try:
    from _heapq import _heapreplace_max
except ImportError:
    pass
try:
    from _heapq import _heapify_max
except ImportError:
    pass
try:
    from _heapq import _heappop_max
except ImportError:
    pass


if __name__ == "__main__":

    import doctest
    print(doctest.testmod())