"""Cache replacement policies implementations

This module contains the implementations of all the cache replacement policies
provided by Icarus.
"""
from __future__ import division
from collections import deque, defaultdict
import random
import abc
import copy

import numpy as np

from icarus.util import inheritdoc, apportionment
from icarus.registry import register_cache_policy


__all__ = [
    'LinkedSet',
    'Cache',
    'NullCache',
    'BeladyMinCache',
    'LruCache',
    'SegmentedLruCache',
    'InCacheLfuCache',
    'PerfectLfuCache',
    'FifoCache',
    'ClimbCache',
    'RandEvictionCache',
    'insert_after_k_hits_cache',
    'rand_insert_cache',
    'keyval_cache',
    'ttl_cache',
]


class LinkedSet(object):
    """A doubly-linked set, i.e., a set whose entries are ordered and stored
    as a doubly-linked list.

    This data structure is designed to efficiently implement a number of
    cache replacement policies such as LRU and derivatives such as
    Segmented LRU.

    It provides O(1) time complexity for the following operations: searching,
    remove from any position, move to top, move to bottom, insert after or
    before a given item.
    """

    class _Node(object):
        """Class implementing a node of the linked list"""

        def __init__(self, val, up=None, down=None):
            """Constructor

            Parameters
            ----------
            val : any hashable type
                The value stored by the node
            up : any hashable type, optional
                The node above in the list
            down : any hashable type, optional
                The node below in the list
            """
            self.val = val
            self.up = up
            self.down = down

    def __init__(self, iterable=()):
        """Constructor

        Parameters
        ----------
        iterable : iterable type
            An iterable type to initialize the data structure.
            It must contain only one instance of each element
        """
        self._top = None
        self._bottom = None
        self._map = {}
        if iterable:
            if len(set(iterable)) < len(iterable):
                raise ValueError('The iterable parameter contains repeated '
                                 'elements')
            for i in iterable:
                self.append_bottom(i)

    def __len__(self):
        """Return the number of elements in the linked set

        Returns
        -------
        len : int
            The length of the set
        """
        return len(self._map)

    def __iter__(self):
        """Return an iterator over the set

        Returns
        -------
        iter : iterator
            An iterator over the set, from top to bottom
        """
        cur = self._top
        while cur:
            yield cur.val
            cur = cur.down

    def __reversed__(self):
        """Return a reverse iterator over the set

        Returns
        -------
        reversed : iterator
            A reverse iterator over the set
        """
        cur = self._bottom
        while cur:
            yield cur.val
            cur = cur.up

    def __str__(self):
        """Return a string representation of the set

        Returns
        -------
        str : str
            A string representation of the set
        """
        return self.__class__.__name__ + "([" + \
               "".join("%s, " % str(i) for i in self)[:-2] + "])"

    def __contains__(self, k):
        """Return whether the set contains a given item

        Parameters
        ----------
        k : any hashable type
            The item to search

        Returns
        -------
        contains : bool
            *True* if the set contains the item, *False* otherwise
        """
        return k in self._map

    @property
    def top(self):
        """Return the item at the top of the set

        Returns
        -------
        top : any hashable type
            The item at the top or *None* if the set is empty
        """
        return self._top.val if self._top is not None else None

    @property
    def bottom(self):
        """Return the item at the bottom of the set

        Returns
        -------
        bottom : any hashable type
            The item at the bottom or *None* if the set is empty
        """
        return self._bottom.val if self._bottom is not None else None

    def pop_top(self):
        """Pop the item at the top of the set

        Returns
        -------
        top : any hashable type
            The item at the top or *None* if the set is empty
        """
        if self._top is None:
            # No elements to pop
            return None
        k = self._top.val
        if self._top is self._bottom:
            # There is only a single element
            self._bottom = self._top = None
        else:
            self._top.down.up = None
            self._top = self._top.down
        self._map.pop(k)
        return k

    def pop_bottom(self):
        """Pop the item at the bottom of the set

        Returns
        -------
        bottom : any hashable type
            The item at the bottom or *None* if the set is empty
        """
        if self._bottom is None:
            # No elements to pop
            return None
        k = self._bottom.val
        if self._bottom is self._top:
            # There is only a single element
            self._top = self._bottom = None
        else:
            self._bottom.up.down = None
            self._bottom = self._bottom.up
        self._map.pop(k)
        return k

    def append_top(self, k):
        """Append an item at the top of the set

        Parameters
        ----------
        k : any hashable type
            The item to append
        """
        if k in self._map:
            raise KeyError('The item %s is already in the set' % str(k))
        n = self._Node(val=k, up=None, down=self._top)
        if self._top is None:
            # The set is empty
            self._bottom = n
        else:
            self._top.up = n
        self._top = n
        self._map[k] = n

    def append_bottom(self, k):
        """Append an item at the bottom of the set

        Parameters
        ----------
        k : any hashable type
            The item to append
        """
        if k in self._map:
            raise KeyError('The item %s is already in the set' % str(k))
        n = self._Node(val=k, up=self._bottom, down=None)
        if self._bottom is None:
            # The set is empty
            self._top = n
        else:
            self._bottom.down = n
        self._bottom = n
        self._map[k] = n

    def move_up(self, k):
        """Move a specified item one position up in the set

        Parameters
        ----------
        k : any hashable type
            The item to move up
        """
        if k not in self._map:
            raise KeyError('Item %s not in the set' % str(k))
        n = self._map[k]
        if n.up is None:
            # Already on top or there is only one element
            return
        if n.down is None:
            # At the bottom but not the top: there are at least two elements
            self._bottom = n.up
        else:
            n.down.up = n.up
        n.up.down = n.down
        new_up = n.up.up
        new_down = n.up
        if new_up:
            new_up.down = n
        else:
            self._top = n
        new_down.up = n
        n.up = new_up
        n.down = new_down

    def move_down(self, k):
        """Move a specified item one position down in the set

        Parameters
        ----------
        k : any hashable type
            The item to move down
        """
        if k not in self._map:
            raise KeyError('Item %s not in the set' % str(k))
        n = self._map[k]
        if n.down is None:
            # Already at the bottom or there is only one element
            return
        if n.up is None:
            self._top = n.down
        else:
            n.up.down = n.down
        n.down.up = n.up
        new_down = n.down.down
        new_up = n.down
        new_up.down = n
        if new_down is not None:
            new_down.up = n
        else:
            self._bottom = n
        n.up = new_up
        n.down = new_down

    def move_to_top(self, k):
        """Move a specified item to the top of the set

        Parameters
        ----------
        k : any hashable type
            The item to move to the top
        """
        if k not in self._map:
            raise KeyError('Item %s not in the set' % str(k))
        n = self._map[k]
        if n.up is None:
            # Already on top or there is only one element
            return
        if n.down is None:
            # At the bottom, there are at least two elements
            self._bottom = n.up
        else:
            n.down.up = n.up
        n.up.down = n.down
        # Move to top
        n.up = None
        n.down = self._top
        self._top.up = n
        self._top = n

    def move_to_bottom(self, k):
        """Move a specified item to the bottom of the set

        Parameters
        ----------
        k : any hashable type
            The item to move to the bottom
        """
        if k not in self._map:
            raise KeyError('Item %s not in the set' % str(k))
        n = self._map[k]
        if n.down is None:
            # Already at the bottom or there is only one element
            return
        if n.up is None:
            # At the top, there are at least two elements
            self._top = n.down
        else:
            n.up.down = n.down
        n.down.up = n.up
        # Move to bottom
        n.down = None
        n.up = self._bottom
        self._bottom.down = n
        self._bottom = n

    def insert_above(self, i, k):
        """Insert an item one position above a given item already in the set

        Parameters
        ----------
        i : any hashable type
            The item of the set above which the new item is inserted
        k : any hashable type
            The item to insert
        """
        if k in self._map:
            raise KeyError('Item %s already in the set' % str(k))
        if i not in self._map:
            raise KeyError('Item %s not in the set' % str(i))
        n = self._map[i]
        if n.up is None:
            # Insert on top
            return self.append_top(k)
        # Now I know I am inserting between two actual elements
        m = self._Node(k, up=n.up, down=n)
        n.up.down = m
        n.up = m
        self._map[k] = m

    def insert_below(self, i, k):
        """Insert an item one position below a given item already in the set

        Parameters
        ----------
        i : any hashable type
            The item of the set below which the new item is inserted
        k : any hashable type
            The item to insert
        """
        if k in self._map:
            raise KeyError('Item %s already in the set' % str(k))
        if i not in self._map:
            raise KeyError('Item %s not in the set' % str(i))
        n = self._map[i]
        if n.down is None:
            # Insert at the bottom
            return self.append_bottom(k)
        # Now I know I am inserting between two actual elements
        m = self._Node(k, up=n, down=n.down)
        n.down.up = m
        n.down = m
        self._map[k] = m

    def index(self, k):
        """Return the index of a given element.

        This operation has O(n) time complexity, with n being the size of
        the set.

        Parameters
        ----------
        k : any hashable type
            The item whose index is queried

        Returns
        -------
        index : int
            The index of the item
        """
        if k not in self._map:
            raise KeyError('The item %s is not in the set' % str(k))
        index = 0
        curr = self._top
        while curr:
            if curr.val == k:
                return index
            curr = curr.down
            index += 1
        else:
            raise KeyError('It seems that the item %s is not in the set, '
                           'but you should never see this message. '
                           'There is something wrong with the code. '
                           'Debug it or report it to the developers' % str(k))

    def remove(self, k):
        """Remove an item from the set

        Parameters
        ----------
        k : any hashable type
            The item to remove
        """
        if k not in self._map:
            raise KeyError('Item %s not in the set' % str(k))
        n = self._map[k]
        if self._bottom is n:
            # I am trying to remove the bottom node
            self._bottom = n.up
        else:
            n.down.up = n.up
        if self._top is n:
            # I am trying to remove the top node
            self._top = n.down
        else:
            n.up.down = n.down
        self._map.pop(k)

    def clear(self):
        """Empty the set"""
        self._top = None
        self._bottom = None
        self._map.clear()
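

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module). It walks through the O(1) reordering operations
# that the cache policies below rely on.
def _demo_linked_set():
    s = LinkedSet([1, 2, 3])      # items are appended at the bottom in order
    assert list(s) == [1, 2, 3]   # iteration goes from top to bottom
    s.move_to_top(3)              # -> [3, 1, 2]
    assert s.top == 3 and s.bottom == 2
    s.move_up(2)                  # -> [3, 2, 1]
    assert list(s) == [3, 2, 1]
    assert s.pop_bottom() == 1    # -> [3, 2]
    assert len(s) == 2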


class Cache(object):
    """Base implementation of a cache object"""

    @abc.abstractmethod
    def __init__(self, maxlen, *args, **kwargs):
        """Constructor

        Parameters
        ----------
        maxlen : int
            The maximum number of items the cache can store
        """
        raise NotImplementedError('This method is not implemented')

    @abc.abstractmethod
    def __len__(self):
        """Return the number of items currently stored in the cache

        Returns
        -------
        len : int
            The number of items currently in the cache
        """
        raise NotImplementedError('This method is not implemented')

    @property
    @abc.abstractmethod
    def maxlen(self):
        """Return the maximum number of items the cache can store

        Returns
        -------
        maxlen : int
            The maximum number of items the cache can store
        """
        raise NotImplementedError('This method is not implemented')

    @abc.abstractmethod
    def dump(self):
        """Return a dump of all the elements currently in the cache, possibly
        sorted according to the eviction policy.

        Returns
        -------
        cache_dump : list
            The list of all items currently stored in the cache
        """
        raise NotImplementedError('This method is not implemented')

    def do(self, op, k, *args, **kwargs):
        """Utility method that performs a specified operation on a given item.

        This method allows one to perform one of the following operations on
        an item:
         * GET: Retrieve an item
         * PUT: Insert an item
         * UPDATE: Update the value associated to an item
         * DELETE: Remove an item

        Parameters
        ----------
        op : string
            The operation to execute: GET | PUT | UPDATE | DELETE
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        res : bool
            Boolean value being *True* if the operation succeeded or *False*
            otherwise.
        """
        res = {
            'GET': self.get,
            'PUT': self.put,
            'UPDATE': self.put,
            'DELETE': self.remove
        }[op](k, *args, **kwargs)
        return res if res is not None else False

    @abc.abstractmethod
    def has(self, k, *args, **kwargs):
        """Check if an item is in the cache without changing the internal
        state of the caching object.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : bool
            Boolean value being *True* if the requested item is in the cache
            or *False* otherwise
        """
        raise NotImplementedError('This method is not implemented')

    @abc.abstractmethod
    def get(self, k, *args, **kwargs):
        """Retrieve an item from the cache.

        Unlike *has(k)*, calling this method may change the internal state of
        the caching object depending on the specific cache implementation.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : bool
            Boolean value being *True* if the requested item is in the cache
            or *False* otherwise
        """
        raise NotImplementedError('This method is not implemented')

    @abc.abstractmethod
    def put(self, k, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        If the element is already present in the cache, it will not be
        inserted again but the internal state of the cache object may change.

        Parameters
        ----------
        k : any hashable type
            The item to be inserted

        Returns
        -------
        evicted : any hashable type
            The evicted object or *None* if no contents were evicted.
        """
        raise NotImplementedError('This method is not implemented')

    @abc.abstractmethod
    def remove(self, k, *args, **kwargs):
        """Remove an item from the cache, if present.

        Parameters
        ----------
        k : any hashable type
            The item to be removed

        Returns
        -------
        removed : bool
            *True* if the content was in the cache, *False* if it was not.
        """
        raise NotImplementedError('This method is not implemented')

    @abc.abstractmethod
    def clear(self):
        """Empty the cache"""
        raise NotImplementedError('This method is not implemented')


@register_cache_policy('NULL')
class NullCache(Cache):
    """Implementation of a null cache.

    This is a dummy cache which never stores anything. It is functionally
    identical to a cache with max size equal to 0.
    """

    def __init__(self, maxlen=0, *args, **kwargs):
        """Constructor

        Parameters
        ----------
        maxlen : int, optional
            The max length of the cache. This parameter is always ignored
        """
        pass

    def __len__(self):
        """Return the number of items currently stored in the cache.

        Since this is a dummy cache implementation, it is always empty

        Returns
        -------
        len : int
            The number of items currently in the cache. It is always 0.
        """
        return 0

    @property
    def maxlen(self):
        """Return the maximum number of items the cache can store.

        Since this is a dummy cache implementation, this value is 0.

        Returns
        -------
        maxlen : int
            The maximum number of items the cache can store. It is always 0
        """
        return 0

    def dump(self):
        """Return a list of all the elements currently in the cache.

        In this case it is always an empty list.

        Returns
        -------
        cache_dump : list
            An empty list.
        """
        return []

    def has(self, k, *args, **kwargs):
        """Check if an item is in the cache without changing the internal
        state of the caching object.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : bool
            Boolean value being *True* if the requested item is in the cache
            or *False* otherwise. It always returns *False*
        """
        return False

    def get(self, k, *args, **kwargs):
        """Retrieve an item from the cache.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : bool
            Boolean value being *True* if the requested item is in the cache
            or *False* otherwise. It always returns *False*
        """
        return False

    def put(self, k, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        Parameters
        ----------
        k : any hashable type
            The item to be inserted

        Returns
        -------
        evicted : any hashable type
            The evicted object or *None* if no contents were evicted.
        """
        return None

    def remove(self, k, *args, **kwargs):
        """Remove a specified item from the cache.

        If the element is not present in the cache, no action is taken.

        Parameters
        ----------
        k : any hashable type
            The item to be removed

        Returns
        -------
        removed : bool
            *True* if the content was in the cache, *False* if it was not.
            It always returns *False*
        """
        return False

    @inheritdoc(Cache)
    def clear(self):
        pass


@register_cache_policy('MIN')
class BeladyMinCache(Cache):
    """Belady's MIN cache replacement policy

    Belady's MIN policy is the provably optimal cache replacement policy
    under an arbitrary workload. Each time an item is inserted into a full
    cache, it evicts the item whose next request is the furthest in the
    future.

    This policy is not implementable in practice because it requires
    knowledge of future requests; however, it is very useful as a theoretical
    performance upper bound.
    """

    def __init__(self, maxlen, trace, **kwargs):
        """Constructor

        Parameters
        ----------
        maxlen : int
            The maximum number of items the cache can store
        trace : iterable
            Trace of requests that the cache will be subject to
        """
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')
        # Map each item to the deque of positions at which it is requested
        self._next = defaultdict(deque)
        for i, k in enumerate(trace):
            self._next[k].append(i)
        for d in self._next.values():
            d.append(np.inf)
        self._cache = {}

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return set(self._cache.keys())

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        self._next[k].popleft()
        return k in self._cache

    @inheritdoc(Cache)
    def put(self, k, *args, **kwargs):
        if len(self) < self.maxlen:
            self._cache[k] = self._next[k]
            return None
        # Identify the cached item whose next request is furthest in the
        # future
        next_cache = max(self._cache, key=lambda x: self._cache[x][0])
        if self._next[k][0] < self._next[next_cache][0]:
            self._cache.pop(next_cache)
            self._cache[k] = self._next[k]
            return next_cache
        else:
            return None

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k not in self._cache:
            return False
        self._cache.pop(k)
        return True

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
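

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module). Since MIN needs the full request trace upfront,
# the demo replays the same trace it was constructed with, calling get()
# before put() as the policy expects.
def _demo_belady_min():
    trace = [1, 2, 3, 1, 2]
    cache = BeladyMinCache(2, trace)
    hits = []
    for k in trace:
        hit = cache.get(k)
        hits.append(hit)
        if not hit:
            cache.put(k)
    # Item 3 is never requested again, so MIN declines to cache it and the
    # later requests for 1 and 2 both hit.
    assert hits == [False, False, False, True, True]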


@register_cache_policy('LRU')
class LruCache(Cache):
    """Least Recently Used (LRU) cache eviction policy.

    According to this policy, when a new item needs to be inserted into the
    cache, it evicts the least recently requested one. This eviction policy
    is efficient for line speed operations because both search and
    replacement tasks can be performed in constant time (*O(1)*).

    This policy has been shown to perform well in the presence of temporal
    locality in the request pattern. However, its performance drops under the
    Independent Reference Model (IRM) assumption (i.e. the probability that
    an item is requested is not dependent on previous requests).
    """

    @inheritdoc(Cache)
    def __init__(self, maxlen, **kwargs):
        self._cache = LinkedSet()
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return list(iter(self._cache))

    def position(self, k, *args, **kwargs):
        """Return the current position of an item in the cache.

        Position *0* refers to the head of the cache (i.e. the most recently
        used item), while position *maxlen - 1* refers to the tail of the
        cache (i.e. the least recently used item).

        This method does not change the internal state of the cache.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        position : int
            The current position of the item in the cache
        """
        if k not in self._cache:
            raise ValueError('The item %s is not in the cache' % str(k))
        return self._cache.index(k)

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        # Search for the content; if present, push it to the top, otherwise
        # return False
        if k not in self._cache:
            return False
        self._cache.move_to_top(k)
        return True

    def put(self, k, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        If the element is already present in the cache, it will be pushed to
        the top of the cache.

        Parameters
        ----------
        k : any hashable type
            The item to be inserted

        Returns
        -------
        evicted : any hashable type
            The evicted object or *None* if no contents were evicted.
        """
        # if content is in the cache, push it to the top, no eviction
        if k in self._cache:
            self._cache.move_to_top(k)
            return None
        # if content is not in the cache, append it to the top
        self._cache.append_top(k)
        return self._cache.pop_bottom() if len(self._cache) > self._maxlen \
            else None

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k not in self._cache:
            return False
        self._cache.remove(k)
        return True

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
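

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): a hit moves the item to the top of the list and
# an insertion into a full cache evicts the bottom (least recently used)
# item.
def _demo_lru():
    cache = LruCache(3)
    for k in (1, 2, 3):
        cache.put(k)
    assert cache.get(1)           # hit: 1 moves to the top -> [1, 3, 2]
    assert cache.put(4) == 2      # full cache: LRU item 2 is evicted
    assert cache.dump() == [4, 1, 3]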


@register_cache_policy('SLRU')
class SegmentedLruCache(Cache):
    """Segmented Least Recently Used (SLRU) cache eviction policy.

    This policy divides the cache space into a number of segments of equal
    size, each operating according to an LRU policy. When a new item is
    inserted to the cache, it is placed on the top entry of the bottom
    segment. Each subsequent hit promotes the item to the top entry of the
    segment above. When an item is evicted from a segment, it is demoted to
    the top entry of the segment immediately below. An item is evicted from
    the cache when it is evicted from the bottom segment.

    This policy can be viewed as a combination of the LRU and LFU replacement
    policies, as it makes eviction decisions based on both the frequency and
    the recency of item references.
    """

    def __init__(self, maxlen, segments=2, alloc=None, *args, **kwargs):
        """Constructor

        Parameters
        ----------
        maxlen : int
            The maximum number of items the cache can store
        segments : int
            The number of segments
        alloc : list
            List of floats, summing to 1. Indicates the fraction of overall
            caching space to be allocated to each segment.
        """
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')
        if not isinstance(segments, int) or segments <= 0 or segments > maxlen:
            raise ValueError('segments must be an integer and '
                             '0 < segments <= maxlen')
        if alloc:
            if len(alloc) != segments:
                raise ValueError('alloc must be an iterable with as many '
                                 'entries as segments')
            if np.abs(np.sum(alloc) - 1) > 0.001:
                raise ValueError('All alloc entries must sum up to 1')
        else:
            alloc = [1 / segments for _ in range(segments)]
        self._segment_maxlen = apportionment(maxlen, alloc)
        self._segment = [LinkedSet() for _ in range(segments)]
        # This map is a dictionary mapping each item in the cache with the
        # segment in which it is located. This is not strictly necessary to
        # locate an item, as we could have used the map in each segment.
        # This design choice however speeds up processing at the cost of a
        # moderate increase in memory footprint.
        self._cache = {}

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        if k not in self._cache:
            return False
        seg = self._cache[k]
        if seg == 0:
            self._segment[seg].move_to_top(k)
        else:
            # Promote the item to the top of the segment above
            self._segment[seg].remove(k)
            self._segment[seg - 1].append_top(k)
            self._cache[k] = seg - 1
            if len(self._segment[seg - 1]) > self._segment_maxlen[seg - 1]:
                # The segment above overflowed: demote its bottom item
                demoted = self._segment[seg - 1].pop_bottom()
                self._segment[seg].append_top(demoted)
                self._cache[demoted] = seg
        return True

    def put(self, k, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        If the element is already present in the cache, it will be pushed to
        the top of the cache.

        Parameters
        ----------
        k : any hashable type
            The item to be inserted

        Returns
        -------
        evicted : any hashable type
            The evicted object or *None* if no contents were evicted.
        """
        # if content is in the cache, promote it, no eviction
        if k in self._cache:
            seg = self._cache[k]
            if seg == 0:
                self._segment[seg].move_to_top(k)
            else:
                self._segment[seg].remove(k)
                self._segment[seg - 1].append_top(k)
                self._cache[k] = seg - 1
                if len(self._segment[seg - 1]) > self._segment_maxlen[seg - 1]:
                    demoted = self._segment[seg - 1].pop_bottom()
                    self._segment[seg].append_top(demoted)
                    self._cache[demoted] = seg
            return None
        # if content is not in the cache, append it on top of the probatory
        # segment and possibly evict the LRU item
        self._segment[-1].append_top(k)
        self._cache[k] = len(self._segment) - 1
        if len(self._segment[-1]) > self._segment_maxlen[-1]:
            evicted = self._segment[-1].pop_bottom()
            self._cache.pop(evicted)
            return evicted

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k not in self._cache:
            return False
        seg = self._cache.pop(k)
        self._segment[seg].remove(k)
        return True

    def position(self, k, *args, **kwargs):
        """Return the current position of an item in the cache.

        Position *0* refers to the head of the cache (i.e. the most recently
        used item), while position *maxlen - 1* refers to the tail of the
        cache (i.e. the least recently used item).

        This method does not change the internal state of the cache.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        position : int
            The current position of the item in the cache
        """
        if k not in self._cache:
            raise ValueError('The item %s is not in the cache' % str(k))
        seg = self._cache[k]
        position = self._segment[seg].index(k)
        return sum(len(self._segment[i]) for i in range(seg)) + position

    @inheritdoc(Cache)
    def dump(self, serialized=True):
        dump = list(list(iter(s)) for s in self._segment)
        return sum(dump, []) if serialized else dump

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
        for s in self._segment:
            s.clear()
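

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): with maxlen=4 and two segments, each segment
# holds two items. New items enter the probationary (bottom) segment and
# hits promote them to the segment above.
def _demo_slru():
    cache = SegmentedLruCache(4, segments=2)
    cache.put(1)
    cache.put(2)                  # probationary segment: [2, 1]
    assert cache.get(1) and cache.get(2)   # both promoted to the top segment
    cache.put(3)
    cache.put(4)
    assert cache.put(5) == 3      # probationary segment overflows: 3 evicted
    assert cache.dump() == [2, 1, 5, 4]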


@register_cache_policy('IN_CACHE_LFU')
class InCacheLfuCache(Cache):
    """In-cache Least Frequently Used (LFU) cache implementation

    The LFU replacement policy keeps a counter associated with each item.
    Such counters are increased when the associated item is requested. Upon
    insertion of a new item, the cache evicts the one which was requested the
    fewest times in the past, i.e. the one whose associated counter has the
    smallest value.

    This is an implementation of an In-Cache LFU, i.e. a cache that keeps
    counters for items only as long as they are in the cache and resets the
    counter of an item when it is evicted. This is different from a
    Perfect-LFU policy, in which a counter is maintained also when the
    content is evicted.

    In-cache LFU performs better than LRU under IRM demands. However, its
    implementation is computationally expensive since it cannot be
    implemented in such a way that both search and replacement tasks can be
    executed in constant time. This makes it particularly unfit for large
    caches and line speed operations.
    """

    @inheritdoc(Cache)
    def __init__(self, maxlen, *args, **kwargs):
        self._cache = {}
        self.t = 0
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return sorted(self._cache, key=lambda x: self._cache[x], reverse=True)

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        if self.has(k):
            freq, t = self._cache[k]
            self._cache[k] = freq + 1, t
            return True
        return False

    @inheritdoc(Cache)
    def put(self, k, *args, **kwargs):
        if not self.has(k):
            self.t += 1
            self._cache[k] = (1, self.t)
            if len(self._cache) > self._maxlen:
                evicted = min(self._cache, key=lambda x: self._cache[x])
                self._cache.pop(evicted)
                return evicted
        return None

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k in self._cache:
            self._cache.pop(k)
            return True
        return False

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
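

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): counters only exist while an item is cached, and
# the item with the smallest (frequency, insertion time) pair is evicted.
def _demo_in_cache_lfu():
    cache = InCacheLfuCache(2)
    cache.put('a')
    assert cache.get('a')         # 'a' now has frequency 2
    cache.put('b')                # 'b' has frequency 1
    assert cache.put('c') == 'b'  # 'b' is the least frequently used
    assert cache.dump() == ['a', 'c']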


@register_cache_policy('PERFECT_LFU')
class PerfectLfuCache(Cache):
    """Perfect Least Frequently Used (LFU) cache implementation

    The LFU replacement policy keeps a counter associated with each item.
    Such counters are increased when the associated item is requested. Upon
    insertion of a new item, the cache evicts the one which was requested the
    fewest times in the past, i.e. the one whose associated counter has the
    smallest value.

    This is an implementation of a Perfect-LFU, i.e. a cache that keeps
    counters for every item, even for those not in the cache.

    In contrast to LRU, Perfect-LFU has been shown to perform optimally under
    IRM demands. However, its implementation is computationally expensive
    since it cannot be implemented in such a way that both search and
    replacement tasks can be executed in constant time. This makes it
    particularly unfit for large caches and line speed operations.
    """

    @inheritdoc(Cache)
    def __init__(self, maxlen, *args, **kwargs):
        # Dict storing counters for all contents, not only those in cache
        self._counter = {}
        # Set storing only items currently in cache
        self._cache = set()
        self.t = 0
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return sorted(self._cache, key=lambda x: self._counter[x],
                      reverse=True)

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        self.t += 1
        if k in self._counter:
            freq, t = self._counter[k]
            self._counter[k] = freq + 1, t
        else:
            self._counter[k] = 1, self.t
        return self.has(k)

    @inheritdoc(Cache)
    def put(self, k, *args, **kwargs):
        if not self.has(k):
            if k in self._counter:
                freq, t = self._counter[k]
                self._counter[k] = (freq + 1, t)
            else:
                # If a get is always issued before a put, this branch should
                # never be executed
                self._counter[k] = (1, self.t)
            self._cache.add(k)
            if len(self._cache) > self._maxlen:
                evicted = min(self._cache, key=lambda x: self._counter[x])
                self._cache.remove(evicted)
                return evicted
        return None

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k in self._cache:
            self._cache.remove(k)
            return True
        return False

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
        self._counter.clear()
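

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): unlike In-Cache LFU, counters survive eviction,
# so a previously evicted item re-enters with its accumulated frequency.
def _demo_perfect_lfu():
    cache = PerfectLfuCache(1)
    cache.get('a')
    cache.put('a')                # counter of 'a' reaches 2
    cache.get('b')
    assert cache.put('b') == 'a'  # tie on frequency; 'a' is older, evicted
    cache.get('a')
    assert cache.put('a') == 'b'  # 'a' returns with count 4, beating 'b'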


@register_cache_policy('FIFO')
class FifoCache(Cache):
    """First In First Out (FIFO) cache implementation.

    According to the FIFO policy, when a new item is inserted, the evicted
    item is the first one inserted in the cache. The behavior of this policy
    differs from LRU only when an item already present in the cache is
    requested. In fact, while in LRU this item would be pushed to the top of
    the cache, in FIFO no movement is performed.

    The FIFO policy has a slightly simpler implementation than the LRU policy
    but yields worse performance.
    """

    @inheritdoc(Cache)
    def __init__(self, maxlen, *args, **kwargs):
        self._cache = set()
        self._maxlen = int(maxlen)
        self._d = deque()
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return list(self._d)

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    def position(self, k, *args, **kwargs):
        """Return the current position of an item in the cache.

        Position *0* refers to the head of the cache (i.e. the most recently
        inserted item), while position *maxlen - 1* refers to the tail of the
        cache (i.e. the least recently inserted item).

        This method does not change the internal state of the cache.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        position : int
            The current position of the item in the cache
        """
        for i, c in enumerate(self._d):
            if c == k:
                return i
        raise ValueError('The item %s is not in the cache' % str(k))

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        return self.has(k)

    @inheritdoc(Cache)
    def put(self, k, *args, **kwargs):
        evicted = None
        if not self.has(k):
            self._cache.add(k)
            self._d.appendleft(k)
        if len(self._cache) > self.maxlen:
            evicted = self._d.pop()
            self._cache.remove(evicted)
        return evicted

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k in self._cache:
            self._cache.remove(k)
            self._d.remove(k)
            return True
        return False

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
        self._d.clear()
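

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): unlike LRU, a hit does not reorder the queue,
# so the first inserted item is evicted regardless of later requests.
def _demo_fifo():
    cache = FifoCache(3)
    for k in (1, 2, 3):
        cache.put(k)
    assert cache.get(1)           # hit, but 1 keeps its queue position
    assert cache.put(4) == 1      # 1 was inserted first, so it is evicted
    assert cache.dump() == [4, 3, 2]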


@register_cache_policy('CLIMB')
class ClimbCache(Cache):
    """CLIMB cache implementation

    According to this policy, items are organized in a list. When an item in
    the cache is requested, it is moved one position up the list; if already
    on top, nothing is done. When a new item is inserted, it replaces the one
    at the bottom of the list.
    """

    @inheritdoc(Cache)
    def __init__(self, maxlen, *args, **kwargs):
        self._cache = LinkedSet()
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return list(iter(self._cache))

    def position(self, k, *args, **kwargs):
        """Return the current position of an item in the cache.

        Position *0* refers to the head of the cache, while position
        *maxlen - 1* refers to the tail of the cache.

        This method does not change the internal state of the cache.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        position : int
            The current position of the item in the cache
        """
        if k not in self._cache:
            raise ValueError('The item %s is not in the cache' % str(k))
        return self._cache.index(k)

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        # Search for the content; if present, move it one position up,
        # otherwise return False
        if k not in self._cache:
            return False
        self._cache.move_up(k)
        return True

    def put(self, k, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        If the element is already present in the cache, it will be pushed one
        position up.

        Parameters
        ----------
        k : any hashable type
            The item to be inserted

        Returns
        -------
        evicted : any hashable type
            The evicted object or *None* if no contents were evicted.
        """
        # if content is in the cache, move it one position up, no eviction
        if k in self._cache:
            self._cache.move_up(k)
            return None
        # Note: I am not sure of the implementation in the case of a cache
        # not yet full. In this implementation I am inserting an item at the
        # bottom of the cache when the cache is not full, but I am not sure
        # whether I should insert the element on top in this case. I could
        # not find any reference to this in the literature. Anyway, I think
        # this should not have an impact on steady-state performance.
        if len(self._cache) == self._maxlen:
            evicted = self._cache.pop_bottom()
        else:
            evicted = None
        self._cache.append_bottom(k)
        return evicted

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k not in self._cache:
            return False
        self._cache.remove(k)
        return True

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
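

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): new items enter at the bottom, each hit climbs
# one position, and insertions into a full cache replace the bottom item.
def _demo_climb():
    cache = ClimbCache(3)
    for k in (1, 2, 3):
        cache.put(k)              # items are appended at the bottom
    assert cache.dump() == [1, 2, 3]
    assert cache.get(3)           # 3 climbs one position
    assert cache.dump() == [1, 3, 2]
    assert cache.put(4) == 2      # full cache: the bottom item is replaced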


@register_cache_policy('RAND')
class RandEvictionCache(Cache):
    """Random eviction cache implementation.

    This class implements a cache replacement policy which randomly selects
    an item to evict when the cache is full. It generally yields poor
    performance in terms of cache hits, especially with non-stationary
    workloads, but is sometimes used as a baseline and for this reason it has
    been implemented here.

    In the case of stationary IRM workloads, the RAND eviction policy
    provably achieves the same cache hit ratio as the FIFO replacement
    policy.
    """

    @inheritdoc(Cache)
    def __init__(self, maxlen, *args, **kwargs):
        self._maxlen = int(maxlen)
        if self._maxlen <= 0:
            raise ValueError('maxlen must be positive')
        self._cache = set()
        # Array of cached items used to draw a random eviction victim
        self._a = [None for _ in range(self._maxlen)]

    @inheritdoc(Cache)
    def __len__(self):
        return len(self._cache)

    @property
    @inheritdoc(Cache)
    def maxlen(self):
        return self._maxlen

    @inheritdoc(Cache)
    def dump(self):
        return list(self._cache)

    @inheritdoc(Cache)
    def has(self, k, *args, **kwargs):
        return k in self._cache

    @inheritdoc(Cache)
    def get(self, k, *args, **kwargs):
        return self.has(k)

    @inheritdoc(Cache)
    def put(self, k, *args, **kwargs):
        evicted = None
        if not self.has(k):
            if len(self._cache) == self._maxlen:
                evicted_index = random.randint(0, self.maxlen - 1)
                evicted = self._a[evicted_index]
                self._a[evicted_index] = k
                self._cache.remove(evicted)
            else:
                self._a[len(self._cache)] = k
            self._cache.add(k)
        return evicted

    @inheritdoc(Cache)
    def remove(self, k, *args, **kwargs):
        if k not in self._cache:
            return False
        index = self._a.index(k)
        # Overwrite the removed entry with the last occupied slot
        self._a[index] = self._a[len(self._cache) - 1]
        self._a[len(self._cache) - 1] = None
        self._cache.remove(k)
        return True

    @inheritdoc(Cache)
    def clear(self):
        self._cache.clear()
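

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module). The victim is drawn uniformly at random, so the
# demo seeds the module-level random generator for reproducibility but only
# asserts properties that hold for any draw.
def _demo_rand_eviction():
    random.seed(42)
    cache = RandEvictionCache(2)
    cache.put(1)
    cache.put(2)
    evicted = cache.put(3)        # evicts either 1 or 2, chosen at random
    assert evicted in (1, 2)
    assert len(cache) == 2 and cache.has(3)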


def insert_after_k_hits_cache(cache, k=2, memory=None):
    """Return a cache inserting items only after k requests.

    This function implements a variant of the k-LRU and k-RANDOM policies,
    which insert items in the main cache only at the k-th request. However,
    proper k-LRU and k-RANDOM policies keep a separate queue of fixed size
    for items that have been hit the same number of times. For example, if
    k=3, there is one fixed-size queue storing all items hit once and another
    queue for items hit twice.

    In this implementation there is a single FIFO queue keeping all items hit
    fewer than k times. The size of this queue is equal to the value of the
    *memory* parameter. If *memory* is None, then this queue is unbounded. In
    the most common case of k=2, this implementation difference does not
    matter.

    Parameters
    ----------
    cache : Cache
        The instance of a cache to which insertion after k hits is applied
    k : int, optional
        The number of hits after which an item is inserted
    memory : int, optional
        The size of the metacache storing only the reference to the item and
        its number of hits, without storing the item itself.

    Returns
    -------
    cache : Cache
        The modified cache instance
    """
    if k < 1:
        raise ValueError("k must be positive")
    if k == 1:
        # This is a corner case where items are always inserted at the first
        # attempt
        return cache
    hits = {}
    if memory is not None:
        queue = LinkedSet()
    c_put = cache.put

    def put(item, force_insert=False, *args, **kwargs):
        if force_insert:
            if item in hits:
                hits.pop(item)
                if memory is not None:
                    queue.remove(item)
            return c_put(item)
        if item in hits:
            hits[item] += 1
            if hits[item] < k:
                return None
            else:
                # The item got hit enough times: insert it in the cache
                hits.pop(item)
                if memory is not None:
                    queue.remove(item)
                return c_put(item)
        else:
            hits[item] = 1
            if memory is not None:
                queue.append_top(item)
                if len(queue) > memory:
                    evicted = queue.pop_bottom()
                    hits.pop(evicted)
            return None

    cache.put = put
    cache.put.__doc__ = c_put.__doc__
    cache._metacache_hits = hits
    if memory is not None:
        cache._metacache_queue = queue
    return cache
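

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): with k=2, the first request for an item is only
# recorded in the metacache and the second request triggers the insertion.
def _demo_insert_after_k_hits():
    cache = insert_after_k_hits_cache(LruCache(3), k=2)
    assert cache.put('a') is None
    assert not cache.has('a')     # first request: not inserted yet
    cache.put('a')
    assert cache.has('a')         # second request: inserted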


def rand_insert_cache(cache, p, seed=None):
    """Return a random insertion cache

    It modifies the instance of a cache object such that items are inserted
    randomly instead of deterministically.

    This function modifies the behavior of the *put* method of a given cache
    instance such that it inserts contents randomly with a given probability.

    Parameters
    ----------
    cache : Cache
        The instance of a cache to which random insertion is applied
    p : float
        The insertion probability
    seed : any hashable type, optional
        The seed of the random number generator

    Returns
    -------
    cache : Cache
        The modified cache instance
    """
    if not isinstance(cache, Cache):
        raise TypeError('cache must be an instance of Cache or its '
                        'subclasses')
    if p < 0 or p > 1:
        raise ValueError('p must be a value between 0 and 1')
    cache = copy.deepcopy(cache)
    random.seed(seed)
    c_put = cache.put

    def put(k, *args, **kwargs):
        if random.random() < p:
            return c_put(k)

    cache.put = put
    cache.put.__doc__ = c_put.__doc__
    return cache
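

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): each put() now inserts with probability p, so
# after many puts the cache is full even though many insertions were
# skipped.
def _demo_rand_insert():
    cache = rand_insert_cache(LruCache(3), p=0.5, seed=0)
    for k in range(100):
        cache.put(k)
    assert 0 < len(cache) <= 3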


def keyval_cache(cache):
    """It modifies the instance of a cache object such that items are saved
    together with a value instead of just a key.

    This modifies the signature and/or return types of the methods *get*,
    *put* and *dump*. The new format is documented in the docstrings of the
    modified methods of the cache instance.

    This function modifies the contract of methods of Cache objects, which is
    admittedly a bad software engineering practice. It may also lead to bugs
    in a key-value cache implementation if the base key-only cache
    implementation from which it derives has methods calling other methods of
    the same instance.

    Parameters
    ----------
    cache : Cache
        The instance of a cache to be changed to a key-value cache

    Returns
    -------
    cache : Cache
        The modified cache instance
    """
    if not isinstance(cache, Cache):
        raise TypeError('cache must be an instance of Cache or its '
                        'subclasses')
    if len(cache) > 0:
        raise ValueError('the cache must be empty')
    cache = copy.deepcopy(cache)
    cache._val = {}
    k_put = cache.put
    k_get = cache.get
    k_remove = cache.remove
    k_dump = cache.dump
    k_clear = cache.clear

    def put(k, v, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        If the element is already present in the cache with the same value,
        it will not be inserted again, but the internal state of the cache
        object may change.

        Parameters
        ----------
        k : any hashable type
            The key of the item to be inserted
        v : any hashable type
            The value of the item to be inserted

        Returns
        -------
        evicted : tuple
            The key, value tuple of the evicted object or *None* if no
            contents were evicted.
        """
        evicted = k_put(k)
        cache._val[k] = v
        if evicted is not None:
            val = cache._val.pop(evicted)
            return evicted, val

    def get(k, *args, **kwargs):
        """Retrieve an item from the cache.

        Unlike *has(k)*, calling this method may change the internal state of
        the caching object depending on the specific cache implementation.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : any hashable type
            The value of the requested object or *None* if it is not in the
            cache
        """
        return cache._val[k] if k_get(k) else None

    def remove(k, *args, **kwargs):
        """Remove an item from the cache, if present

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : any hashable type
            The value of the deleted object or *None* if it was not in the
            cache
        """
        return cache._val.pop(k) if k_remove(k) else None

    def dump(*args, **kwargs):
        """Return a dump of all the elements currently in the cache, possibly
        sorted according to the eviction policy.

        Returns
        -------
        cache_dump : list of tuples
            The list of items currently stored in the cache represented as
            key, value pairs
        """
        dump = k_dump()
        return [(k, cache._val[k]) for k in dump]

    def clear():
        k_clear()
        cache._val.clear()

    def value(k, *args, **kwargs):
        """Return the value associated to item k.

        Unlike *get(k)*, calling this method does not change the internal
        state of the cache.

        Parameters
        ----------
        k : any hashable type
            The item looked up in the cache

        Returns
        -------
        v : any hashable type
            The value of the requested object or *None* if it is not in the
            cache
        """
        return cache._val[k] if k in cache._val else None

    cache.put = put
    cache.get = get
    cache.remove = remove
    cache.dump = dump
    cache.clear = clear
    cache.clear.__doc__ = k_clear.__doc__
    cache.value = value
    return cache
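

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module): put() now takes a value, get() returns it, and
# evictions are reported as (key, value) pairs.
def _demo_keyval():
    cache = keyval_cache(FifoCache(2))
    cache.put('a', 1)
    cache.put('b', 2)
    assert cache.get('a') == 1
    assert cache.put('c', 3) == ('a', 1)   # FIFO evicts 'a' with its value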


def ttl_cache(cache, f_time):
    """Return a TTL cache.

    This function takes as input a cache policy and returns a new policy
    where items, when inserted, are (optionally) labelled with their
    expiration time and are automatically evicted when their validity
    expires.

    The time validity is verified against the return value of the callable
    argument *f_time*, which is called whenever a purging is executed. This
    implementation can be used with both real and simulated time.

    Parameters
    ----------
    cache : Cache
        The instance of a cache to be changed to a TTL cache
    f_time : callable
        A function that returns the current time (simulated or real). The
        return type must be a numerical value, e.g. float

    Returns
    -------
    cache : Cache
        The modified cache instance

    Notes
    -----
    The returned TTL cache performs purging operations only when *has*,
    *get*, *put* and *dump* operations are performed. This ensures
    correctness when normal caches are used with common routing and caching
    strategies. However, if other operations like *position* or *len* are
    executed, results may also take expired items into account. In such
    cases, it is advisable to execute a *purge* first.
    """
    if not isinstance(cache, Cache):
        raise TypeError('cache must be an instance of Cache or its '
                        'subclasses')
    if len(cache) > 0:
        raise ValueError('the cache must be empty')
    if not hasattr(f_time, '__call__'):
        raise TypeError('f_time must be callable')
    cache = copy.deepcopy(cache)
    cache.f_time = f_time
    cache.expiry = {}
    # List of cached items sorted by decreasing expiration time, from top
    # to bottom
    cache._exp_list = LinkedSet()
    c_put = cache.put
    c_get = cache.get
    c_has = cache.has
    c_remove = cache.remove
    c_dump = cache.dump
    c_clear = cache.clear

    def _purge_till(expiry):
        """Purge all entries expiring before a certain time

        Parameters
        ----------
        expiry : float
            Cutoff expiration time
        """
        while cache._exp_list.bottom is not None and \
                cache.expiry[cache._exp_list.bottom] < expiry:
            expired = cache._exp_list.pop_bottom()
            cache.expiry.pop(expired)
            c_remove(expired)

    def purge():
        """Purge all expired items"""
        cache._purge_till(cache.f_time())

    def get(k, *args, **kwargs):
        if c_get(k):
            if cache.f_time() < cache.expiry[k]:
                return True
            else:
                remove(k)
        return False

    def put(k, ttl=None, expires=None, *args, **kwargs):
        """Insert an item in the cache if not already inserted.

        If the element is already present in the cache, it will not be
        inserted again but the internal state of the cache object may change.

        Parameters
        ----------
        k : any hashable type
            The item to be inserted
        ttl : float, optional
            The TTL of the item, i.e. its relative expiration time
        expires : float, optional
            The absolute expiration time of the item. It cannot be used in
            conjunction with ttl. If both ttl and expires are None, then the
            inserted content has infinite TTL.

        Returns
        -------
        evicted : any hashable type
            The evicted object or *None* if no contents were evicted.
        """
        now = cache.f_time()
        if ttl is not None:
            if expires is not None:
                raise ValueError('Both expires and ttl parameters provided. '
                                 'Only one can be provided.')
            if ttl <= 0:
                # if TTL is not positive, then do not cache the content at all
                return None
            expires = now + ttl
        else:
            # case where TTL is None
            if expires is None:
                # If both TTL and expires are None, then TTL is infinite
                expires = np.inf
            elif expires <= now:
                return None
        # Purge expired items only if the cache is full, for performance
        # reasons
        if len(cache) == cache.maxlen:
            cache._purge_till(now)
        evicted = c_put(k)
        if evicted is not None:
            cache.expiry.pop(evicted)
            cache._exp_list.remove(evicted)
        if k not in cache.expiry or cache.expiry[k] < expires:
            cache.expiry[k] = expires
            if k in cache._exp_list:
                cache._exp_list.remove(k)
            if len(cache._exp_list) == 0:
                cache._exp_list.append_top(k)
            else:
                # Keep _exp_list sorted by decreasing expiration time
                for i in cache._exp_list:
                    if expires >= cache.expiry[i]:
                        cache._exp_list.insert_above(i, k)
                        break
                else:
                    cache._exp_list.append_bottom(k)
        return evicted

    def has(k, *args, **kwargs):
        return c_has(k) and cache.f_time() <= cache.expiry[k]

    def remove(k, *args, **kwargs):
        c_remove(k)
        cache.expiry.pop(k)
        cache._exp_list.remove(k)

    def dump():
        """Return a dump of all the elements currently in the cache, possibly
        sorted according to the eviction policy.

        Returns
        -------
        cache_dump : list of tuples
            The list of items currently stored in the cache represented as
            (key, expiration time) pairs
        """
        cache.purge()
        dump = c_dump()
        return [(k, cache.expiry[k]) for k in dump]

    def clear():
        c_clear()
        cache.expiry.clear()
        cache._exp_list.clear()

    cache._purge_till = _purge_till
    cache.get = get
    cache.put = put
    cache.has = has
    cache.remove = remove
    cache.dump = dump
    cache.clear = clear
    cache.purge = purge
    cache.clear.__doc__ = c_clear.__doc__
    return cache
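

# Illustrative usage sketch (added for documentation; not part of the
# original Icarus module). A one-element list is used as a mutable cell
# standing in for a simulated clock passed as f_time.
def _demo_ttl():
    clock = [0.0]
    cache = ttl_cache(LruCache(2), f_time=lambda: clock[0])
    cache.put('a', ttl=5)
    assert cache.has('a')         # still valid at time 0
    clock[0] = 10.0               # advance past the expiration time
    assert not cache.has('a')     # expired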


def ttl_keyval_cache():
    pass