Source code for icarus.tools.cacheperf

"""Functions for modelling and evaluating the performance of cache
replacement policies.
"""
from __future__ import division
import math

import numpy as np
from scipy.optimize import fsolve

from icarus.tools import TruncatedZipfDist, DiscreteDist


__all__ = [
    'che_characteristic_time',
    'che_per_content_cache_hit_ratio',
    'che_cache_hit_ratio',
    'che_characteristic_time_simplified',
    'che_per_content_cache_hit_ratio_simplified',
    'che_cache_hit_ratio_simplified',
    'che_characteristic_time_generalized',
    'che_per_content_cache_hit_ratio_generalized',
    'che_cache_hit_ratio_generalized',
    'laoutaris_characteristic_time',
    'laoutaris_per_content_cache_hit_ratio',
    'laoutaris_cache_hit_ratio',
    'optimal_cache_hit_ratio',
    'numeric_per_content_cache_hit_ratio',
    'numeric_cache_hit_ratio',
    'numeric_cache_hit_ratio_2_layers',
    'trace_driven_cache_hit_ratio',
    ]


def che_characteristic_time(pdf, cache_size, target=None):
    """Return the characteristic time of an item or of all items, as defined
    by Che et al.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    target : int, optional
        The item index [1, N] for which the characteristic time is requested.
        If not specified, the function calculates the characteristic time of
        all the items in the population.

    Returns
    -------
    r : array of float or float
        If target is None, returns an array with the characteristic times of
        all items in the population. If a target is specified, then it
        returns the characteristic time of only the specified item.
    """
    def func_r(r, i):
        return sum(math.exp(-pdf[j] * r) for j in range(len(pdf)) if j != i) \
               - len(pdf) + 1 + cache_size
    items = range(len(pdf)) if target is None else [target - 1]
    r = [fsolve(func_r, x0=cache_size, args=(i,)) for i in items]
    return r if target is None else r[0]


def che_per_content_cache_hit_ratio(pdf, cache_size, target=None):
    """Estimate the cache hit ratio of an item or of all items using Che's
    approximation.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    target : int, optional
        The item index for which the cache hit ratio is requested. If not
        specified, the function calculates the cache hit ratio of all the
        items in the population.

    Returns
    -------
    cache_hit_ratio : array of float or float
        If target is None, returns an array with the cache hit ratios of all
        items in the population. If a target is specified, then it returns
        the cache hit ratio of only the specified item.
    """
    items = range(len(pdf)) if target is None else [target]
    r = che_characteristic_time(pdf, cache_size)
    hit_ratio = [1 - math.exp(-pdf[i] * r[i]) for i in items]
    return hit_ratio if target is None else hit_ratio[0]


def che_cache_hit_ratio(pdf, cache_size):
    """Estimate the overall cache hit ratio of an LRU cache under generic IRM
    demand using Che's approximation.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)

    Returns
    -------
    cache_hit_ratio : float
        The overall cache hit ratio
    """
    ch = che_per_content_cache_hit_ratio(pdf, cache_size)
    return sum(pdf[i] * ch[i] for i in range(len(pdf)))


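# Illustrative usage sketch (not part of the original icarus API): estimate
# the hit ratio of a 100-item LRU cache under Zipf-distributed IRM demand
# using Che's approximation. The `_example_*` name and the parameter values
# (Zipf exponent 0.8, population 1000, cache size 100) are arbitrary choices
# made here purely for illustration.
def _example_che_hit_ratio():
    pdf = TruncatedZipfDist(0.8, 1000).pdf
    return che_cache_hit_ratio(pdf, 100)

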
def che_characteristic_time_simplified(pdf, cache_size):
    """Return the characteristic time of an LRU cache under a given IRM
    workload, as defined by Che et al.

    This function computes one single characteristic time for all contents.
    This further approximation is normally accurate for workloads with
    reduced skewness in their popularity distribution.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)

    Returns
    -------
    r : float
        The characteristic time
    """
    def func_r(r):
        return sum(math.exp(-pdf[j] * r) for j in range(len(pdf))) \
               - len(pdf) + cache_size
    return fsolve(func_r, x0=cache_size)[0]


def che_per_content_cache_hit_ratio_simplified(pdf, cache_size, target=None):
    """Estimate the cache hit ratio of an item or of all items using Che's
    approximation. This version uses a single characteristic time for all
    contents.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    target : int, optional
        The item index for which the cache hit ratio is requested. If not
        specified, the function calculates the cache hit ratio of all the
        items in the population.

    Returns
    -------
    cache_hit_ratio : array of float or float
        If target is None, returns an array with the cache hit ratios of all
        items in the population. If a target is specified, then it returns
        the cache hit ratio of only the specified item.
    """
    items = range(len(pdf)) if target is None else [target]
    r = che_characteristic_time_simplified(pdf, cache_size)
    hit_ratio = [1 - math.exp(-pdf[i] * r) for i in items]
    return hit_ratio if target is None else hit_ratio[0]


def che_cache_hit_ratio_simplified(pdf, cache_size):
    """Estimate the overall cache hit ratio of an LRU cache under generic IRM
    demand using Che's approximation. This version uses a single
    characteristic time for all contents.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)

    Returns
    -------
    cache_hit_ratio : float
        The overall cache hit ratio
    """
    ch = che_per_content_cache_hit_ratio_simplified(pdf, cache_size)
    return sum(pdf[i] * ch[i] for i in range(len(pdf)))


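# Illustrative sketch (not part of the original module): the simplified
# variant solves a single fixed-point equation instead of one per item, so it
# is typically much cheaper while giving similar estimates for mildly skewed
# demand. The function name and parameter values below are arbitrary.
def _example_compare_che_variants():
    pdf = TruncatedZipfDist(0.6, 500).pdf
    per_item = che_cache_hit_ratio(pdf, 50)
    single_time = che_cache_hit_ratio_simplified(pdf, 50)
    return per_item, single_time

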
def che_p_in_func(pdf, cache_size, policy, **policy_args):
    """Return a function that computes the cache hit ratio of a policy given
    the probability of a content being requested and the characteristic time.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    policy : str
        The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')

    Returns
    -------
    p_in : callable
        Function mapping the request probability of an item and the
        characteristic time to the item's cache hit ratio
    """
    if policy == 'LRU':
        p_in = lambda p, t: 1 - np.exp(-p * t)
    elif policy == 'q-LRU':
        if 'q' not in policy_args:
            raise ValueError('q parameter not specified')
        q = policy_args['q']
        p_in = lambda p, t: (q * (1 - np.exp(-p * t)) /
                             (np.exp(-p * t) + q * (1 - np.exp(-p * t))))
    elif policy in ('FIFO', 'RANDOM'):
        p_in = lambda p, t: p * t / (1 + p * t)
    else:
        raise ValueError('policy %s not recognized' % policy)
    return p_in


def che_characteristic_time_generalized(pdf, cache_size, policy,
                                        **policy_args):
    """Return the characteristic time of a cache under a given IRM demand
    according to the extension of Che's approximation proposed by Martina et
    al.

    This function computes one single characteristic time for all content
    items.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    policy : str
        The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')

    Returns
    -------
    r : float
        The characteristic time

    References
    ----------
    V. Martina, M. Garetto, and E. Leonardi, "A unified approach to the
    performance analysis of caching systems," in Proceedings of the 2014 IEEE
    Conference on Computer Communications (INFOCOM'14), April 2014
    """
    p_in = che_p_in_func(pdf, cache_size, policy, **policy_args)

    def func_t(t):
        return np.sum(p_in(pdf, t)) - cache_size
    return fsolve(func_t, x0=cache_size)[0]


def che_per_content_cache_hit_ratio_generalized(pdf, cache_size, policy,
                                                **policy_args):
    """Estimate the cache hit ratio of an item or of all items in a cache
    subject to IRM demand according to the extension of Che's approximation
    proposed by Martina et al.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    policy : str
        The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')

    Returns
    -------
    cache_hit_ratio : array of float
        Array with the cache hit ratios of all items in the population

    References
    ----------
    V. Martina, M. Garetto, and E. Leonardi, "A unified approach to the
    performance analysis of caching systems," in Proceedings of the 2014 IEEE
    Conference on Computer Communications (INFOCOM'14), April 2014
    """
    p_in = che_p_in_func(pdf, cache_size, policy, **policy_args)
    t = che_characteristic_time_generalized(pdf, cache_size, policy,
                                            **policy_args)
    return p_in(pdf, t)


def che_cache_hit_ratio_generalized(pdf, cache_size, policy='LRU',
                                    **policy_args):
    """Estimate the overall cache hit ratio of a cache subject to IRM demand
    according to the extension of Che's approximation proposed by Martina et
    al.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)
    policy : str, optional
        The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')

    Returns
    -------
    cache_hit_ratio : float
        The overall cache hit ratio

    References
    ----------
    V. Martina, M. Garetto, and E. Leonardi, "A unified approach to the
    performance analysis of caching systems," in Proceedings of the 2014 IEEE
    Conference on Computer Communications (INFOCOM'14), April 2014
    """
    ch = che_per_content_cache_hit_ratio_generalized(pdf, cache_size, policy,
                                                     **policy_args)
    return sum(pdf[i] * ch[i] for i in range(len(pdf)))


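# Illustrative sketch (not part of the original module): the generalized
# (Martina et al.) estimator takes the replacement policy as a string and any
# policy parameters as keyword arguments, e.g. the insertion probability `q`
# of q-LRU. The function name and parameter values below are arbitrary.
def _example_generalized_policies():
    pdf = TruncatedZipfDist(0.8, 1000).pdf
    lru = che_cache_hit_ratio_generalized(pdf, 100, 'LRU')
    q_lru = che_cache_hit_ratio_generalized(pdf, 100, 'q-LRU', q=0.1)
    fifo = che_cache_hit_ratio_generalized(pdf, 100, 'FIFO')
    return lru, q_lru, fifo

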
def laoutaris_characteristic_time(alpha, population, cache_size, order=3):
    """Estimate Che's characteristic time of an LRU cache under general
    power-law demand using the Laoutaris approximation.

    Parameters
    ----------
    alpha : float
        The coefficient of the demand power-law
    population : int
        The content population
    cache_size : int
        The cache size
    order : int, optional
        The order of the Taylor expansion. Supports only 2 and 3

    Returns
    -------
    r : float
        The characteristic time

    References
    ----------
    http://arxiv.org/pdf/0705.1970.pdf
    """
    def H(N, alpha):
        return sum(1.0 / l ** alpha for l in range(1, N + 1))

    def cubrt(x):
        """Compute the cubic root of a number

        Parameters
        ----------
        x : float
            Number whose cubic root is to be calculated

        Returns
        -------
        cubrt : float
            The cubic root
        """
        exp = 1.0 / 3
        return x ** exp if x >= 0 else -(-x) ** exp

    def solve_3rd_order_equation(a, b, c, d):
        """Calculate the real solutions of the 3rd order equation
        a*x**3 + b*x**2 + c*x + d = 0

        Parameters
        ----------
        a : float
            Coefficient of the 3rd degree monomial
        b : float
            Coefficient of the 2nd degree monomial
        c : float
            Coefficient of the 1st degree monomial
        d : float
            Constant

        Returns
        -------
        roots : tuple
            Tuple of real solutions. The tuple may comprise either 1 or 3
            values

        Notes
        -----
        The method used to calculate the roots is described in this paper:
        http://www.nickalls.org/dick/papers/maths/cubic1993.pdf
        """
        # Compute parameters
        x_N = -b / (3 * a)
        y_N = a * x_N ** 3 + b * x_N ** 2 + c * x_N + d
        delta_2 = (b ** 2 - 3 * a * c) / (9 * a ** 2)
        h_2 = 4 * (a ** 2) * (delta_2 ** 3)
        # Calculate discriminant and find roots
        discr = y_N ** 2 - h_2
        if discr > 0:
            r_x = (x_N + cubrt(0.5 / a * (-y_N + math.sqrt(discr))) \
                   + cubrt(0.5 / a * (-y_N - math.sqrt(discr))),)
        elif discr == 0:
            delta = math.sqrt(delta_2)
            r1 = r2 = x_N + delta
            r3 = x_N - 2 * delta
            r_x = (r1, r2, r3)
        else:  # discr < 0
            h = math.sqrt(h_2)
            delta = math.sqrt(delta_2)
            Theta = np.arccos(-y_N / h) / 3.0
            r1 = x_N + 2 * delta * np.cos(Theta)
            r2 = x_N + 2 * delta * np.cos(2 * np.pi / 3 - Theta)
            r3 = x_N + 2 * delta * np.cos(2 * np.pi / 3 + Theta)
            r_x = (r1, r2, r3)
        return r_x

    # Get parameters
    C = cache_size
    N = population
    # Calculate harmonic numbers
    H_N_a = H(N, alpha)
    H_N_2a = H(N, 2 * alpha)
    H_N_3a = H(N, 3 * alpha)
    H_N_4a = H(N, 4 * alpha)
    Lambda = 1.0 / H_N_a
    # Find values of r
    if order == 2:
        # Calculate coefficients of the 2nd order equation
        alpha_2 = (0.5 * Lambda ** 2 * H_N_2a) \
                  - (0.5 * Lambda ** 3 * C * H_N_3a) \
                  + (0.25 * Lambda ** 4 * C ** 2 * H_N_4a)
        alpha_1 = -(Lambda * H_N_a) \
                  + (0.5 * Lambda ** 3 * C ** 2 * H_N_3a) \
                  - (0.5 * Lambda ** 4 * C ** 3 * H_N_4a)
        alpha_0 = C + (0.25 * Lambda ** 4 * C ** 4 * H_N_4a)
        # Calculate the discriminant to verify whether real solutions exist
        discr = alpha_1 ** 2 - 4 * alpha_2 * alpha_0
        if discr < 0:
            raise ValueError('Could not find real values for the '
                             'characteristic time. Try using a 3rd order '
                             'expansion')
        # Calculate roots of the 2nd order equation
        r1 = (-alpha_1 + math.sqrt(discr)) / (2 * alpha_2)
        r2 = (-alpha_1 - math.sqrt(discr)) / (2 * alpha_2)
        r_x = (r1, r2)
    elif order == 3:
        # Additional parameters
        H_N_5a = H(N, 5 * alpha)
        H_N_6a = H(N, 6 * alpha)
        # Calculate coefficients of the 3rd order equation
        alpha_3 = -(Lambda ** 3 / 6 * H_N_3a) + (Lambda ** 4 * C / 6 * H_N_4a) - \
                  (Lambda ** 5 * C ** 2 / 12 * H_N_5a) + (Lambda ** 6 * C ** 3 / 36 * H_N_6a)
        alpha_2 = (Lambda ** 2 / 2 * H_N_2a) - (Lambda ** 4 * C ** 2 / 4 * H_N_4a) + \
                  (Lambda ** 5 * C ** 3 / 6 * H_N_5a) - (Lambda ** 6 * C ** 4 / 12 * H_N_6a)
        alpha_1 = -Lambda * H_N_a + (Lambda ** 4 * C ** 3 / 6 * H_N_4a) - \
                  (Lambda ** 5 * C ** 4 / 12 * H_N_5a) + (Lambda ** 6 * C ** 5 / 12 * H_N_6a)
        alpha_0 = C - (Lambda ** 4 * C ** 4 / 12 * H_N_4a) - \
                  (Lambda ** 6 * C ** 6 / 36 * H_N_6a)
        # Solve the 3rd order equation
        r_x = solve_3rd_order_equation(alpha_3, alpha_2, alpha_1, alpha_0)
    else:
        raise ValueError('Only 2nd and 3rd order solutions are supported')
    # Find the actual value of the characteristic time (r), if it exists.
    # We select the minimum positive r greater than C.
    r_c = [x for x in r_x if x > C]
    if r_c:
        return min(r_c)
    raise ValueError('Cannot compute cache hit ratio using this method. '
                     'Could not find positive values of characteristic time '
                     'greater than the cache size.')


def laoutaris_per_content_cache_hit_ratio(alpha, population, cache_size,
                                          order=3, target=None):
    """Estimate the per-content cache hit ratio of an LRU cache under general
    power-law demand using the Laoutaris approximation.

    Parameters
    ----------
    alpha : float
        The coefficient of the demand power-law distribution
    population : int
        The content population
    cache_size : int
        The cache size
    order : int, optional
        The order of the Taylor expansion. Supports only 2 and 3
    target : int, optional
        The item index [1, N] for which the cache hit ratio is requested. If
        not specified, the function calculates the cache hit ratio of all the
        items in the population.

    Returns
    -------
    cache_hit_ratio : array of float or float
        If target is None, returns an array with the cache hit ratios of all
        items in the population. If a target is specified, then it returns
        the cache hit ratio of only the specified item.

    References
    ----------
    http://arxiv.org/pdf/0705.1970.pdf
    """
    pdf = TruncatedZipfDist(alpha, population).pdf
    r = laoutaris_characteristic_time(alpha, population, cache_size, order)
    items = range(len(pdf)) if target is None else [target - 1]
    hit_ratio = [1 - math.exp(-pdf[i] * r) for i in items]
    return hit_ratio if target is None else hit_ratio[0]


def laoutaris_cache_hit_ratio(alpha, population, cache_size, order=3):
    """Estimate the cache hit ratio of an LRU cache under general power-law
    demand using the Laoutaris approximation.

    Parameters
    ----------
    alpha : float
        The coefficient of the demand power-law distribution
    population : int
        The content population
    cache_size : int
        The cache size
    order : int, optional
        The order of the Taylor expansion. Supports only 2 and 3

    Returns
    -------
    cache_hit_ratio : float
        The cache hit ratio

    References
    ----------
    http://arxiv.org/pdf/0705.1970.pdf
    """
    pdf = TruncatedZipfDist(alpha, population).pdf
    r = laoutaris_characteristic_time(alpha, population, cache_size, order)
    return np.sum(pdf * (1 - np.exp(-r * pdf)))


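# Illustrative sketch (not part of the original module): the Laoutaris
# approximation takes the Zipf exponent and population directly rather than a
# pdf, so its estimate can be compared with Che's approximation on the same
# demand. The function name and parameter values below are arbitrary.
def _example_laoutaris_vs_che():
    alpha, population, cache_size = 0.8, 1000, 100
    laoutaris = laoutaris_cache_hit_ratio(alpha, population, cache_size)
    che = che_cache_hit_ratio(TruncatedZipfDist(alpha, population).pdf,
                              cache_size)
    return laoutaris, che

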
def optimal_cache_hit_ratio(pdf, cache_size):
    """Return the value of the optimal cache hit ratio of a cache under IRM
    stationary demand with a given pdf.

    In practice this function returns the probability of a cache hit if the
    cache is filled with the *cache_size* most popular items. This value also
    corresponds to the steady-state cache hit ratio of an LFU cache.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache_size : int
        The size of the cache (in number of items)

    Returns
    -------
    cache_hit_ratio : float
        The optimal cache hit ratio
    """
    if cache_size >= len(pdf):
        return 1.0
    return sum(sorted(pdf, reverse=True)[:cache_size])


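# Illustrative sketch (not part of the original module): the optimal (LFU)
# hit ratio is an upper bound on what any replacement policy can achieve
# under IRM demand, so it is a useful reference point for the LRU estimates
# above. The function name and parameter values below are arbitrary.
def _example_lru_vs_optimal():
    pdf = TruncatedZipfDist(0.8, 1000).pdf
    return che_cache_hit_ratio(pdf, 100), optimal_cache_hit_ratio(pdf, 100)

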
def numeric_per_content_cache_hit_ratio(pdf, cache, warmup=None, measure=None,
                                        seed=None, target=None):
    """Numerically compute the per-content cache hit ratio of a cache under
    IRM stationary demand with a given pdf.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache : Cache
        The cache object (i.e. the instance of a class subclassing
        icarus.Cache)
    warmup : int, optional
        The number of warmup requests to generate. If not specified, it is
        set to 10 times the content population
    measure : int, optional
        The number of measured requests to generate. If not specified, it is
        set to 30 times the content population
    seed : int, optional
        The seed used to generate random numbers
    target : int, optional
        The item index [1, N] for which the cache hit ratio is requested. If
        not specified, the function calculates the cache hit ratio of all the
        items in the population.

    Returns
    -------
    cache_hit_ratio : array of float or float
        If target is None, returns an array with the cache hit ratios of all
        items in the population. If a target is specified, then it returns
        the cache hit ratio of only the specified item.
    """
    if warmup is None:
        warmup = 10 * len(pdf)
    if measure is None:
        measure = 30 * len(pdf)
    z = DiscreteDist(pdf, seed)
    for _ in range(warmup):
        content = z.rv()
        if not cache.get(content):
            cache.put(content)
    cache_hits = np.zeros(len(pdf))
    requests = np.zeros(len(pdf))
    for _ in range(measure):
        content = z.rv()
        requests[content - 1] += 1
        if cache.get(content):
            cache_hits[content - 1] += 1
        else:
            cache.put(content)
    hit_ratio = np.where(requests > 0, cache_hits / requests, requests)
    return hit_ratio if target is None else hit_ratio[target - 1]


def numeric_cache_hit_ratio(pdf, cache, warmup=None, measure=None, seed=None):
    """Numerically compute the cache hit ratio of a cache under IRM
    stationary demand with a given pdf.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    cache : Cache
        The cache object (i.e. the instance of a class subclassing
        icarus.Cache)
    warmup : int, optional
        The number of warmup requests to generate. If not specified, it is
        set to 10 times the content population
    measure : int, optional
        The number of measured requests to generate. If not specified, it is
        set to 30 times the content population
    seed : int, optional
        The seed used to generate random numbers

    Returns
    -------
    cache_hit_ratio : float
        The cache hit ratio
    """
    if warmup is None:
        warmup = 10 * len(pdf)
    if measure is None:
        measure = 30 * len(pdf)
    z = DiscreteDist(pdf, seed)
    # Cache warm-up (hits and misses are not counted)
    for _ in range(warmup):
        content = z.rv()
        if not cache.get(content):
            cache.put(content)
    # Measurement phase
    cache_hits = 0
    for _ in range(measure):
        content = z.rv()
        if cache.get(content):
            cache_hits += 1
        else:
            cache.put(content)
    return cache_hits / measure


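# Illustrative sketch (not part of the original module): the numeric
# estimators work with any cache object exposing get()/put(). An LRU cache
# class is assumed here to be importable from icarus.models as LruCache;
# substitute whatever cache implementation is available in your installation.
# The function name and parameter values are arbitrary.
def _example_numeric_hit_ratio():
    from icarus.models import LruCache  # assumed import, see note above
    pdf = TruncatedZipfDist(0.8, 500).pdf
    return numeric_cache_hit_ratio(pdf, LruCache(50), seed=0)

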
def numeric_cache_hit_ratio_2_layers(pdf, l1_cache, l2_cache, warmup=None,
                                     measure=None, seed=None):
    """Numerically compute the cache hit ratio of a two-layer cache under IRM
    stationary demand with a given pdf.

    Differently from the numeric_cache_hit_ratio function, this function
    allows users to compute the hits at layer 1, layer 2 and overall.

    Parameters
    ----------
    pdf : array-like
        The probability density function of an item being requested
    l1_cache : Cache
        The layer 1 cache object (i.e. the instance of a class subclassing
        icarus.Cache)
    l2_cache : Cache
        The layer 2 cache object (i.e. the instance of a class subclassing
        icarus.Cache)
    warmup : int, optional
        The number of warmup requests to generate. If not specified, it is
        set to 10 times the content population
    measure : int, optional
        The number of measured requests to generate. If not specified, it is
        set to 30 times the content population
    seed : int, optional
        The seed used to generate random numbers

    Returns
    -------
    cache_hit_ratio : dict
        Dictionary with keys "l1_hits", "l2_hits" and "total_hits"
    """
    if warmup is None:
        warmup = 10 * len(pdf)
    if measure is None:
        measure = 30 * len(pdf)
    z = DiscreteDist(pdf, seed)
    # Cache warm-up (hits and misses are not counted)
    for _ in range(warmup):
        content = z.rv()
        if not l1_cache.get(content):
            if not l2_cache.get(content):
                l2_cache.put(content)
            l1_cache.put(content)
    # Measurement phase
    l1_hits = 0
    l1_misses = 0
    l2_hits = 0
    for _ in range(measure):
        content = z.rv()
        if l1_cache.get(content):
            l1_hits += 1
        else:
            l1_misses += 1
            if l2_cache.get(content):
                l2_hits += 1
            else:
                l2_cache.put(content)
            l1_cache.put(content)
    return {
        'l1_hits': l1_hits / measure,
        'l2_hits': l2_hits / measure,
        'total_hits': (l1_hits + l2_hits) / measure,
        }


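# Illustrative sketch (not part of the original module): the two-layer
# estimator reports hits at each layer separately. As above, LruCache is an
# assumed import; any two cache objects exposing get()/put() will do, and the
# cache sizes chosen below are arbitrary.
def _example_numeric_two_layers():
    from icarus.models import LruCache  # assumed import
    pdf = TruncatedZipfDist(0.8, 500).pdf
    return numeric_cache_hit_ratio_2_layers(pdf, LruCache(20), LruCache(100),
                                            seed=0)

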
def trace_driven_cache_hit_ratio(workload, cache, warmup_ratio=0.25):
    """Compute the cache hit ratio of a cache under an arbitrary trace-driven
    workload.

    Parameters
    ----------
    workload : list or array
        List of URLs or content identifiers extracted from a trace. This list
        only needs to contain content identifiers, not timestamps
    cache : Cache
        Instance of a cache object
    warmup_ratio : float, optional
        Ratio of requests of the workload used to warm up the cache (i.e.
        whose cache hit/miss results are discarded)

    Returns
    -------
    cache_hit_ratio : float
        The cache hit ratio
    """
    if warmup_ratio < 0 or warmup_ratio > 1:
        raise ValueError("warmup_ratio must be between 0 and 1")
    n = len(workload)
    cache_hits = 0
    n_warmup = int(warmup_ratio * n)
    n_req = 0
    for content in workload:
        if cache.get(content):
            if n_req >= n_warmup:
                cache_hits += 1
        else:
            cache.put(content)
        n_req += 1
    return cache_hits / (n - n_warmup)


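# Illustrative sketch (not part of the original module): the trace-driven
# estimator accepts any iterable of content identifiers. Here a synthetic IRM
# trace is drawn from a Zipf distribution purely for illustration; in
# practice the workload would come from a request log. As above, LruCache is
# an assumed import and all parameter values are arbitrary.
def _example_trace_driven():
    from icarus.models import LruCache  # assumed import
    z = DiscreteDist(TruncatedZipfDist(0.8, 500).pdf, 0)
    workload = [z.rv() for _ in range(20000)]
    return trace_driven_cache_hit_ratio(workload, LruCache(50))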