"""Functions for modelling and evaluating the performance of cache
replacement policies.
"""
from __future__ import division
import math
import numpy as np
from scipy.optimize import fsolve
from icarus.tools import TruncatedZipfDist, DiscreteDist
__all__ = [
'che_characteristic_time',
'che_per_content_cache_hit_ratio',
'che_cache_hit_ratio',
'che_characteristic_time_simplified',
'che_per_content_cache_hit_ratio_simplified',
'che_cache_hit_ratio_simplified',
'che_characteristic_time_generalized',
'che_per_content_cache_hit_ratio_generalized',
'che_cache_hit_ratio_generalized',
'laoutaris_characteristic_time',
'laoutaris_per_content_cache_hit_ratio',
'laoutaris_cache_hit_ratio',
'optimal_cache_hit_ratio',
'numeric_per_content_cache_hit_ratio',
'numeric_cache_hit_ratio',
'numeric_cache_hit_ratio_2_layers',
'trace_driven_cache_hit_ratio'
]
def che_characteristic_time(pdf, cache_size, target=None):
"""Return the characteristic time of an item or of all items, as defined by
Che et al.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
target : int, optional
The item index [1,N] for which characteristic time is requested. If not
specified, the function calculates the characteristic time of all the
items in the population.
Returns
-------
r : array of float or float
If target is None, returns an array with the characteristic times of
all items in the population. If a target is specified, then it returns
the characteristic time of only the specified item.
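    Examples
    --------
    A minimal usage sketch; the Zipf parameters below are arbitrary
    illustrative assumptions, not values prescribed by this module:
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.8, 1000).pdf
    >>> T_1 = che_characteristic_time(pdf, 100, target=1)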
"""
def func_r(r, i):
return sum(math.exp(-pdf[j] * r) for j in range(len(pdf)) if j != i) \
- len(pdf) + 1 + cache_size
items = range(len(pdf)) if target is None else [target - 1]
    r = [fsolve(func_r, x0=cache_size, args=(i,))[0] for i in items]
return r if target is None else r[0]
def che_per_content_cache_hit_ratio(pdf, cache_size, target=None):
"""Estimate the cache hit ratio of an item or of all items using the Che's
approximation.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
target : int, optional
        The item index [1, N] for which the cache hit ratio is requested. If not
specified, the function calculates the cache hit ratio of all the items
in the population.
Returns
-------
cache_hit_ratio : array of float or float
If target is None, returns an array with the cache hit ratios of all
items in the population. If a target is specified, then it returns
the cache hit ratio of only the specified item.
"""
    items = range(len(pdf)) if target is None else [target - 1]
r = che_characteristic_time(pdf, cache_size)
hit_ratio = [1 - math.exp(-pdf[i] * r[i]) for i in items]
return hit_ratio if target is None else hit_ratio[0]
def che_cache_hit_ratio(pdf, cache_size):
"""Estimate the overall cache hit ratio of an LRU cache under generic IRM
demand using the Che's approximation.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
Returns
-------
cache_hit_ratio : float
The overall cache hit ratio
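    Examples
    --------
    Illustrative sketch under an assumed Zipf-distributed demand (the
    parameters are arbitrary):
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.8, 1000).pdf
    >>> hit_ratio = che_cache_hit_ratio(pdf, 100)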
"""
ch = che_per_content_cache_hit_ratio(pdf, cache_size)
return sum(pdf[i] * ch[i] for i in range(len(pdf)))
def che_characteristic_time_simplified(pdf, cache_size):
"""Return the characteristic time of an LRU cache under a given IRM
workload, as defined by Che et al.
This function computes one single characteristic time for all contents.
This further approximation is normally accurate for workloads with
reduced skewness in their popularity distribution.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
Returns
-------
r : float
The characteristic time.
"""
def func_r(r):
return sum(math.exp(-pdf[j] * r) for j in range(len(pdf))) \
- len(pdf) + cache_size
return fsolve(func_r, x0=cache_size)[0]
def che_per_content_cache_hit_ratio_simplified(pdf, cache_size, target=None):
"""Estimate the cache hit ratio of an item or of all items using the Che's
approximation. This version uses a single characteristic time for all
contents.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
target : int, optional
        The item index [1, N] for which the cache hit ratio is requested. If not
specified, the function calculates the cache hit ratio of all the items
in the population.
Returns
-------
cache_hit_ratio : array of float or float
If target is None, returns an array with the cache hit ratios of all
items in the population. If a target is specified, then it returns
the cache hit ratio of only the specified item.
"""
    items = range(len(pdf)) if target is None else [target - 1]
r = che_characteristic_time_simplified(pdf, cache_size)
hit_ratio = [1 - math.exp(-pdf[i] * r) for i in items]
return hit_ratio if target is None else hit_ratio[0]
def che_cache_hit_ratio_simplified(pdf, cache_size):
"""Estimate the overall cache hit ratio of an LRU cache under generic IRM
demand using the Che's approximation. This version uses a single
characteristic time for all contents.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
Returns
-------
cache_hit_ratio : float
The overall cache hit ratio
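    Examples
    --------
    Sketch comparing the simplified estimate with the per-item variant for
    the same assumed Zipf demand (parameters are arbitrary):
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.7, 500).pdf
    >>> h_simplified = che_cache_hit_ratio_simplified(pdf, 50)
    >>> h_full = che_cache_hit_ratio(pdf, 50)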
"""
ch = che_per_content_cache_hit_ratio_simplified(pdf, cache_size)
return sum(pdf[i] * ch[i] for i in range(len(pdf)))
def che_p_in_func(pdf, cache_size, policy, **policy_args):
"""Return function to compute cache hit ratio of a policy given probability
of a content being requested and characteristic time
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
    policy : str
        The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')
    Returns
    -------
    p_in : callable
        Function taking an item's request probability and the characteristic
        time, and returning the probability that the item is in the cache
"""
if policy == 'LRU':
p_in = lambda p, t: 1 - np.exp(-p * t)
elif policy == 'q-LRU':
if 'q' not in policy_args:
raise ValueError('q parameter not specified')
q = policy_args['q']
p_in = lambda p, t: q * (1 - np.exp(-p * t)) / (np.exp(-p * t) + q * (1 - np.exp(-p * t)))
elif policy in ('FIFO', 'RANDOM'):
p_in = lambda p, t: p * t / (1 + p * t)
else:
raise ValueError('policy %s not recognized' % policy)
return p_in
def che_characteristic_time_generalized(pdf, cache_size, policy, **policy_args):
"""Return the characteristic time of a cache under a given IRM demand
according to the the extension of Che's approximation proposed by Martina
et al.
This function computes one single characteristic time for all content items.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
policy : str
The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')
Returns
-------
r : float
The characteristic time.
    References
    ----------
V. Martina, M. Garetto, and E. Leonardi, "A unified approach to the
performance analysis of caching systems," in Proceedings of the 2014
IEEE Conference on Computer Communications (INFOCOM'14), April 2014
"""
p_in = che_p_in_func(pdf, cache_size, policy, **policy_args)
def func_t(t):
return np.sum(p_in(pdf, t)) - cache_size
return fsolve(func_t, x0=cache_size)[0]
def che_per_content_cache_hit_ratio_generalized(pdf, cache_size, policy,
**policy_args):
"""Estimate the cache hit ratio of an item or of all items in a cache
subject to IRM demand according to the extension of Che's approximation
proposed by Martina et al.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
    policy : str
        The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')
    Returns
    -------
    cache_hit_ratio : array of float
        Array with the cache hit ratios of all items in the population
    References
    ----------
V. Martina, M. Garetto, and E. Leonardi, "A unified approach to the
performance analysis of caching systems," in Proceedings of the 2014
IEEE Conference on Computer Communications (INFOCOM'14), April 2014
"""
p_in = che_p_in_func(pdf, cache_size, policy, **policy_args)
t = che_characteristic_time_generalized(pdf, cache_size, policy, **policy_args)
return p_in(pdf, t)
def che_cache_hit_ratio_generalized(pdf, cache_size, policy='LRU', **policy_args):
"""Estimate the overall cache hit ratio of a cache subject to IRM demand
according to the extension of Che's approximation proposed by Martina et al.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
policy : str, optional
The cache replacement policy ('LRU', 'q-LRU', 'FIFO', 'RANDOM')
Returns
-------
cache_hit_ratio : float
The overall cache hit ratio
    References
    ----------
V. Martina, M. Garetto, and E. Leonardi, "A unified approach to the
performance analysis of caching systems," in Proceedings of the 2014
IEEE Conference on Computer Communications (INFOCOM'14), April 2014
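    Examples
    --------
    Sketch estimating hit ratios for q-LRU and RANDOM caches; the Zipf
    parameters and the value of q are arbitrary assumptions:
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.8, 1000).pdf
    >>> h_qlru = che_cache_hit_ratio_generalized(pdf, 100, 'q-LRU', q=0.1)
    >>> h_rnd = che_cache_hit_ratio_generalized(pdf, 100, 'RANDOM')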
"""
ch = che_per_content_cache_hit_ratio_generalized(pdf, cache_size, policy, **policy_args)
return sum(pdf[i] * ch[i] for i in range(len(pdf)))
def laoutaris_characteristic_time(alpha, population, cache_size, order=3):
"""Estimates the Che's characteristic time of an LRU cache under general
power-law demand using the Laoutaris approximation.
Parameters
----------
alpha : float
The coefficient of the demand power-law
population : int
The content population
cache_size : int
The cache size
order : int, optional
The order of the Taylor expansion. Supports only 2 and 3
Returns
-------
    r : float
        The characteristic time
References
----------
http://arxiv.org/pdf/0705.1970.pdf
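    Examples
    --------
    Sketch of the intended call pattern (alpha, population and cache size
    are arbitrary assumptions):
    >>> r = laoutaris_characteristic_time(0.8, 500, 50)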
"""
def H(N, alpha):
return sum(1.0 / l ** alpha for l in range(1, N + 1))
def cubrt(x):
"""Compute cubic root of a number
Parameters
----------
x : float
Number whose cubic root is to be calculated
Returns
-------
        cubrt : float
            The cube root
"""
exp = 1.0 / 3
return x ** exp if x >= 0 else -(-x) ** exp
def solve_3rd_order_equation(a, b, c, d):
"""Calculate the real solutions of the 3rd order equations
a*x**3 + b*x**2 + c*x + d = 0
Parameters
----------
a : float
            Coefficient of the 3rd degree monomial
        b : float
            Coefficient of the 2nd degree monomial
        c : float
            Coefficient of the 1st degree monomial
d : float
Constant
Returns
-------
roots : tuple
Tuple of real solutions.
The tuple may comprise either 1 or 3 values
Notes
-----
The method used to calculate roots is described in this paper:
http://www.nickalls.org/dick/papers/maths/cubic1993.pdf
"""
# Compute parameters
x_N = -b / (3 * a)
y_N = a * x_N ** 3 + b * x_N ** 2 + c * x_N + d
delta_2 = (b ** 2 - 3 * a * c) / (9 * a ** 2)
h_2 = 4 * (a ** 2) * (delta_2 ** 3)
        # Calculate the discriminant and find roots
discr = y_N ** 2 - h_2
if discr > 0:
r_x = (x_N + cubrt(0.5 / a * (-y_N + math.sqrt(discr))) \
+ cubrt(0.5 / a * (-y_N - math.sqrt(discr))),)
elif discr == 0:
delta = math.sqrt(delta_2)
r1 = r2 = x_N + delta
r3 = x_N - 2 * delta
r_x = (r1, r2, r3)
else: # discr < 0
h = math.sqrt(h_2)
delta = math.sqrt(delta_2)
Theta = np.arccos(-y_N / h) / 3.0
r1 = x_N + 2 * delta * np.cos(Theta)
r2 = x_N + 2 * delta * np.cos(2 * np.pi / 3 - Theta)
r3 = x_N + 2 * delta * np.cos(2 * np.pi / 3 + Theta)
r_x = (r1, r2, r3)
return r_x
# Get parameters
C = cache_size
N = population
# Calculate harmonics
H_N_a = H(N, alpha)
H_N_2a = H(N, 2 * alpha)
H_N_3a = H(N, 3 * alpha)
H_N_4a = H(N, 4 * alpha)
Lambda = 1.0 / H_N_a
# Find values of r
if order == 2:
alpha_2 = (0.5 * Lambda ** 2 * H_N_2a) - (0.5 * Lambda ** 3 * C * H_N_3a) + (0.25 * Lambda ** 4 * C ** 2 * H_N_4a)
alpha_1 = -(Lambda * H_N_a) + (0.5 * Lambda ** 3 * C ** 2 * H_N_3a) - (0.5 * Lambda ** 4 * C ** 3 * H_N_4a)
alpha_0 = C + (0.25 * Lambda ** 4 * C ** 4 * H_N_4a)
# Calculate discriminant to verify if there are real solutions
discr = alpha_1 ** 2 - 4 * alpha_2 * alpha_0
if discr < 0:
raise ValueError('Could not find real values for the '
'characteristic time. Try using a 3rd order '
'expansion')
# Calculate roots of the 2nd order equation
r1 = (-alpha_1 + math.sqrt(discr)) / (2 * alpha_2)
r2 = (-alpha_1 - math.sqrt(discr)) / (2 * alpha_2)
r_x = (r1, r2)
elif order == 3:
# Additional parameters
H_N_5a = H(N, 5 * alpha)
H_N_6a = H(N, 6 * alpha)
# Calculate coefficients of the 3rd order equation
alpha_3 = -(Lambda ** 3 / 6 * H_N_3a) + (Lambda ** 4 * C / 6 * H_N_4a) - \
(Lambda ** 5 * C ** 2 / 12 * H_N_5a) + (Lambda ** 6 * C ** 3 / 36 * H_N_6a)
alpha_2 = (Lambda ** 2 / 2 * H_N_2a) - (Lambda ** 4 * C ** 2 / 4 * H_N_4a) + \
(Lambda ** 5 * C ** 3 / 6 * H_N_5a) - (Lambda ** 6 * C ** 4 / 12 * H_N_6a)
alpha_1 = -Lambda * H_N_a + (Lambda ** 4 * C ** 3 / 6 * H_N_4a) - \
(Lambda ** 5 * C ** 4 / 12 * H_N_5a) + (Lambda ** 6 * C ** 5 / 12 * H_N_6a)
alpha_0 = C - (Lambda ** 4 * C ** 4 / 12 * H_N_4a) - \
(Lambda ** 6 * C ** 6 / 36 * H_N_6a)
# Solve 3rd order equation
r_x = solve_3rd_order_equation(alpha_3, alpha_2, alpha_1, alpha_0)
else:
raise ValueError('Only 2nd and 3rd order solutions are supported')
    # Find the actual value of the characteristic time (r), if it exists
    # We select the minimum positive r greater than C
r_c = [x for x in r_x if x > C]
if r_c:
return min(r_c)
else:
raise ValueError('Cannot compute cache hit ratio using this method. '
'Could not find positive values of characteristic time'
' greater than the cache size.')
def laoutaris_per_content_cache_hit_ratio(alpha, population, cache_size,
order=3, target=None):
"""Estimates the per-content cache hit ratio of an LRU cache under general
power-law demand using the Laoutaris approximation.
Parameters
----------
alpha : float
The coefficient of the demand power-law distribution
population : int
The content population
cache_size : int
The cache size
order : int, optional
The order of the Taylor expansion. Supports only 2 and 3
target : int, optional
The item index [1,N] for which cache hit ratio is requested. If not
specified, the function calculates the cache hit ratio of all the items
in the population.
Returns
-------
cache_hit_ratio : array of float or float
If target is None, returns an array with the cache hit ratios of all
items in the population. If a target is specified, then it returns
the cache hit ratio of only the specified item.
References
----------
http://arxiv.org/pdf/0705.1970.pdf
"""
pdf = TruncatedZipfDist(alpha, population).pdf
r = laoutaris_characteristic_time(alpha, population, cache_size, order)
items = range(len(pdf)) if target is None else [target - 1]
hit_ratio = [1 - math.exp(-pdf[i] * r) for i in items]
return hit_ratio if target is None else hit_ratio[0]
def laoutaris_cache_hit_ratio(alpha, population, cache_size, order=3):
"""Estimate the cache hit ratio of an LRU cache under general power-law
demand using the Laoutaris approximation.
Parameters
----------
alpha : float
The coefficient of the demand power-law distribution
population : int
The content population
cache_size : int
The cache size
order : int, optional
The order of the Taylor expansion. Supports only 2 and 3
Returns
-------
cache_hit_ratio : float
The cache hit ratio
References
----------
http://arxiv.org/pdf/0705.1970.pdf
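    Examples
    --------
    Sketch comparing the Laoutaris estimate with Che's approximation for the
    same assumed Zipf demand (parameter values are arbitrary):
    >>> from icarus.tools import TruncatedZipfDist
    >>> h_laoutaris = laoutaris_cache_hit_ratio(0.8, 500, 50)
    >>> h_che = che_cache_hit_ratio(TruncatedZipfDist(0.8, 500).pdf, 50)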
"""
pdf = TruncatedZipfDist(alpha, population).pdf
r = laoutaris_characteristic_time(alpha, population, cache_size, order)
return np.sum(pdf * (1 - math.e ** -(r * pdf)))
def optimal_cache_hit_ratio(pdf, cache_size):
"""Return the value of the optimal cache hit ratio of a cache under IRM
stationary demand with a given pdf.
    In practice this function returns the probability of a cache hit if the
    cache is filled with the *cache_size* most popular items. This value also
    corresponds to the steady-state cache hit ratio of an LFU cache.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache_size : int
The size of the cache (in number of items)
Returns
-------
cache_hit_ratio : float
The optimal cache hit ratio
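    Examples
    --------
    With a hand-made pdf (the values below are arbitrary but sum to 1), the
    optimal hit ratio is simply the cumulative probability of the two most
    popular items:
    >>> optimal_cache_hit_ratio([0.5, 0.25, 0.125, 0.125], 2)
    0.75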
"""
if cache_size >= len(pdf):
return 1.0
return sum(sorted(pdf, reverse=True)[:cache_size])
def numeric_per_content_cache_hit_ratio(pdf, cache, warmup=None, measure=None,
seed=None, target=None):
"""Numerically compute the per-content cache hit ratio of a cache under IRM
stationary demand with a given pdf.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache : Cache
The cache object (i.e. the instance of a class subclassing
icarus.Cache)
warmup : int, optional
The number of warmup requests to generate. If not specified, it is set
to 10 times the content population
measure : int, optional
The number of measured requests to generate. If not specified, it is
set to 30 times the content population
seed : int, optional
The seed used to generate random numbers
target : int, optional
The item index [1, N] for which cache hit ratio is requested. If not
specified, the function calculates the cache hit ratio of all the items
in the population.
Returns
-------
cache_hit_ratio : array of float or float
If target is None, returns an array with the cache hit ratios of all
items in the population. If a target is specified, then it returns
the cache hit ratio of only the specified item.
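    Examples
    --------
    Simulation sketch assuming the LruCache policy shipped with icarus
    (importable from icarus.models); the Zipf parameters and seed are
    arbitrary assumptions:
    >>> from icarus.models import LruCache
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.8, 500).pdf
    >>> h = numeric_per_content_cache_hit_ratio(pdf, LruCache(50), seed=1)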
"""
if warmup is None: warmup = 10 * len(pdf)
if measure is None: measure = 30 * len(pdf)
z = DiscreteDist(pdf, seed)
for _ in range(warmup):
content = z.rv()
if not cache.get(content):
cache.put(content)
cache_hits = np.zeros(len(pdf))
requests = np.zeros(len(pdf))
for _ in range(measure):
content = z.rv()
requests[content - 1] += 1
if cache.get(content):
cache_hits[content - 1] += 1
else:
cache.put(content)
    # Avoid dividing by zero for items that received no requests
    hit_ratio = np.divide(cache_hits, requests, out=np.zeros_like(cache_hits),
                          where=requests > 0)
return hit_ratio if target is None else hit_ratio[target - 1]
def numeric_cache_hit_ratio(pdf, cache, warmup=None, measure=None, seed=None):
"""Numerically compute the cache hit ratio of a cache under IRM
stationary demand with a given pdf.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
cache : Cache
The cache object (i.e. the instance of a class subclassing
icarus.Cache)
warmup : int, optional
The number of warmup requests to generate. If not specified, it is set
to 10 times the content population
measure : int, optional
The number of measured requests to generate. If not specified, it is
set to 30 times the content population
seed : int, optional
The seed used to generate random numbers
Returns
-------
cache_hit_ratio : float
The cache hit ratio
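    Examples
    --------
    Simulation sketch next to Che's analytical estimate, assuming the
    LruCache policy shipped with icarus (parameters are arbitrary):
    >>> from icarus.models import LruCache
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.8, 500).pdf
    >>> h_sim = numeric_cache_hit_ratio(pdf, LruCache(50), seed=1)
    >>> h_che = che_cache_hit_ratio(pdf, 50)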
"""
if warmup is None: warmup = 10 * len(pdf)
if measure is None: measure = 30 * len(pdf)
z = DiscreteDist(pdf, seed)
for _ in range(warmup):
content = z.rv()
if not cache.get(content):
cache.put(content)
cache_hits = 0
for _ in range(measure):
content = z.rv()
if cache.get(content):
cache_hits += 1
else:
cache.put(content)
return cache_hits / measure
def numeric_cache_hit_ratio_2_layers(pdf, l1_cache, l2_cache,
warmup=None, measure=None, seed=None):
"""Numerically compute the cache hit ratio of a two-layer cache under IRM
stationary demand with a given pdf.
    Unlike the numeric_cache_hit_ratio function, this function reports the
    hits measured at layer 1, at layer 2 and overall.
Parameters
----------
pdf : array-like
The probability density function of an item being requested
    l1_cache : Cache
        The layer 1 cache object (i.e. the instance of a class subclassing
        icarus.Cache)
    l2_cache : Cache
        The layer 2 cache object (i.e. the instance of a class subclassing
        icarus.Cache)
warmup : int, optional
The number of warmup requests to generate. If not specified, it is set
to 10 times the content population
measure : int, optional
The number of measured requests to generate. If not specified, it is
set to 30 times the content population
seed : int, optional
The seed used to generate random numbers
Returns
-------
cache_hit_ratio : dict
Dictionary with keys "l1_hits", "l2_hits" and "total_hits"
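    Examples
    --------
    Sketch with two LRU layers, assuming the LruCache policy shipped with
    icarus; cache sizes and Zipf parameters are arbitrary assumptions:
    >>> from icarus.models import LruCache
    >>> from icarus.tools import TruncatedZipfDist
    >>> pdf = TruncatedZipfDist(0.8, 500).pdf
    >>> res = numeric_cache_hit_ratio_2_layers(pdf, LruCache(10), LruCache(50))
    >>> total = res['total_hits']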
"""
if warmup is None: warmup = 10 * len(pdf)
if measure is None: measure = 30 * len(pdf)
z = DiscreteDist(pdf, seed)
for _ in range(warmup):
content = z.rv()
if not l1_cache.get(content):
if not l2_cache.get(content):
l2_cache.put(content)
l1_cache.put(content)
l1_hits = 0
l1_misses = 0
l2_hits = 0
for _ in range(measure):
content = z.rv()
if l1_cache.get(content):
l1_hits += 1
else:
l1_misses += 1
if l2_cache.get(content):
l2_hits += 1
else:
l2_cache.put(content)
l1_cache.put(content)
return {
'l1_hits': l1_hits / measure,
'l2_hits': l2_hits / measure,
'total_hits': (l1_hits + l2_hits) / measure
}
def trace_driven_cache_hit_ratio(workload, cache, warmup_ratio=0.25):
"""Compute cache hit ratio of a cache under an arbitrary trace-driven
workload.
Parameters
----------
workload : list or array
        List of URLs or content identifiers extracted from a trace. This list
        only needs to contain content identifiers, not timestamps
cache : Cache
Instance of a cache object
warmup_ratio : float, optional
Ratio of requests of the workload used to warm up the cache (i.e. whose
cache hit/miss results are discarded)
Returns
-------
cache_hit_ratio : float
The cache hit ratio
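    Examples
    --------
    Sketch with a toy synthetic trace (the request sequence below is a
    made-up placeholder), assuming the LruCache policy shipped with icarus:
    >>> from icarus.models import LruCache
    >>> trace = [1, 2, 1, 3, 2, 1, 4, 1, 2, 5, 1, 2]
    >>> h = trace_driven_cache_hit_ratio(trace, LruCache(2))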
"""
if warmup_ratio < 0 or warmup_ratio > 1:
raise ValueError("warmup_ratio must be comprised between 0 and 1")
n = len(workload)
cache_hits = 0
n_warmup = int(warmup_ratio * n)
n_req = 0
for content in workload:
if cache.get(content):
if n_req >= n_warmup:
cache_hits += 1
else:
cache.put(content)
n_req += 1
return cache_hits / (n - n_warmup)