版权声明:本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。转载时请标注http://guoruibiao.blog.csdn.net https://blog.csdn.net/Marksinoberg/article/details/84861316
换出策略
前几天看到别人整理的关于缓存替换的讨论,觉得很是不错。大致有这么几种思路:
- 随机换出
- FIFO(First In First Out)
- LRU (Least Recently Used) 最近最少使用的被换出
- LFU (Least Frequently Used) 最不频繁使用的被换出
最后两个稍微有点绕,看似是俩不一样的思路,但是仔细想想,一个是基于时间线,另一个基于使用频次。都有一个共同的特性,抽象出来,这俩就是计算规则上的不同罢了,实际上大体也还属于同一种模式。
下面简单用代码走一遍,加深下印象。
随机换出
#coding: utf8
__author__ = "郭 璞"
__email__ = "[email protected]"
# random cache algorithm
import random
class Cacher(object):
    """Fixed-size cache that maps every key to one slot via a key hash.

    Collisions simply overwrite the previous occupant, so a lookup may
    miss even for a key that was cached earlier.
    """

    def __init__(self, size):
        self.size = size
        self.container = [None] * size
        # Hit/miss statistics, updated by get().
        self.hitcnt = 0
        self.misscnt = 0

    def _getrandkey(self, key):
        """Map *key* to a slot index: sum of its character codes modulo size."""
        total = 0
        for ch in str(key):
            total += ord(ch)
        return total % self.size

    def cache(self, key, value):
        """Store *value* in the slot derived from *key* (overwrites on collision)."""
        slot = self._getrandkey(key)
        self.container[slot] = value

    def get(self, key):
        """Return the slot content for *key* (None on miss) and update the stats."""
        ret = self.container[self._getrandkey(key)]
        print("method `get` ret: ", ret)
        if ret is None:
            self.misscnt += 1
        else:
            self.hitcnt += 1
        return ret

    def printself(self):
        """Dump the slots plus the miss/hit counters."""
        print(self.container, "misscnt:", self.misscnt, ", hitcnt: ", self.hitcnt)
if __name__ == "__main__":
    demo = Cacher(10)
    # Populate two slots, then probe hit keys ("hello", "mother") and misses.
    demo.cache("hello", "hellovalue")
    demo.printself()
    demo.cache("mother", "mothervalue")
    demo.printself()
    for key in ("hello", "mother", "oadsado", "ndsdi", "madsdsm"):
        print(demo.get(key))
        demo.printself()
运行结果:
[None, None, 'hellovalue', None, None, None, None, None, None, None] misscnt: 0 , hitcnt: 0
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 0 , hitcnt: 0
method `get` ret: hellovalue
hellovalue
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 0 , hitcnt: 1
method `get` ret: mothervalue
mothervalue
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 0 , hitcnt: 2
method `get` ret: None
None
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 1 , hitcnt: 2
method `get` ret: None
None
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 2 , hitcnt: 2
method `get` ret: mothervalue
mothervalue
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 2 , hitcnt: 3
FIFO
#coding: utf8
__author__ = "郭 璞"
__email__ = "[email protected]"
# FIFO cacher
class Cacher(object):
    """FIFO cache: once *capacity* entries are held, caching a new value
    evicts the oldest one.

    Fix over the original: the container now starts empty. Pre-filling it
    with ``None`` made the eviction check true from the very first insert
    and let ``get()`` hand back ``None`` placeholders as if they were
    cached values.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.container = []  # oldest value at index 0

    def cache(self, value):
        """Append *value*, evicting the oldest entry when at capacity."""
        if len(self.container) >= self.capacity:
            self.container.pop(0)
        self.container.append(value)

    def get(self):
        """Pop and return the oldest value; None when the cache is empty.

        (The original returned None here too — by popping a placeholder —
        so this guard preserves the observable behavior instead of raising
        IndexError on an empty list.)
        """
        if not self.container:
            return None
        return self.container.pop(0)

    def printself(self):
        """Print the current queue, oldest first."""
        print(self.container)
if __name__ == "__main__":
    fifo = Cacher(3)
    # Push four values into a capacity-3 cache; the oldest gets evicted.
    for value in (1, 2, 3, 4):
        fifo.cache(value)
    fifo.printself()
    fifo.cache(5)
    fifo.printself()
运行结果:
[2, 3, 4]
[3, 4, 5]
LRU
#coding: utf8
__author__ = "郭 璞"
__email__ = "[email protected]"
# LRU Cacher
# 最新使用到的放到最前面
class Node(object):
    """A singly linked list node: a payload plus a reference to the next node."""

    def __init__(self, data, next):
        self.data = data
        self.next = next


class Chain(object):
    """Singly linked list used as an LRU cache.

    Entries are kept most-recently-used first: ``cache()`` inserts a new
    key at the head, moves an existing key to the head, and evicts the
    tail once ``MAX_SIZE`` entries are held.

    Fixes over the original:
    - ``removetail()`` and ``remove()`` now decrement ``size``; the stale
      counter previously made ``prepend()`` evict too eagerly, silently
      shrinking the cache below capacity.
    - ``removetail()`` no longer raises AttributeError on a one-node chain.
    - ``remove()`` can now remove the tail node and the only node, and its
      bound check is ``index >= size`` (valid indices are 0..size-1).
    - ``append()`` is the correctly spelled entry point; the misspelled
      ``apppend()`` is kept as a backward-compatible alias.
    """

    def __init__(self, size):
        self.root = None      # head node (most recently used)
        self.size = 0         # current number of nodes
        self.MAX_SIZE = size  # capacity

    def _to_list(self):
        """Return the payloads head-to-tail as a plain list."""
        data = []
        cursor = self.root
        while cursor is not None:
            data.append(cursor.data)
            cursor = cursor.next
        return data

    def printself(self):
        """Print the chain contents head-to-tail."""
        print(self._to_list())

    def prepend(self, data):
        """Insert *data* at the head, evicting the tail when at capacity."""
        if self.size >= self.MAX_SIZE:
            print("数量已达上限, 所以会删去尾部的数据")
            self.removetail()
        self.root = Node(data=data, next=self.root)
        self.size += 1
        return self

    def removetail(self):
        """Drop the last node (least recently used); no-op on an empty chain."""
        if self.root is None:
            return self
        if self.root.next is None:
            # Single node: the head itself is the tail.
            self.root = None
            self.size -= 1
            return self
        pre = self.root
        cursor = self.root.next
        while cursor.next is not None:
            pre = cursor
            cursor = cursor.next
        pre.next = None
        self.size -= 1
        return self

    def remove(self, index):
        """Remove the node at 0-based *index*; out-of-range indices are ignored."""
        if self.root is None or index < 0 or index >= self.size:
            return self
        if index == 0:
            self.root = self.root.next
            self.size -= 1
            return self
        pre = self.root
        cursor = self.root.next
        counter = 1
        while cursor is not None:
            if counter == index:
                pre.next = cursor.next
                self.size -= 1
                break
            counter += 1
            pre = cursor
            cursor = cursor.next
        return self

    def append(self, data):
        """Append *data* at the tail; refused (with a notice) when full."""
        if self.size >= self.MAX_SIZE:
            print("数量已达上限")
            return self
        node = Node(data=data, next=None)
        if self.root is None:
            self.root = node
        else:
            cursor = self.root
            while cursor.next is not None:
                cursor = cursor.next
            cursor.next = node
        self.size += 1
        return self

    def apppend(self, data):
        """Backward-compatible alias for the originally misspelled name."""
        return self.append(data)

    def index(self, data):
        """Return ``(position, hit)``: hit is 1 with the 0-based position when
        *data* is found, otherwise hit is 0 and position equals the size."""
        counter = 0
        hit = 0
        cursor = self.root
        while cursor is not None:
            if cursor.data == data:
                hit = 1
                break
            cursor = cursor.next
            counter += 1
        return (counter, hit)

    def cache(self, data):
        """LRU access: move *data* to the head on a hit, insert it on a miss."""
        index, hit = self.index(data)
        if hit:
            return self.remove(index).prepend(data)
        return self.prepend(data)
if __name__ == "__main__":
    lru = Chain(5)
    # Fill past capacity, then touch 3 and 2 to pull them to the front.
    lru.cache(1).cache(2).cache(3).cache(4).cache(5).cache(6).printself()
    lru.cache(3).cache(2).printself()
运行结果:
数量已达上限, 所以会删去尾部的数据
[6, 5, 4, 3, 2]
数量已达上限, 所以会删去尾部的数据
数量已达上限, 所以会删去尾部的数据
[2, 3, 6, 5]
整理
经过上面几个小例子,不难看出有如下几个特点:
- 随机算法有一定的误伤,编码简单,小容量下效率还行。
- FIFO误伤率低,但是不符合**“程序访问的局部性原理”**,热点数据不一定能留的下来。
- LRU 相对而言还算靠谱,也符合缓存的生存周期。