class ListNode:
    """Node of a doubly linked list that stores a single value."""

    def __init__(
        self,
        value: int = 0,
        prev_node: "ListNode | None" = None,
        next_node: "ListNode | None" = None,
    ):
        # The annotations are quoted on purpose: the name ``ListNode`` is not
        # bound yet while its own class body is executing, so an unquoted
        # reference raises NameError when annotations are evaluated eagerly.
        self.value = value
        self.prev = prev_node
        self.next = next_node


class Deque:
    """Minimal doubly linked deque bracketed by a dummy head and dummy tail.

    Because of the two dummies every real node always has both neighbours,
    so linking and unlinking never has to test for ``None``.
    """

    def __init__(self):
        self.dummy_head = ListNode()
        self.dummy_tail = ListNode()
        self.dummy_head.next = self.dummy_tail
        self.dummy_tail.prev = self.dummy_head

    def append(self, value: int) -> None:
        """Insert ``value`` at the right end, just before the dummy tail."""
        prev_node = self.dummy_tail.prev
        node = ListNode(value, prev_node, self.dummy_tail)
        prev_node.next = node
        self.dummy_tail.prev = node

    def remove(self, value: int) -> int:
        """Remove and return the leftmost occurrence of ``value``.

        Scans from the left end, so the cost grows with the deque length.

        Raises:
            IndexError: if ``value`` is not stored in the deque.
        """
        node = self.dummy_head.next
        # Identity comparison: we are looking for the dummy object itself.
        while node is not self.dummy_tail:
            if node.value == value:
                return self._remove(node)
            node = node.next
        raise IndexError(
            "Deque.remove(value): No value found: "
            f"value = {value}"
        )

    def popleft(self) -> int:
        """Remove and return the leftmost value.

        Raises:
            IndexError: if the deque holds no values.
        """
        if self.dummy_head.next is self.dummy_tail:
            raise IndexError("Deque.popleft(): deque is empty")
        return self._remove(self.dummy_head.next)

    def _remove(self, node: ListNode) -> int:
        """Unlink ``node`` from its neighbours and return its value."""
        prev_node = node.prev
        next_node = node.next
        prev_node.next = next_node
        next_node.prev = prev_node
        return node.value


class LRUCache:
    """LRU cache: a dict of values plus a ``Deque`` of keys in access order.

    The left end of ``access_history`` is the least recently used key.
    Touching an existing key calls ``Deque.remove``, which scans the deque,
    so ``get`` and ``put`` on an existing key are linear in the cache size.
    """

    def __init__(self, capacity: int):
        self.cache = {}
        self.access_history = Deque()
        self.capacity = capacity

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when it is absent.

        A hit marks ``key`` as the most recently used entry.
        """
        if key not in self.cache:
            return -1
        self.access_history.remove(key)
        self.access_history.append(key)
        return self.cache[key]

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key`` as the most recently used entry.

        When ``key`` is new and the cache is full, the least recently used
        entry is dropped first.
        """
        if key in self.cache:
            # Existing key: forget its old position in the history.
            self.access_history.remove(key)
        elif len(self.cache) == self.capacity:
            oldest_key = self.access_history.popleft()
            del self.cache[oldest_key]
        # Every path ends the same way: newest position, current value.
        self.access_history.append(key)
        self.cache[key] = value
class Node:
    """Doubly linked list node holding one key/value pair of the cache."""

    def __init__(
        self,
        key: int = 0,
        value: int = 0,
        prev=None,
        next=None,
    ):
        self.key = key
        self.value = value
        self.prev = prev
        self.next = next


class LRUCache:
    """LRU cache backed by a dict of key -> Node and a doubly linked list.

    Nodes sit between ``dummy_head`` and ``dummy_tail`` ordered from least
    recently used (next to the head) to most recently used (next to the
    tail). The dict gives direct access to a key's node, so no list scan is
    needed to move or evict an entry.
    """

    def __init__(self, capacity: int):
        """Create an empty cache.

        Raises:
            ValueError: if ``capacity`` is smaller than 1.
        """
        if capacity < 1:
            # The guard rejects 0 as well, so the message must not claim the
            # value is negative.
            raise ValueError(
                "LRUCache.__init__(): capacity must be a positive integer: "
                f"capacity = {capacity}"
            )
        self.dummy_head = Node()
        self.dummy_tail = Node()
        self.dummy_head.next = self.dummy_tail
        self.dummy_tail.prev = self.dummy_head
        self.cache = {}
        self.capacity = capacity

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when it is absent.

        A hit moves the entry to the most-recently-used end.
        """
        if key not in self.cache:
            return -1
        node = self._pop(key)
        self._append(node)
        return node.value

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key`` as the most recently used entry.

        When ``key`` is new and the cache is full, the least recently used
        entry is evicted first.
        """
        if key in self.cache:
            node = self._pop(key)
            node.value = value
        else:
            if len(self.cache) == self.capacity:
                self._popleft()
            node = Node(key, value)
            self.cache[key] = node
        self._append(node)

    def _pop(self, key: int) -> Node:
        """Unlink the node for ``key`` from the list and return it.

        The dict entry is kept; callers re-append the node right away.
        """
        return self._unlink(self.cache[key])

    def _popleft(self) -> Node:
        """Evict the least recently used node from both list and dict.

        Raises:
            IndexError: if the cache holds no entries.
        """
        oldest_node = self.dummy_head.next
        if oldest_node is self.dummy_tail:
            # Without this guard the dummy tail itself would be unlinked.
            raise IndexError("LRUCache._popleft(): cache is empty")
        del self.cache[oldest_node.key]
        return self._unlink(oldest_node)

    def _unlink(self, node: Node) -> Node:
        """Detach ``node`` from its neighbours and clear its own links."""
        next_node = node.next
        prev_node = node.prev
        next_node.prev = prev_node
        prev_node.next = next_node
        node.next = None
        node.prev = None
        return node

    def _append(self, node: Node) -> None:
        """Link ``node`` in just before the dummy tail (most recent slot)."""
        next_node = self.dummy_tail
        prev_node = self.dummy_tail.prev
        next_node.prev = node
        prev_node.next = node
        node.next = next_node
        node.prev = prev_node
class DoublyLinkedList:
    """Circular doubly linked list with one sentinel node, used as a deque.

    ``sentinel.next`` is the leftmost item and ``sentinel.prev`` the
    rightmost; an empty list is a sentinel that points at itself.
    """

    class ListNode:
        """One list cell holding a value and links to both neighbours."""

        def __init__(self, value=0, prev=None, next=None):
            self.value = value
            self.prev = prev
            self.next = next

    def __init__(self):
        self.sentinel = DoublyLinkedList.ListNode()
        self.sentinel.next = self.sentinel
        self.sentinel.prev = self.sentinel

    def append(self, value: int) -> None:
        """Insert ``value`` at the right end, just before the sentinel."""
        node = DoublyLinkedList.ListNode(value=value)
        next_node = self.sentinel
        prev_node = self.sentinel.prev

        node.next = next_node
        node.prev = prev_node
        next_node.prev = node
        prev_node.next = node

    def remove(self, value: int) -> int:
        """Remove and return the leftmost occurrence of ``value``.

        Scans from the left end, so the cost grows with the list length.

        Raises:
            IndexError: if ``value`` is not stored in the list.
        """
        node = self.sentinel.next
        while node is not self.sentinel:
            if node.value == value:
                return self._remove(node)
            node = node.next
        raise IndexError(
            "DoublyLinkedList.remove(value): No item found: "
            f"value = {value}"
        )

    def popleft(self) -> int:
        """Remove and return the leftmost value.

        Raises:
            IndexError: if the list holds no items.
        """
        node = self.sentinel.next
        if node is self.sentinel:
            # On an empty list the "leftmost node" is the sentinel itself;
            # unlinking it would hand back its placeholder value 0 as if it
            # were real data.
            raise IndexError("DoublyLinkedList.popleft(): list is empty")
        return self._remove(node)

    def _remove(self, node) -> int:
        """Unlink ``node`` from its neighbours and return its value."""
        next_node = node.next
        prev_node = node.prev

        next_node.prev = prev_node
        prev_node.next = next_node

        return node.value


class LRUCache:
    """LRU cache: a dict of values plus a linked list of keys in access order.

    The left end of ``access_history`` is the least recently used key.
    Touching an existing key calls ``DoublyLinkedList.remove``, which scans
    the list.
    """

    def __init__(self, capacity: int):
        self.access_history = DoublyLinkedList()
        self.cache = {}
        self.capacity = capacity

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when it is absent.

        A hit marks ``key`` as the most recently used entry.
        """
        if key not in self.cache:
            return -1
        self.access_history.remove(key)
        self.access_history.append(key)
        return self.cache[key]

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key`` as the most recently used entry.

        When ``key`` is new and the cache is full, the least recently used
        entry is dropped first.
        """
        if key in self.cache:
            self.access_history.remove(key)
        elif len(self.cache) == self.capacity:
            oldest_key = self.access_history.popleft()
            del self.cache[oldest_key]
        self.access_history.append(key)
        self.cache[key] = value
from collections import OrderedDict


class LRUCache:
    """LRU cache built on ``collections.OrderedDict``.

    ``key_to_value`` keeps entries ordered from least recently used (first)
    to most recently used (last).
    """

    def __init__(self, capacity: int):
        """Create an empty cache.

        Raises:
            ValueError: if ``capacity`` is smaller than 1.
        """
        if capacity < 1:
            # Reject this up front: a zero capacity would otherwise fail
            # inside put() when popitem() hits an empty dict, and a negative
            # one would never match len() and so never evict.
            raise ValueError(
                "LRUCache.__init__(): capacity must be a positive integer: "
                f"capacity = {capacity}"
            )
        self.key_to_value = OrderedDict()
        self.capacity = capacity

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when it is absent.

        A hit moves the entry to the most-recently-used end.
        """
        if key not in self.key_to_value:
            return -1
        self.key_to_value.move_to_end(key)
        return self.key_to_value[key]

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key`` as the most recently used entry.

        When ``key`` is new and the cache is full, the least recently used
        entry is dropped first.
        """
        if key in self.key_to_value:
            self.key_to_value.move_to_end(key)
        elif len(self.key_to_value) == self.capacity:
            # last=False pops from the front, i.e. the oldest entry.
            self.key_to_value.popitem(last=False)
        self.key_to_value[key] = value
from typing import Tuple


class LRUCache:
    """LRU cache whose linked-list nodes are plain four-item lists.

    Each node is ``[prev, next, key, value]`` and ``sentinel`` is the root of
    a circular doubly linked list: ``sentinel[NEXT]`` is the least recently
    used node and ``sentinel[PREV]`` the most recently used one. The layout
    is modelled on the list-based nodes of CPython's ``functools.lru_cache``.
    """

    # Positions of the fields inside a node list. They are class-level
    # constants (still reachable as ``self.PREV`` etc.) because they are the
    # same for every cache instance.
    PREV = 0
    NEXT = 1
    KEY = 2
    VALUE = 3

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.key_to_node = {}
        self.sentinel = []
        # Point the empty root at itself in both directions.
        self.sentinel[:] = [self.sentinel, self.sentinel, None, None]

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when it is absent.

        A hit re-inserts the entry at the most-recently-used end.
        """
        if key not in self.key_to_node:
            return -1
        value = self._remove(key)
        self._append(key, value)
        return value

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key`` as the most recently used entry.

        When ``key`` is new and the cache is full, the least recently used
        entry is evicted first.
        """
        if key in self.key_to_node:
            self._remove(key)
        elif len(self.key_to_node) == self.capacity:
            self.evict()
        self._append(key, value)

    def evict(self) -> Tuple[int, int]:
        """Remove the least recently used entry and return ``(key, value)``.

        Raises:
            IndexError: if the cache holds no entries.
        """
        lru_node = self.sentinel[self.NEXT]
        if lru_node is self.sentinel:
            # An empty list leads straight back to the sentinel, whose key
            # slot is None; fail with a clear error instead of looking that
            # placeholder up in the dict.
            raise IndexError("LRUCache.evict(): cache is empty")
        self._remove(lru_node[self.KEY])
        return lru_node[self.KEY], lru_node[self.VALUE]

    def _remove(self, key: int) -> int:
        """Drop ``key`` from both the dict and the list; return its value."""
        node = self.key_to_node[key]
        del self.key_to_node[key]

        prev_node = node[self.PREV]
        next_node = node[self.NEXT]

        prev_node[self.NEXT] = next_node
        next_node[self.PREV] = prev_node

        return node[self.VALUE]

    def _append(self, key: int, value: int) -> None:
        """Add ``key`` to the dict and link a new node in before the root."""
        node = [None, None, key, value]
        self.key_to_node[key] = node

        prev_node = self.sentinel[self.PREV]
        next_node = self.sentinel

        prev_node[self.NEXT] = node
        next_node[self.PREV] = node
        node[self.PREV] = prev_node
        node[self.NEXT] = next_node
from typing import Tuple


class LRUCache:
    """LRU cache backed by a dict of key -> node and a circular linked list.

    ``sentinel.next`` is the least recently used node and ``sentinel.prev``
    the most recently used one. ``_append`` and ``_remove`` always update the
    dict and the list together, so the two structures cannot drift apart.
    """

    class ListNode:
        """One list cell holding a key/value pair and both neighbour links."""

        def __init__(
            self,
            key: int = 0,
            value: int = 0,
            prev_node=None,
            next_node=None,
        ):
            self.key = key
            self.value = value
            self.prev = prev_node
            self.next = next_node

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.key_to_node = {}
        self.sentinel = LRUCache.ListNode()
        # An empty list is a sentinel that points at itself.
        self.sentinel.next = self.sentinel
        self.sentinel.prev = self.sentinel

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when it is absent.

        A hit re-inserts the entry at the most-recently-used end.
        """
        if key not in self.key_to_node:
            return -1
        value = self._remove(key)
        self._append(key, value)
        return value

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key`` as the most recently used entry.

        When ``key`` is new and the cache is full, the least recently used
        entry is evicted first.
        """
        if key in self.key_to_node:
            self._remove(key)
        elif len(self.key_to_node) == self.capacity:
            self.evict()
        self._append(key, value)

    def evict(self) -> Tuple[int, int]:
        """Remove the least recently used entry and return ``(key, value)``.

        Raises:
            IndexError: if the cache holds no entries.
        """
        if not self.key_to_node:
            raise IndexError("LRUCache.evict(): cache is empty")
        lru_node = self.sentinel.next
        lru_key = lru_node.key
        lru_value = lru_node.value
        self._remove(lru_key)
        return lru_key, lru_value

    def _append(self, key: int, value: int) -> None:
        """Add ``key`` to the dict and link a new node in before the sentinel."""
        node = LRUCache.ListNode(key, value)
        self.key_to_node[key] = node

        prev_node = self.sentinel.prev
        next_node = self.sentinel

        prev_node.next = node
        next_node.prev = node
        node.next = next_node
        node.prev = prev_node

    def _remove(self, key: int) -> int:
        """Drop ``key`` from both the dict and the list; return its value.

        Raises:
            IndexError: if ``key`` is not in the cache.
        """
        if key not in self.key_to_node:
            raise IndexError(
                f"LRUCache._remove(): key not found in cache: key = {key}"
            )
        node = self.key_to_node[key]
        del self.key_to_node[key]

        prev_node = node.prev
        next_node = node.next

        prev_node.next = next_node
        next_node.prev = prev_node

        return node.value