diff --git a/347TopKFrequentElements.md b/347TopKFrequentElements.md new file mode 100644 index 0000000..66e1281 --- /dev/null +++ b/347TopKFrequentElements.md @@ -0,0 +1,181 @@ +### Step 1 +- 最初に思いついたのはヒープを使う方法 + - numとその発生回数をマップに溜めた後、順にminヒープにpushしていく。ヒープのノード数は常にkで抑え、残ったものが最大頻度をもつk個の要素。 +- ヒープのノード数を常にk以下に抑えるので、時間計算量はO(n logk) + - 見積もり実行時間:10^5 * log 10^5 ≒ 10^6。これを1億で割って、10^-2 s = 10 ms。 + - よって、最悪のケースで10msかかるだろう。 + - 空間計算量はO(n) +- テストケースで細かいミスを修正した後に一発でACしたのは嬉しかった + +```Go +type numCount struct { + n int + cnt int +} + +type numCountMinHeap []numCount + +func (h numCountMinHeap) Len() int { return len(h) } +func (h numCountMinHeap) Less(i, j int) bool { return h[i].cnt < h[j].cnt } +func (h numCountMinHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *numCountMinHeap) Push(x any) { *h = append(*h, x.(numCount)) } + +func (h *numCountMinHeap) Pop() any { + n := len(*h) + min := (*h)[n-1] + *h = (*h)[:n-1] + return min +} + +func (h numCountMinHeap) top() numCount { return h[0] } + +func topKFrequent(nums []int, k int) []int { + numCntHashMap := make(map[int]int) // {num: cnt} + for _, n := range nums { + if _, exist := numCntHashMap[n]; exist { + numCntHashMap[n]++ + continue + } + numCntHashMap[n] = 1 + } + + h := &numCountMinHeap{} + for n, c := range numCntHashMap { + if h.Len() == k && c <= h.top().cnt { + continue + } + heap.Push(h, numCount{n: n, cnt: c}) + if h.Len() > k { + heap.Pop(h) + } + } + + topKFrequentElems := []int{} + for h.Len() > 0 { + top := heap.Pop(h).(numCount) + topKFrequentElems = append(topKFrequentElems, top.n) + } + + return topKFrequentElems +} +``` + +### Step 2 +- step1をブラッシュアップ +- Goのint型のゼロ値が0であること(マップは存在しないキーに対して値型のゼロ値を返す)を使い、最初のループの中の発生頻度を数える処理を簡潔にした +- 問題文ではfrequencyという単語が使われていたので、cntからfreqに変更した + +```Go +type numFrequency struct { + n int + freq int +} + +type numFreqMinHeap []numFrequency + +func (h numFreqMinHeap) Len() int { return len(h) } +func (h numFreqMinHeap) Less(i, j int) bool { return h[i].freq < h[j].freq } +func (h 
numFreqMinHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *numFreqMinHeap) Push(x any) { *h = append(*h, x.(numFrequency)) } + +func (h *numFreqMinHeap) Pop() any { + n := len(*h) + min := (*h)[n-1] + *h = (*h)[:n-1] + return min +} + +func (h numFreqMinHeap) top() numFrequency { return h[0] } + +func topKFrequent(nums []int, k int) []int { + numFreqHashMap := make(map[int]int) // {num: freq} + for _, n := range nums { + numFreqHashMap[n]++ + } + + h := &numFreqMinHeap{} + for n, f := range numFreqHashMap { + if h.Len() == k && f <= h.top().freq { + continue + } + heap.Push(h, numFrequency{n: n, freq: f}) + if h.Len() > k { + heap.Pop(h) + } + } + + topKFreqElems := []int{} + for h.Len() > 0 { + top := heap.Pop(h).(numFrequency) + topKFreqElems = append(topKFreqElems, top.n) + } + + return topKFreqElems +} +``` + +- Discordで他の人の解答を見ていたら、バケットソートを使っている人がいたので真似をした +- 時間計算量O(n)で実現できており、感動 + - 見積もり実行時間:1ms +- 参照:https://github.com/hayashi-ay/leetcode/pull/60/files + +```Go +func topKFrequent(nums []int, k int) []int { + numToFreq := make(map[int]int) + maxFreq := 0 + for _, n := range nums { + numToFreq[n]++ + if numToFreq[n] > maxFreq { + maxFreq = numToFreq[n] + } + } + + numFreqBuckets := make([][]int, maxFreq + 1) + for n, f := range numToFreq { + numFreqBuckets[f] = append(numFreqBuckets[f], n) + } + + topKFreqElems := []int{} + for i := maxFreq; i >= 0; i-- { + if len(topKFreqElems) >= k { + break + } + topKFreqElems = append(topKFreqElems, numFreqBuckets[i]...) 
+ } + return topKFreqElems +} +``` + +### Step 3 +- 最終的なコード。Step2のバケットソートでやった +- step2からの変更箇所 + - maxFreq変数の更新にビルトインのmax関数を使った。2023年にリリースされたGo1.21で追加されたものらしいが、知らなかった。使う言語の最新verを追うことも大事 + - 最後のループのbreakする部分をtopKFreqElemsの更新の後にした。その方が自然 + +```Go +func topKFrequent(nums []int, k int) []int { + numToFreq := make(map[int]int) + maxFreq := 0 + for _, n := range nums { + numToFreq[n]++ + maxFreq = max(maxFreq, numToFreq[n]) + } + + numFreqBuckets := make([][]int, maxFreq + 1) + for n, f := range numToFreq { + numFreqBuckets[f] = append(numFreqBuckets[f], n) + } + + topKFreqElems := []int{} + for i := maxFreq; i >= 0; i-- { + topKFreqElems = append(topKFreqElems, numFreqBuckets[i]...) + if len(topKFreqElems) >= k { + break + } + } + + return topKFreqElems +} +``` diff --git a/step4.md b/step4.md new file mode 100644 index 0000000..cbe1914 --- /dev/null +++ b/step4.md @@ -0,0 +1,98 @@ +### Step 4 +- ヒープ +- 修正点 + - `numFrequency`構造体のフィールドは、広い行数に渡って使用されるので省略しすぎない + +```Go +type numFrequency struct { + num int + frequency int +} + +type numFrequencyHeap []numFrequency + +func (h numFrequencyHeap) Len() int { return len(h) } +func (h numFrequencyHeap) Less(i, j int) bool { return h[i].frequency < h[j].frequency } +func (h numFrequencyHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *numFrequencyHeap) Push(x any) { + *h = append(*h, x.(numFrequency)) +} + +func (h *numFrequencyHeap) Pop() any { + l := len(*h) + min := (*h)[l-1] + *h = (*h)[:l-1] + return min +} + +func (h numFrequencyHeap) top() (numFrequency, error) { + if h.Len() == 0 { + return numFrequency{num: 0, frequency: 0}, errors.New("Empty heap") + } + return h[0], nil +} + +func topKFrequent(nums []int, k int) []int { + numToFrequency := make(map[int]int) + for _, n := range nums { + numToFrequency[n]++ + } + + h := &numFrequencyHeap{} + heap.Init(h) + + for n, freq := range numToFrequency { + top, _ := h.top() + if h.Len() == k && freq <= top.frequency { + continue + } + + heap.Push(h, numFrequency{num: 
n, frequency: freq}) + if h.Len() > k { + heap.Pop(h) + } + } + + ans := make([]int, 0, k) + for h.Len() > 0 { + top := heap.Pop(h).(numFrequency) + fmt.Println(top) + ans = append(ans, top.num) + } + + return ans +} +``` + +- バケットソート + +```Go +func topKFrequent(nums []int, k int) []int { + numToFrequency := make(map[int]int) + maxFrequency := 0 + for _, n := range nums { + numToFrequency[n]++ + maxFrequency = max(maxFrequency, numToFrequency[n]) + } + + numFreqBuckets := make([][]int, maxFrequency+1) + for n, freq := range numToFrequency { + numFreqBuckets[freq] = append(numFreqBuckets[freq], n) + } + + ans := make([]int, 0, k) + for i := len(numFreqBuckets) - 1; i >= 0; i-- { + if len(numFreqBuckets[i]) == 0 { + continue + } + + ans = append(ans, numFreqBuckets[i]...) + if len(ans) == k { + break + } + } + + return ans +} +```