-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathashton_and_string.py
More file actions
executable file
·100 lines (79 loc) · 3.1 KB
/
ashton_and_string.py
File metadata and controls
executable file
·100 lines (79 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
import itertools
import os
import sys
from pathlib import Path
from typing import IO
def ashtonString(s: str, k: int) -> str:
N = len(s)
suffixRank = [0] * N
# Example "abaab"
# Suffix Array for this (2, 3, 0, 4, 1)
# Create a tuple to store rank for each suffix
# struct myTuple {
# int originalIndex; // stores original index of suffix
# int firstHalf; // store rank for first half of suffix
# int secondHalf; // store rank for second half of suffix
# };
L = [(0, 0, 0) for _ in range(N)]
# suffixRank is table hold the rank of each string
# Initialize suffix ranking on the basis of only single character
# for single character ranks will be 'a' = 0, 'b' = 1, 'c' = 2 ... 'z' = 25
suffixRank = [ord(s[i]) - ord("a") for i in range(N)]
# Iterate log(n) times i.e. till when all the suffixes are sorted
# 'stp' keeps the track of number of iteration
# 'cnt' store length of suffix which is going to be compared
#
# On each iteration we initialize tuple for each suffix array
# with values computed from previous iteration
cnt = 1
stp = 1
while cnt < N:
for i in range(N):
L[i] = (i, suffixRank[i], suffixRank[i + cnt] if i + cnt < N else -1)
# On the basis of tuples obtained sort the tuple array
# L.sort(key = cmp_to_key(lambda a, b: a[2] - b[2] if a[1] == b[1] else a[1] - b[1]))
L.sort(key=lambda a: a[1] * 2**24 + a[2]) # faster than using `cmp_to_key`
# Initialize rank for rank 0 suffix after sorting to its original index
# in suffixRank array
suffixRank[L[0][0]] = 0
currRank = 0
for i in range(1, N):
# compare ith ranked suffix (after sorting) to (i - 1)th ranked suffix
# if they are equal till now assign same rank to ith as that of (i - 1)th
# else rank for ith will be currRank (i.e. rank of (i - 1)th) plus 1,
# i.e (currRank + 1)
if L[i - 1][1] != L[i][1] or L[i - 1][2] != L[i][2]:
currRank += 1
suffixRank[L[i][0]] = currRank
cnt *= 2
stp += 1
# loop all ordered substrings
seen = set()
length = 0
for i in range(N):
index = L[i][0]
for c in itertools.accumulate(itertools.islice(s, index, N)):
# more memory efficient to store hash of strings seen, instead of strings
hash_c = hash(c)
if hash_c in seen: # check if unique
continue
seen.add(hash_c)
length += len(c)
if length >= k:
return c[k - length - 1]
raise NotImplementedError("should not happen")
def main(fptr: IO) -> None:
t = int(input().strip())
for _ in range(t):
s = input()
k = int(input().strip())
res = ashtonString(s, k)
fptr.write(str(res) + "\n")
if __name__ == "__main__":
if path := os.getenv("OUTPUT_PATH"):
with Path(path).open("wt", encoding="utf-8") as fptr:
main(fptr)
fptr.close()
else:
main(sys.stdout)