-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
133 lines (108 loc) · 3.56 KB
/
server.py
File metadata and controls
133 lines (108 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from utils import *
import mimetypes, re, sqlite3, sys, time
import http.server
import socketserver
from html.parser import HTMLParser
from urllib.parse import unquote, urlparse, parse_qs
import pystache
from spotquery import query
import spot
import cgi
# Private copy of the stdlib extension -> MIME type table (keys look like
# '.png'); used to pick the Content-type header when serving files.
ext2type = dict(mimetypes.types_map)

# Index object built once at import time from 'spot-index'; it is handed to
# query() for every search request.
index = spot.Index('spot-index')
class FindAndBoldTermsHTMLParser(HTMLParser):
    """Re-serialize an HTML fragment, wrapping search terms in a span.

    Usage: assign an iterable of plain-text terms to ``terms``, call
    ``reset()``, ``feed()`` the fragment and ``close()``; the rebuilt markup
    is then available in ``html``.

    Every case-insensitive occurrence of a term inside a text node that is
    not glued to another word character is emitted as
    ``<span class='term'>match</span>``. Text and attribute values are
    HTML-escaped again on output, because ``HTMLParser`` hands them over
    already decoded.
    """

    def __init__(self):
        # HTMLParser.__init__ calls reset(), which creates self.html.
        super().__init__()
        self.terms = set()
        # Compiled alternation for the terms it was built from; rebuilt
        # lazily whenever ``terms`` has been reassigned or mutated.
        self._pattern_terms = None
        self._pattern = None

    def reset(self):
        """Clear parser state and start a new, empty output document."""
        super().reset()
        self.html = ''

    @staticmethod
    def _escape(text, quote=False):
        """HTML-escape *text*; also escape quotes when *quote* is true."""
        from html import escape  # local import: keeps the file's import block untouched
        return escape(text, quote=quote)

    def _render_open_tag(self, tag, attrs, closer):
        """Return ``<tag attr="value" ...`` followed by *closer*."""
        parts = ['<' + tag]
        for key, value in attrs:
            if value is None:
                # Valueless attribute such as ``disabled``.
                parts.append(f' {key}')
            else:
                parts.append(f' {key}="{self._escape(value, quote=True)}"')
        parts.append(closer)
        return ''.join(parts)

    def _term_pattern(self):
        """Return one compiled regex matching any current term, or None."""
        wanted = frozenset(term for term in self.terms if term)
        if wanted != self._pattern_terms:
            self._pattern_terms = wanted
            if wanted:
                # Longest first so that a longer term wins over its own prefix.
                ordered = sorted(wanted, key=lambda term: (-len(term), term))
                alternatives = '|'.join(re.escape(term) for term in ordered)
                # Lookarounds assert the boundaries without consuming the
                # neighbouring characters, so punctuation is preserved and
                # terms at the very start/end of the text still match.
                self._pattern = re.compile(
                    rf'(?<!\w)(?:{alternatives})(?!\w)', re.IGNORECASE
                )
            else:
                self._pattern = None
        return self._pattern

    def handle_starttag(self, tag, attrs):
        self.html += self._render_open_tag(tag, attrs, '>')

    def handle_startendtag(self, tag, attrs):
        # Keep ``<br/>`` self-closing instead of the default start+end pair,
        # which would emit ``<br></br>``.
        self.html += self._render_open_tag(tag, attrs, '/>')

    def handle_endtag(self, tag):
        self.html += f'</{tag}>'

    def handle_data(self, data):
        if getattr(self, 'cdata_elem', None):
            # Raw content of <script>/<style>: pass through untouched.
            self.html += data
            return
        pattern = self._term_pattern()
        if pattern is None:
            self.html += self._escape(data)
            return
        # Single pass over the raw text: escape the gaps and the matches
        # separately so the inserted span markup itself is never escaped
        # and never re-scanned for further terms.
        pieces = []
        cursor = 0
        for match in pattern.finditer(data):
            pieces.append(self._escape(data[cursor:match.start()]))
            pieces.append(
                f"<span class='term'>{self._escape(match.group(0))}</span>"
            )
            cursor = match.end()
        pieces.append(self._escape(data[cursor:]))
        self.html += ''.join(pieces)


# Shared highlighter instance; the request handler sets ``terms`` and reuses
# it for every comment of a search response.
boulder = FindAndBoldTermsHTMLParser()
class MyServer(http.server.BaseHTTPRequestHandler):
    """Request handler: ``/search...`` runs a query, anything else is a file.

    Files are served from the process's current working directory only.
    """

    def do_GET(self):
        """Dispatch a GET request to ``search`` or ``servefile``."""
        if self.path.startswith('/search'):
            args = parse_qs(urlparse(self.path).query)
            self.search(args)
        else:
            self.servefile()

    def servefile(self):
        """Send the file named by the request path, or a 404.

        The path is resolved relative to the current working directory and
        rejected when it points outside of it (``..`` segments, absolute
        paths, symlinks). The query string and fragment are ignored. The
        Content-type comes from ``ext2type`` and falls back to
        ``application/octet-stream`` for unknown extensions.
        """
        import os  # local import: keeps the file's import block untouched

        # Split by hand rather than urlparse(): a path starting with '//'
        # would otherwise be parsed as a network location.
        raw_path = self.path.split('?', 1)[0].split('#', 1)[0]
        relative = unquote(raw_path).lstrip('/')
        try:
            root = os.path.realpath(os.getcwd())
            target = os.path.realpath(os.path.join(root, relative))
            inside_root = os.path.commonpath([root, target]) == root
        except (OSError, ValueError):
            # ValueError: embedded NUL byte, or paths on different drives.
            inside_root = False
        if not inside_root:
            self.send_error(404, 'File not found')
            return

        # Read everything before sending the status line so that a failure
        # here can still be reported as a clean 404.
        try:
            with open(target, 'rb') as f:
                body = f.read()
        except (OSError, ValueError):
            self.send_error(404, 'File not found')
            return

        extension = os.path.splitext(target)[1].lower()
        content_type = ext2type.get(extension, 'application/octet-stream')
        self.send_response(200)
        self.send_header('Content-type', content_type)
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def search(self, args):
        """Run a search and send the rendered ``template.html`` page.

        Args:
            args: mapping as produced by ``parse_qs``; reads ``query``
                (search text) and ``max_results`` (defaults to 100 when
                missing or not an integer).

        ``query()`` is asked for ``max_results + 1`` hits so that "Over N
        results" can be reported. When it returns a string, that string is
        sent as the message of a 500 error. Otherwise its ``comments`` are
        annotated with ``pauthor``, ``subreddit`` and ``idx``, and each
        ``body_html`` is replaced by a version with the query terms
        highlighted.
        """
        start_time = time.time()
        print(f'search {args}')
        # NOTE(review): parse_qs has already percent-decoded the value, so
        # this unquote() decodes a second time; kept as-is in case clients
        # double-encode -- confirm against the front end.
        query_text = ' ' + unquote(args.get('query', [''])[0])
        try:
            max_results = int(args["max_results"][0])
        except (KeyError, IndexError, ValueError):
            max_results = 100

        query_result = query(index, query_text, max_results=max_results + 1)
        if isinstance(query_result, str):
            self.send_error(500, query_result)
            return
        comments = query_result['comments']

        # The parent author travels inside the token string as "pauthor:<name>".
        for comment in comments:
            pauthor_token = next(
                (t for t in comment["tokens"].split(' ') if t[:8] == 'pauthor:'),
                None,
            )
            if pauthor_token is not None:
                comment["pauthor"] = pauthor_token[8:]

        parser = MyHTMLParser()
        # Highlight plain words only: skip "field:value" tokens and anything
        # that is a substring of '()+' (the operators and the empty string).
        boulder.terms = {
            t for t in query_result['tokens']
            if (':' not in t) and (t not in '()+')
        }
        print('tokens', boulder.terms)

        for position, comment in enumerate(comments, start=1):
            if 'slatestarcodex' in comment['permalink']:
                comment['subreddit'] = 'slatestarcodex'
            else:
                comment['subreddit'] = 'TheMotte'
            comment['idx'] = position
            parser.reset()
            parser.feed(comment["body_html"])
            boulder.reset()
            boulder.feed(parser.alltext)
            # Flush any text the parser is still buffering (e.g. after a
            # trailing, unterminated '&').
            boulder.close()
            comment['body_html'] = boulder.html

        with open('template.html', 'r', encoding='utf-8') as f:
            template = f.read()

        dt = time.time() - start_time
        if len(comments) == max_results + 1:
            msg = f'Over {max_results} results in {dt:.3f} seconds'
        else:
            msg = f'{len(comments)} results in {dt:.3f} seconds'
        page = pystache.render(template, {
            'comments': comments,
            'num_results_msg': msg,
        })
        payload = page.encode('utf-8')

        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)
# Script entry: the listening port is the first command-line argument.
try:
    PORT = int(sys.argv[1])
except (IndexError, ValueError):
    # Missing or non-numeric port: exit with a usage line, not a traceback.
    sys.exit(f'usage: {sys.argv[0]} PORT')


class _ReusableTCPServer(socketserver.TCPServer):
    """TCPServer that may rebind its port immediately after a restart."""

    allow_reuse_address = True


# An empty host string binds to all interfaces.
with _ReusableTCPServer(("", PORT), MyServer) as httpd:
    print(f'serving @ {PORT}')
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C: leave the ``with`` block so the socket is closed cleanly.
        print('shutting down')