Arduino-Source/SerialPrograms/Scripts/image_viewer.py at main · PokemonAutomation/Arduino-Source · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#!python3

"""
A simple image viewer (using OpenCV) with basic ability to check pixel location and color.
It can also draw boxes on the image and print their locations. Useful for writing
PokemonAutomation visual inference methods.

Single left clicking on a pixel shows you the info of that pixel. Note it may also print the
alpha channel value.

- Press 'w', 's', 'a', 'd' to move the selected pixels around.

To draw a box: left click and drag the rectangle.
You can draw multiple boxes on the screen.

- Press 'i' to dump the information of those boxes so you can copy them into the code,
  or into check_detector_regions.py to fine tune the boxes.

- Press 'backspace/delete' to delete the current selected box. If no box is selected,
  delete the last added box. Select an existing box by right clicking.

- Press 'ESC' to exit the program.
"""


import argparse
import cv2
import numpy as np

class ImageViewer:
	def __init__(self, image: np.ndarray, highlight_list = []):
		if image.ndim == 2:
			# convert gray scale image to RGB image
			image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
		self.image = image  # bgr or bgra channel order
		self.nc = image.shape[2]  # num_channel
		if self.nc == 4:
			alpha_free_image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
		else:
			alpha_free_image = image
		self.hsv_image = cv2.cvtColor(alpha_free_image, cv2.COLOR_BGR2HSV)
		self.selected_pixel = (-1, -1)
		self.buffer = image.copy()
		self.window_name = 'image'
		self.height = image.shape[0]
		self.width = image.shape[1]
		self.highlight_list = highlight_list
		self.cur_highlight_index = -1
		self.rects = [] # List of Tuple[int,int,int,int]: each tuple: start_x, start_y, end_x, end_y
		self.cur_rect_index = -1
		self.mouse_down = False
		self.mouse_move_counter = 0
		# The size of the cross used to highlight a selected pixel
		self.cross_size = max(1, min(self.width, self.height) // 200)

	def _solid_color(self, color):
		return color if self.image.shape[2] == 3 else color + [255]

	def _set_color_to_buffer(self, coord, color):
		x, y = coord
		if x >= 0 and x < self.width and y >= 0 and y < self.height:
			self.buffer[y, x] = self._solid_color(color)

	def update_buffer(self) -> None:
		self.buffer = self.image.copy()
		if self.selected_pixel[0] >= 0 and self.selected_pixel[1] >= 0:
			p = self.selected_pixel
			# Draw a red cross
			self._set_color_to_buffer(p, color=[0, 0, 255])
			for i in range(1, self.cross_size):
				self._set_color_to_buffer((p[0]-i, p[1]), color=[0, 0, 255])
				self._set_color_to_buffer((p[0]+i, p[1]), color=[0, 0, 255])
				self._set_color_to_buffer((p[0], p[1]-i), color=[0, 0, 255])
				self._set_color_to_buffer((p[0], p[1]+i), color=[0, 0, 255])

		if self.cur_highlight_index >= 0 and len(self.highlight_list) > 0:
			for pixel in self.highlight_list[self.cur_highlight_index]:
				self.buffer[pixel[1], pixel[0]] = self._solid_color([255, 0, 0])

		for i, rect in enumerate(self.rects):
			# print(f"In render(): {rect}")
			# rect: [start_x start_y end_x end_y]
			width = 2
			color = (0, 0, 255) if i == self.cur_rect_index else (255, 0, 0)
			self.buffer = cv2.rectangle(self.buffer, (rect[0], rect[1]), (rect[2], rect[3]), color, width)

	def _render(self) -> None:
		self.update_buffer()
		cv2.imshow(self.window_name, self.buffer)
		# self.fullscreen = False

	def _change_selected_rect(self, x, y):
		if len(self.rects) == 0:
			return

		min_dist = 0
		for i, rect in enumerate(self.rects):
			min_x = min(rect[0], rect[2])
			max_x = max(rect[0], rect[2])
			min_y = min(rect[1], rect[3])
			max_y = max(rect[1], rect[3])
			dist_x = min_x - x if x <= min_x else (x - max_x if x >= max_x else 0)
			dist_y = min_y - y if y <= min_y else (y - max_y if y >= max_y else 0)
			dist = dist_x ** 2 + dist_y ** 2
			if i == 0 or dist < min_dist:
				min_dist = dist
				self.cur_rect_index = i
		print(f"Selected rect No.{self.cur_rect_index}/{len(self.rects)}: {self.rects[self.cur_rect_index]}.")

	def _print_pixel(self, coord):
		p = self.image[coord[1], coord[0]]
		msg = f"Pixel (x,y) = ({coord[0]}, {coord[1]}), ({coord[0]/self.width:0.3}, {coord[1]/self.height:0.3}), "
		if self.nc == 4:
			msg += f"argb=[{p[3]}, {p[2]}, {p[1]}, {p[0]}], {hex(((int(p[3])*256+int(p[2]))*256+int(p[1]))*256+int(p[0]))}"
		else:
			msg += f"rgb=[{p[2]}, {p[1]}, {p[0]}]"
		p = self.hsv_image[coord[1], coord[0]]
		msg += f", hsv=[{p[0]}, {p[1]}, {p[2]}]"
		print(msg)

	def _print_rect(self, i, rect):
		crop = self.image[rect[1]:rect[3], rect[0]:rect[2]].astype(float)
		num_pixels = crop.shape[0] * crop.shape[1]
		# crop_sum shape: (4), 4 is channel count
		crop_sum = np.sum(crop, (0, 1))
		# crop_avg shape: (4), 4 is channel count
		crop_avg = crop_sum / num_pixels
		# crop_sqr_sum shape: (4), 4 is channel count
		crop_sqr_sum = np.sum(np.square(crop), (0, 1))

		crop_stddev = np.sqrt(np.clip(crop_sqr_sum - (np.square(crop_sum) / num_pixels), 0, None) / (num_pixels-1))

		# [0:3] to remove alpha channel
		avg_sum = np.sum(crop_avg[0:3])
		avg_ratio = [1/3., 1/3., 1/3.] if avg_sum == 0. else crop_avg / avg_sum
		avg_ratio = np.array([avg_ratio[2], avg_ratio[1], avg_ratio[0]])
		x = rect[0] / self.width
		y = rect[1] / self.height
		w = (rect[2] - rect[0]) / self.width
		h = (rect[3] - rect[1]) / self.height

		stddev_sum = np.sum(crop_stddev)
		print(f"Rect No.{i}, ({x:.3f}, {y:.3f}, {w:.3f}, {h:.3f}) rgb ratio: {avg_ratio[0]:.3f}:{avg_ratio[1]:.3f}:{avg_ratio[2]:.3f}, stddev sum: {stddev_sum:.3g}")

	def _mouse_callback(self, event, x, y, flags, param):
		redraw = False

		# Dragging selected_pixel rectangle
		if event == cv2.EVENT_LBUTTONDOWN:
			self.mouse_down = True
			self.cur_rect_index = len(self.rects)
			self.rects.append([x, y, x, y])
			self.mouse_move_counter = 0
		elif event == cv2.EVENT_LBUTTONUP:
			self.mouse_down = False
			assert len(self.rects) > 0
			if self.rects[-1][0] == self.rects[-1][2] and self.rects[-1][1] == self.rects[-1][3]:
				self.cur_rect_index -= 1
				self.rects.pop()
				self._change_selected_rect(x, y)
				self._print_pixel((x, y))
				if self.selected_pixel != (x, y):
					self.selected_pixel = (x, y)
					redraw = True
			else:
				# sanitize the rect:
				if self.cur_rect_index >= 0 and self.cur_rect_index < len(self.rects):
					rect = self.rects[self.cur_rect_index]
					min_x = min(rect[0], rect[2])
					max_x = max(rect[0], rect[2])
					min_y = min(rect[1], rect[3])
					max_y = max(rect[1], rect[3])
					rect[0] = min_x
					rect[1] = min_y
					rect[2] = max_x
					rect[3] = max_y
			redraw = True
			mouse_move_counter = 0
		elif event == cv2.EVENT_RBUTTONUP: # right click to select existing rect to edit
			if len(self.rects) > 0:
				self._change_selected_rect(x, y)
				redraw = True
		elif event == cv2.EVENT_MOUSEMOVE and self.mouse_down:
			if self.cur_rect_index >= 0 and self.cur_rect_index < len(self.rects):
				old_loc = (self.rects[self.cur_rect_index][2], self.rects[self.cur_rect_index][3])
				if old_loc != (x, y):
					self.rects[self.cur_rect_index][2] = x
					self.rects[self.cur_rect_index][3] = y
					# if mouse_move_counter % 2 == 0:
					redraw = True
			self.mouse_move_counter += 1

		if redraw:
			self._render()

	def run(self):
		cv2.imshow(self.window_name, self.buffer)
		cv2.setWindowProperty(self.window_name, cv2.WND_PROP_TOPMOST, 1)
		cv2.setMouseCallback(self.window_name, self._mouse_callback)

		while True:
			key = cv2.waitKey(0)

			if key == 27: # esc
				cv2.destroyAllWindows()
				break
			elif key == 119: # w
				if self.selected_pixel[0] >= 0 and self.selected_pixel[1] >= 1:
					self.selected_pixel = (self.selected_pixel[0], self.selected_pixel[1]-1)
					self._print_pixel(self.selected_pixel)
			elif key == 115: # s
				if self.selected_pixel[0] >= 0 and self.selected_pixel[1] >= 0 and self.selected_pixel[1] + 1 < self.height:
					self.selected_pixel = (self.selected_pixel[0], self.selected_pixel[1]+1)
					self._print_pixel(self.selected_pixel)
			elif key == 97: # a
				if self.selected_pixel[0] >= 1 and self.selected_pixel[1] >= 0:
					self.selected_pixel = (self.selected_pixel[0]-1, self.selected_pixel[1])
					self._print_pixel(self.selected_pixel)
			elif key == 100: # d
				if self.selected_pixel[0] >= 0 and self.selected_pixel[1] >= 0 and self.selected_pixel[0] + 1 < self.width:
					self.selected_pixel = (self.selected_pixel[0]+1, self.selected_pixel[1])
					self._print_pixel(self.selected_pixel)
			elif key == 44: # ,
				if len(self.highlight_list) > 0:
					self.cur_highlight_index = len(self.highlight_list) - 1 if self.cur_highlight_index < 0 else self.cur_highlight_index-1
					print(f"Change highlight index to {self.cur_highlight_index}/{len(self.highlight_list)}")
			elif key == 46: # .
				if len(self.highlight_list) > 0:
					self.cur_highlight_index = -1 if self.cur_highlight_index == len(self.highlight_list)-1 else self.cur_highlight_index+1
					print(f"Change highlight index to {self.cur_highlight_index}/{len(self.highlight_list)}")
			elif key == 127 or key == 8: # DEL or backspace (BS), remove selected rectangle
				mouse_down = False
				mouse_move_counter = 0
				if len(self.rects) > 0:
					if self.cur_rect_index < 0:
						self.cur_rect_index = len(self.rects) - 1
					del self.rects[self.cur_rect_index]
					if len(self.rects) == 0:
						self.cur_rect_index = -1
					elif self.cur_rect_index >= len(self.rects):
						self.cur_rect_index = len(self.rects) - 1
			elif key == 105: # i
				for i, rect in enumerate(self.rects):
					self._print_rect(i, rect)
			else:
				print(f"Pressed key {key}")
			self._render()


def parse_box(box_str: str):
	"""
	Parse a box string in the format "start_x,start_y,width,height".
	All values should be floats between 0.0 and 1.0 (inclusive).
	Returns a tuple of (start_x, start_y, width, height).
	"""
	try:
		parts = box_str.split(',')
		if len(parts) != 4:
			raise ValueError(f"Box must have exactly 4 values, got {len(parts)}")

		values = [float(v) for v in parts]
		for i, v in enumerate(values):
			if not (0.0 <= v <= 1.0):
				raise ValueError(f"Value at position {i} ({v}) is not between 0.0 and 1.0")

		return tuple(values)
	except ValueError as e:
		raise argparse.ArgumentTypeError(f"Invalid box format '{box_str}': {e}")


if __name__ == '__main__':
	parser = argparse.ArgumentParser(
		description='Interactive image viewer with pixel inspection and box drawing capabilities.',
		epilog="""
Controls:
  Left click: Select pixel and show info
  Left drag: Draw a box
  Right click: Select existing box
  w/s/a/d: Move selected pixel
  i: Print info for all boxes
  Backspace/Delete: Delete selected box (or last box if none selected)
  ESC: Exit

Box format:
  Each box is specified as "start_x,start_y,width,height" where all values
  are floats between 0.0 and 1.0 (inclusive), representing normalized coordinates.
		""",
		formatter_class=argparse.RawDescriptionHelpFormatter
	)

	parser.add_argument(
		'image',
		help='Path to the image file to view'
	)

	parser.add_argument(
		'--box',
		action='append',
		type=parse_box,
		metavar='START_X,START_Y,WIDTH,HEIGHT',
		help='Add a box to render on the image. Format: "start_x,start_y,width,height" '
		     'where each value is a float between 0.0 and 1.0. Can be specified multiple times.'
	)

	args = parser.parse_args()

	# bgr or bgra channel order
	image = cv2.imread(args.image, cv2.IMREAD_UNCHANGED)
	if image is None:
		parser.error(f"Could not load image from '{args.image}'")

	height = image.shape[0]
	width = image.shape[1]
	print(f"Load image from {args.image}, size: {width} x {height}")

	viewer = ImageViewer(image)

	# Add boxes from command line arguments
	if args.box:
		for box in args.box:
			start_x, start_y, box_width, box_height = box
			# Convert normalized coordinates to absolute pixel coordinates
			abs_start_x = int(start_x * width)
			abs_start_y = int(start_y * height)
			abs_end_x = int((start_x + box_width) * width)
			abs_end_y = int((start_y + box_height) * height)

			# Add to viewer's rect list in the format [start_x, start_y, end_x, end_y]
			viewer.rects.append([abs_start_x, abs_start_y, abs_end_x, abs_end_y])
			print(f"Added box: ({start_x:.3f}, {start_y:.3f}, {box_width:.3f}, {box_height:.3f}) -> "
			      f"pixels ({abs_start_x}, {abs_start_y}, {abs_end_x}, {abs_end_y})")
		viewer.update_buffer()

	viewer.run()