diff --git a/source/pic2card/datagen/generate_annotations.py b/source/pic2card/datagen/generate_annotations.py
index cbd0c21eba..9df16f9910 100644
--- a/source/pic2card/datagen/generate_annotations.py
+++ b/source/pic2card/datagen/generate_annotations.py
@@ -13,6 +13,8 @@ def calculate_annotation(
     elements_with_path: List[str],
     elements_dimensions: List[tuple],
     padded_image_element: List[Sequence],
+    has_merged_image_element: List[bool],
+    element_positions: List[str],
 ) -> List[List[tuple]]:
     """
     Calculates the annotations for a given list of elements.
@@ -20,12 +22,20 @@ def calculate_annotation(
     @param elements_with_path: list of elements path from elements directory
     @param elements_dimensions: list of elements dimensions
     @param padded_image_element: list of image elements after padding
+    @param has_merged_image_element: list of booleans, True for merged elements
+    @param element_positions: positions of the elements on canvas wrt config
+
     @return: annotations
     """
     annotations = []
     padded_elements_height = [
         element.shape[0] for element in padded_image_element
     ]
+    padded_elements_width = padded_image_element[0].shape[1]
     number_of_elements = len(elements_with_path)
     for index in range(number_of_elements):
-        xmin = 10
+        # right positioned elements are measured from the right canvas edge
+        if 'right' in element_positions[index]:
+            xmin = padded_elements_width - (elements_dimensions[index][1] + 10)
+        else:
+            xmin = 10
@@ -36,11 +46,25 @@ def calculate_annotation(
             sum([padded_elements_height[height] for height in range(index)])
             + 10
         )
-        xmax = elements_dimensions[index][1] + 10
+        if 'right' in element_positions[index]:
+            xmax = padded_elements_width - 10
+        else:
+            xmax = elements_dimensions[index][1] + 10
         ymax = (
             sum([padded_elements_height[height] for height in range(index + 1)])
             - 10
         )
+
+        # the right-hand element of a merged pair shares its row with the
+        # previous (left-hand) element, so it reuses that element's y range
+        if (
+            annotations
+            and has_merged_image_element
+            and has_merged_image_element[index]
+            and 'right' in element_positions[index]
+        ):
+            ymin = annotations[-1][0][1]
+            ymax = annotations[-1][1][1]
         annotations.append([(xmin, ymin), (xmax, ymax)])
     return annotations
 
@@ -91,6 +115,8 @@ def run_annotator(
     elements_with_path: List[str],
     elements_dimensions: List[tuple],
     padded_image_element: List[Sequence],
+    has_merged_image_element: List[bool],
+    element_positions: List[str],
 ) -> List[List[tuple]]:
     """
     Returns a list of x and y coords of the elements in the generated image
@@ -98,12 +124,16 @@ def run_annotator(
     @param elements_with_path: list of elements path from elements directory
     @param elements_dimensions: list of elements dimensions
     @param padded_image_element: list of image elements after padding
+    @param has_merged_image_element: list of booleans, True for merged elements
+    @param element_positions: positions of the elements on canvas wrt config
+
     @return: annotations
     """
     number_of_elements = len(elements_with_path)
     if number_of_elements <= config.ELEMENT_COUNT_THRESHOLD:
         annotations = calculate_annotation(
-            elements_with_path, elements_dimensions, padded_image_element
+            elements_with_path, elements_dimensions, padded_image_element,
+            has_merged_image_element, element_positions
         )
     else:
         mid_value = number_of_elements // 2
@@ -115,6 +145,8 @@ def run_annotator(
             left_elements_with_path,
             left_elements_dimensions,
             left_padded_image_element,
+            has_merged_image_element,
+            element_positions,
         )
 
         right_elements_with_path = elements_with_path[mid_value:]
@@ -125,6 +157,12 @@ def run_annotator(
             right_elements_with_path,
             right_elements_dimensions,
             right_padded_image_element,
+            # re-index the per-element data so it lines up with the right half
+            (has_merged_image_element or [])[mid_value:],
+            [
+                element_positions[index]
+                for index in range(mid_value, number_of_elements)
+            ],
         )
 
         padded_pixels_for_right_element = (
@@ -158,7 +196,9 @@ def get_annotation_file(
         layout.elements_with_path,
         layout.element_dimensions,
         padded_image_element,
+        layout.has_merged_elements,
+        layout.element_positions,
     )
     # get annotation xml
     annotation_xml = generate_annotation_xml(
         annotations, generated_image_prop, element_type
diff --git a/source/pic2card/datagen/generate_synthetic_image.py b/source/pic2card/datagen/generate_synthetic_image.py
index 35961949fa..797310078e 100644
--- a/source/pic2card/datagen/generate_synthetic_image.py
+++ b/source/pic2card/datagen/generate_synthetic_image.py
@@ -8,7 +8,7 @@
 import os
 import glob
 import logging
-from typing import List, Sequence, Any
+from typing import List, Sequence, Any, Dict
 import cv2
 import numpy as np
 from mystique import config
@@ -36,8 +36,30 @@ def __init__(self, number_of_elements: int, elements_dir: str) -> None:
         self.element_dimensions = self.get_elements_dimensions(
             self.elements_with_path
         )
+        self.elements_type = self.get_elements_type(self.elements_with_path)
+        self.attach_mandatory_element = self.get_mandatory_element(self.elements_with_path)
+        self.has_merged_elements = None
+        self.element_positions = None
 
+    def get_mandatory_element(self, elements_with_path):
+        """
+        Ensures every element type configured in MANDATORY_CARD_ELEMENTS is
+        present, replacing an entry of the list with a random one if missing.
+        @param self: CardElements object
+        @param elements_with_path: List of elements path
+        @return elements_with_path: List of elements path
+        """
+        if config.MANDATORY_CARD_ELEMENTS:
+            for index, mandatory_element in enumerate(config.MANDATORY_CARD_ELEMENTS):
+                if any(mandatory_element in path for path in elements_with_path):
+                    continue
+                random_mandatory_element_path = random.sample(glob.glob(self.elements_dir + f"{mandatory_element}/*.*"),
+                                                              k=1)
+                elements_with_path.pop(index)
+                elements_with_path.insert(index, random_mandatory_element_path[0])
+        return elements_with_path
+
     def get_elements_path(self) -> List[str]:
         """
         Returns a list of complete path of card_elements selected at random
         @param self: CardElements object
@@ -46,15 +68,27 @@ def get_elements_path(self) -> List[str]:
         elements = glob.glob(self.elements_dir + "/**/*.*", recursive=True)
         elements_exist = [os.path.isfile(filepath) for filepath in elements]
         if elements_exist:
-            elements_with_path = random.choices(
+            elements_with_path = random.sample(
                 elements, k=self.number_of_elements
             )
+            elements_with_path = self.get_mandatory_element(elements_with_path)
         else:
             error_msg = "No image elements found under card_elements directory"
             logger.error(error_msg)
             raise Exception(error_msg)
         return elements_with_path
 
+    @staticmethod
+    def get_elements_type(elements_with_path: List[str]) -> Dict[int, str]:
+        """
+        Returns the element types of card_elements keyed by element index
+        @param elements_with_path: list of elements path
+        @return: element_types
+        """
+        element_type = [os.path.basename(os.path.dirname(element)) for element in elements_with_path]
+        element_type = {k: v for k, v in enumerate(element_type)}
+        return element_type
+
     @staticmethod
     def get_elements_dimensions(elements_with_path: List[str]) -> List[tuple]:
         """
@@ -69,37 +103,182 @@ def get_elements_dimensions(elements_with_path: List[str]) -> List[tuple]:
             elements_dimensions.append(dimension)
         return elements_dimensions
 
+    def add_padding_to_img_elements(self, elements_with_path: List[str],
+                                    elements_type: Dict[int, str]) -> List[Sequence]:
+        """
+        Returns a list of elements in image format padded
+        along width of the image
+        @param elements_with_path: list of elements path from elements directory
+        @param elements_type: dict of element index to element type
+        @return: reshaped_image_elements
+        """
+        sorted_elements_with_path = position_elements_path(elements_with_path, elements_type)
 
-def add_padding_to_img_elements(
-    elements_with_path: List[str],
-) -> List[Sequence]:
+        # updating parameters necessary for annotations
+        self.elements_with_path = sorted_elements_with_path
+        elements_type = self.get_elements_type(self.elements_with_path)
+        self.elements_type = elements_type
+        updated_element_dimensions = self.get_elements_dimensions(sorted_elements_with_path)
+        self.element_dimensions = updated_element_dimensions
+
+        # selecting random element positions from the available positions for elements in config
+        element_random_positions = get_random_elements_positions(elements_type)
+        self.element_positions = element_random_positions
+        image_elements = [cv2.imread(element) for element in sorted_elements_with_path]
+
+        # check for element merging
+        element_merge = check_possible_element_merge(element_random_positions, sorted_elements_with_path)
+
+        reference_canvas_width = max(
+            [element.shape[1] for element in image_elements]
+        )
+
+        reshaped_image_elements = []
+        for e_index, image_element in enumerate(image_elements):
+            image_element_width = image_element.shape[1]
+            pixel_diff_width = reference_canvas_width - image_element_width
+            e_position = element_random_positions[e_index]
+
+            merge = element_merge[e_index]
+            first_element_for_merging = element_merge[e_index] and\
+                element_merge[e_index + 1 if len(image_elements)-1 != e_index else 0]
+            if first_element_for_merging:
+                first_image_height_with_merge = image_elements[e_index].shape[0]
+                second_image_height = image_elements[e_index + 1 if len(image_elements) - 1 != e_index else 0].shape[0]
+                first_image_width = image_elements[e_index].shape[1]
+                second_image_width = image_elements[e_index + 1 if len(image_elements) - 1 != e_index else 0].shape[1]
+                pixel_diff_height = abs(first_image_height_with_merge - second_image_height)
+
+            if 'right' in e_position:
+                top_padding_for_merge = 10
+                left_padding = pixel_diff_width + 10
+                if merge:
+                    left_padding = 10
+                    if first_image_height_with_merge > second_image_height:
+                        top_padding_for_merge = pixel_diff_height + 10
+
+                padded_image_element = cv2.copyMakeBorder(
+                    image_element,
+                    top=top_padding_for_merge,
+                    bottom=10,
+                    left=left_padding,
+                    right=10,
+                    borderType=cv2.BORDER_CONSTANT,
+                    value=config.CANVAS_COLOR["WHITE"],
+                )
+            elif 'left' in e_position:
+                top_padding_for_merge = 10
+                right_padding = pixel_diff_width + 10
+                if merge:
+                    right_padding = reference_canvas_width - (first_image_width + 10 + second_image_width)
+                    if first_image_height_with_merge < second_image_height:
+                        top_padding_for_merge = pixel_diff_height + 10
+
+                padded_image_element = cv2.copyMakeBorder(
+                    image_element,
+                    top=top_padding_for_merge,
+                    bottom=10,
+                    left=10,
+                    right=right_padding,
+                    borderType=cv2.BORDER_CONSTANT,
+                    value=config.CANVAS_COLOR["WHITE"],
+                )
+            else:
+                raise Exception('Position configuration for the elements are not provided')
+            reshaped_image_elements.append(padded_image_element)
+
+        # replacing the merged image with the existing one and adding dummy white image inplace of second image
+        if any(element_merge):
+            for e_index, merge in enumerate(element_merge):
+                merge = element_merge[e_index] and element_merge[
+                    e_index + 1 if len(image_elements) - 1 != e_index else 0]
+                if merge:
+                    first_img = reshaped_image_elements[e_index]
+                    second_img = reshaped_image_elements[e_index + 1]
+                    # second_img[:first_img.shape[0], :first_img.shape[1]] = second_img
+                    merged_img = np.concatenate((first_img, second_img), axis=1)
+                    dummy_img = np.ones(merged_img.shape, np.uint8) * 255
+                    reshaped_image_elements.pop(e_index + 1)
+                    reshaped_image_elements.pop(e_index)
+                    reshaped_image_elements.insert(e_index, merged_img)
+                    reshaped_image_elements.insert(e_index + 1, dummy_img)
+
+        # passing info for annotator
+        self.has_merged_elements = element_merge
+
+        return reshaped_image_elements
+
+
+def get_random_elements_positions(elements_type):
+    """
+    Picks a random position for every element from the positions
+    configured for its type in config.ELEMENT_POSITION
+    @param elements_type: dict of element index to element type
+    @return: dict of element index to the selected position
+    """
+    random_element_positions = {}
+    elements_position_key = config.ELEMENT_POSITION
+
+    for index, element in elements_type.items():
+        random_pos = random.choice(elements_position_key[element])
+        random_element_positions.update({index: random_pos})
+    return random_element_positions
+
+
+def position_elements_path(elements_with_path, elements_type):
+    """
+    Orders the element paths as top, mid and then bottom elements based on
+    the positions configured for their type in config.ELEMENT_POSITION
+    @param elements_with_path: list of elements path
+    @param elements_type: dict of element index to element type
+    @return: list of elements path ordered by position
+    """
+    elements_position_key = config.ELEMENT_POSITION
+    sorted_elements_path = {'top': [], 'mid': [], 'bottom': []}
+    for path_index, element_path in enumerate(elements_with_path):
+        e_path_type = elements_type[path_index]
+        elements_position = elements_position_key[e_path_type]
+        bottom = [position for position in elements_position if 'bottom' in position]
+        top = [position for position in elements_position if 'top' in position]
+        mid = [position for position in elements_position if 'mid' in position]
+        if top:
+            sorted_elements_path.get('top').append(element_path)
+        elif bottom:
+            sorted_elements_path.get('bottom').append(element_path)
+        elif mid:
+            sorted_elements_path.get('mid').append(element_path)
+        else:
+            pass
+    sorted_elements_path = sorted_elements_path.get('top') + sorted_elements_path.get('mid')\
+        + sorted_elements_path.get('bottom')
+    return sorted_elements_path
+
+
+def check_possible_element_merge(element_positions, elements_path):
     """
-    Returns a list of elements in image format padded
-    along width of the image
-    @param elements_with_path: list of elements path from elements directory
-    @return: reshaped_image_elements
+    Returns a list of boolean values that specify which elements are
+    capable of merging
     """
+    e_merge = []
+    dimensions = CardElements.get_elements_dimensions(elements_path)
+    canvas_width = max([dimension[1] for dimension in dimensions])
+    for index, position in element_positions.items():
+        if index == 0:
+            e_merge.insert(index, False)
+            continue
 
-    image_elements = [cv2.imread(element) for element in elements_with_path]
-    reference_canvas_width = max(
-        [element.shape[1] for element in image_elements]
-    )
-    reshaped_image_elements = []
-    for image_element in image_elements:
-        image_element_width = image_element.shape[1]
-        pixel_diff = reference_canvas_width - image_element_width
-        padded_image_element = cv2.copyMakeBorder(
-            image_element,
-            top=10,
-            bottom=10,
-            left=10,
-            right=pixel_diff + 10,
-            borderType=cv2.BORDER_CONSTANT,
-            value=config.CANVAS_COLOR["WHITE"],
-        )
-        reshaped_image_elements.append(padded_image_element)
-    return reshaped_image_elements
+        prev_position = element_positions[index-1]
+        if 'left' in prev_position and 'right' in position and prev_position.split('_')[0] == position.split('_')[0]:
+            if dimensions[index][1]+dimensions[index-1][1] < canvas_width:
+                e_merge.insert(index-1, True)
+                e_merge.pop(index)
+                e_merge.insert(index, True)
+            else:
+                e_merge.insert(index, False)
+        else:
+            e_merge.insert(index, False)
+    return e_merge
 
 
 def generate_image(reshaped_image_elements: List[Sequence]) -> List[Sequence]:
     """
@@ -117,7 +296,7 @@ def generate_image(reshaped_image_elements: List[Sequence]) -> List[Sequence]:
             reshaped_image_elements[: number_of_elements // 2]
         )
         right_elements = np.vstack(
-            reshaped_image_elements[number_of_elements // 2 :]
+            reshaped_image_elements[number_of_elements // 2:]
         )
 
         pixel_diff = abs(left_elements.shape[0] - right_elements.shape[0])
diff --git a/source/pic2card/mystique/config.py b/source/pic2card/mystique/config.py
index 1908c6e791..8b710c6e76 100644
--- a/source/pic2card/mystique/config.py
+++ b/source/pic2card/mystique/config.py
@@ -158,7 +158,8 @@
     "GOLD": [0, 255, 255],
 }
 BACKGROUND_COLOR = "WHITE"
-ELEMENT_COUNT_THRESHOLD = 5
+ELEMENT_COUNT_THRESHOLD = 4
+
 BULK_IMAGES_NEEDED = 10
 ELEMENTS_DIR = os.path.join(
     os.path.dirname(__file__), "../data/synthetic/card_elements/"
@@ -172,3 +173,15 @@
 GENERATED_ZIP_DIR = os.path.join(
     os.path.dirname(__file__), "../data/synthetic/generated_zipfiles/"
 )
+
+ELEMENT_POSITION = {
+    "images": ["top_right", "mid_left", "mid_right"],
+    "textbox": ["top_left", "top_right", ],  # "mid_left", "mid_right"],
+    "radiobutton": ["mid_left", ],
+    "checkbox": ["mid_left", "mid_right"],
+    "actionset": ["bottom_right", "bottom_left"],
+}
+
+MANDATORY_CARD_ELEMENTS = [
+    "textbox",
+]