diff --git a/PageSegmentation_Workflow.html b/PageSegmentation_Workflow.html new file mode 100644 index 0000000..e573976 --- /dev/null +++ b/PageSegmentation_Workflow.html @@ -0,0 +1,11 @@ + + + + +Untitled Diagram + + +
+ + + \ No newline at end of file diff --git a/README.md b/README.md index 274832a..37fa0db 100755 --- a/README.md +++ b/README.md @@ -111,3 +111,4 @@ All output files created by the program are created in the data directory: ## Notes ## * If using IDE, run the command line script first before running the file in the IDE. +* To skip the binarizing process because the image is already binarized (such as a .tiff file), edit the user.ini file and set NeedBinarizing=FALSE. diff --git a/RunAll.sh b/RunAll.sh index 2b688bf..d56371a 100755 --- a/RunAll.sh +++ b/RunAll.sh @@ -5,7 +5,7 @@ echo "AIDA: Poem Identifier" cd data/ find Output_Snippets/ -type f > snippetPathList.txt -find Output_Snippets/ -not -path '*/\.*' -type f -printf "%f\n" > SnippetNameList.txt +find Output_Snippets/ -not -path '*/\.*' -type f -name '*.jpg' -exec basename {} \; > SnippetNameList.txt; cd ../ NOME=$1 c=0 diff --git a/RunPageSegmentation.sh b/RunPageSegmentation.sh index cb6f6aa..e7a0c84 100755 --- a/RunPageSegmentation.sh +++ b/RunPageSegmentation.sh @@ -1,6 +1,6 @@ #!/bin/bash cd src/ -javac execute/RunPageSegmentation.java +javac -cp ../tif_jar/jai_imageio.jar:./ execute/RunPageSegmentation.java cd ../ beginYear=$1 @@ -22,7 +22,8 @@ for i in $( ls ); do totalCount=$((totalCount + 1)) current=$(pwd) cd $src - java execute/RunPageSegmentation $current/$k +#java execute/RunPageSegmentation $current/$k + java -cp ../tif_jar/jai_imageio.jar:./ execute/RunPageSegmentation $current/$k cd $current done cd .. 
diff --git a/data/FullPages/test1/test2/WO2_B0001ORIWEEJO_1718_12_20-0001.tif b/data/FullPages/test1/test2/WO2_B0001ORIWEEJO_1718_12_20-0001.tif new file mode 100755 index 0000000..30b9b84 Binary files /dev/null and b/data/FullPages/test1/test2/WO2_B0001ORIWEEJO_1718_12_20-0001.tif differ diff --git a/data/SnippetNameList.txt b/data/SnippetNameList.txt new file mode 100644 index 0000000..e69de29 diff --git a/data/snippetPathList.txt b/data/snippetPathList.txt new file mode 100644 index 0000000..20e5444 --- /dev/null +++ b/data/snippetPathList.txt @@ -0,0 +1 @@ +Output_Snippets//.keep diff --git a/src/execute/RunPageSegmentation.java b/src/execute/RunPageSegmentation.java index db850d7..e43b76a 100644 --- a/src/execute/RunPageSegmentation.java +++ b/src/execute/RunPageSegmentation.java @@ -20,205 +20,240 @@ import models.Image; import blurring.ImageBlurrer; -public class RunPageSegmentation { +import models.ReadIni; +import models.EnumCollection; - /** - * This main function is responsible for running the Full-page segmentation algorithm. - * It reads in either a single .jpg image or a text list of .jpg images and performs segmentation. - * If no argument given it runs on all images. Placement of these images is different from .txt or .jpg options. 
- * @param args - */ - public static void main(String[] args) { - if(args.length > 0){ - //process images from a text file list - if(args[0].contains(".txt")){ - String imageList = args[0]; - File inputImages = new File(Constants.imageLists,imageList); - BufferedReader br = null; - try { - br = new BufferedReader(new FileReader(inputImages)); - } catch (FileNotFoundException e1) { - e1.printStackTrace(); - } - try { - String line = br.readLine(); - int i = 1; - while(line != null){ - System.out.println("Image "+i); - Image img = importImage(line); - try{ - segmentImage(img, false); - }catch(RuntimeException r){ - System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast"); - }catch(Exception e){ - System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast"); - } - line = br.readLine(); - i++; - } - } catch (IOException e) { - e.printStackTrace(); - } - //process only one image - }else if(args[0].contains(".jpg")){ - Image img = importImage(args[0]); - try{ - segmentImage(img, false); - } catch(Exception e){ - e.printStackTrace(); - //System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated"); - } - } - //Process all images in the AIDA file structure created by the image retrieval script. - //Currently legacy code as we now have a script that will do this process using a bash script - //that repeatedly calls the process single image option. 
- }else{ - File start = new File(Constants.fullPagePath); - File successFile = new File(Constants.successSegment); - BufferedWriter successStream = null; - try{ - successStream = new BufferedWriter(new FileWriter(successFile,false)); - }catch(Exception e){ - System.out.println("Failed to create BufferedWriter"); - } - StringBuilder sb = new StringBuilder(); - File[] newspapers = start.listFiles(new FileFilter(){ - @Override - public boolean accept(File file){ - return !file.isHidden(); - } - }); - int numOfNewspapers = newspapers.length; - int currentPaper = 0; - System.out.println("Segmenting Images..."); - for(File file : newspapers){ - currentPaper++; - File[] issues = file.listFiles(new FileFilter(){ - @Override - public boolean accept(File file){ - return !file.isHidden(); - } - }); - int numOfIssues = issues.length; - int currentIssue = 0; - for(File issue : issues){ - currentIssue++; - File[] images = issue.listFiles(new FileFilter(){ - @Override - public boolean accept(File file){ - return !file.isHidden(); - } - }); - int numOfImages = images.length; - int currentImage = 0; - for(File image : images){ - if(image.getName().contains(".jpg")){ - currentImage++; - String path = image.getAbsolutePath(); - Image img = importImage(path); - try{ - segmentImage(img, false); - System.out.print("\rSegmented: Newspaper "+currentPaper+"/"+numOfNewspapers+" Issue "+currentIssue+"/"+numOfIssues+" Image "+currentImage+"/"+numOfImages+" in "+file.getName()+" "); - sb.append(img.getName()+"\n"); - }catch(RuntimeException r){ - System.out.println(); - System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast"); - r.printStackTrace(); - File output = new File(Constants.data,"FailedList.txt"); +public class RunPageSegmentation { + /** + * This main function is responsible for running the Full-page segmentation algorithm. 
+ * It reads in either a single .jpg image or a text list of .jpg images and performs segmentation. + * If no argument given it runs on all images. Placement of these images is different from .txt or .jpg options. + * @param args + */ + public static void main(String[] args) { + if(args.length > 0){ + //process images from a text file list + if(args[0].contains(".txt")){ + String imageList = args[0]; + File inputImages = new File(Constants.imageLists,imageList); + BufferedReader br = null; + try { + br = new BufferedReader(new FileReader(inputImages)); + } catch (FileNotFoundException e1) { + e1.printStackTrace(); + } + + try { + String line = br.readLine(); + int i = 1; + while(line != null){ + System.out.println("Image "+i); + Image img = importImage(line); + try{ + segmentImage(img, false); + }catch(RuntimeException r){ + System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast"); + }catch(Exception e){ + System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast"); + } + line = br.readLine(); + i++; + } + } catch (IOException e) { + e.printStackTrace(); + } + //process only one image + }else if(args[0].contains(".jpg") || args[0].contains(".tif")){ + Image img = importImage(args[0]); + try{ + segmentImage(img, true); + } catch(Exception e){ + e.printStackTrace(); + //System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated"); + } + } + //Process all images in the AIDA file structure created by the image retrieval script. + //Currently legacy code as we now have a script that will do this process using a bash script + //that repeatedly calls the process single image option. 
+ }else{ + File start = new File(Constants.fullPagePath); + File successFile = new File(Constants.successSegment); + BufferedWriter successStream = null; + try{ + successStream = new BufferedWriter(new FileWriter(successFile,false)); + }catch(Exception e){ + System.out.println("Failed to create BufferedWriter"); + } + StringBuilder sb = new StringBuilder(); + File[] newspapers = start.listFiles(new FileFilter(){ + @Override + public boolean accept(File file){ + return !file.isHidden(); + } + }); + int numOfNewspapers = newspapers.length; + int currentPaper = 0; + System.out.println("Segmenting Images..."); + for(File file : newspapers){ + currentPaper++; + File[] issues = file.listFiles(new FileFilter(){ + @Override + public boolean accept(File file){ + return !file.isHidden(); + } + }); + int numOfIssues = issues.length; + int currentIssue = 0; + for(File issue : issues){ + currentIssue++; + File[] images = issue.listFiles(new FileFilter(){ + @Override + public boolean accept(File file){ + return !file.isHidden(); + } + }); + int numOfImages = images.length; + int currentImage = 0; + for(File image : images){ + if(image.getName().contains(".jpg") || image.getName().contains(".tif")){ + currentImage++; + String path = image.getAbsolutePath(); + Image img = importImage(path); + try{ + segmentImage(img, false); + System.out.print("\rSegmented: Newspaper "+currentPaper+"/"+numOfNewspapers+" Issue "+currentIssue+"/"+numOfIssues+" Image "+currentImage+"/"+numOfImages+" in "+file.getName()+" "); + sb.append(img.getName()+"\n"); + }catch(RuntimeException r){ + System.out.println(); + System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast"); + r.printStackTrace(); + File output = new File(Constants.data,"FailedList.txt"); try { - if(!output.exists()) - output.createNewFile(); - FileWriter writer = new FileWriter(output,true); - writer.write("Runtime Exception: "+img.getName()+"\n"); - 
writer.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - }catch(Exception e){ - System.out.println(); - System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast"); - - File output = new File(Constants.data,"FailedList.txt"); - try { - if(!output.exists()) - output.createNewFile(); - FileWriter writer = new FileWriter(output,true); - writer.write("Normal Exception: "+img.getName()+"\n"); - writer.close(); - } catch (IOException i) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - } - } - } - } - try{ - successStream.append(sb.toString()); - successStream.flush(); - successStream.close(); - }catch(Exception e){ - System.out.println("Writing to file Failed. Unexpected error"); - } - } - } - - - /** - * This method imports the image whose filepath is passed as a parameter and returns an models.Image object. - * @param inputFilename - * @return models.Image - */ - public static Image importImage(String inputFilename){ - BufferedImage inputImage = null; - int w=0,h=0; - try { - //System.out.println("Loading Image.."); - File inputImageFile = new File(inputFilename); - inputImage = ImageIO.read(inputImageFile); - - } catch (IOException e) { - e.printStackTrace(); - } - - Raster raster = inputImage.getData(); - w = raster.getWidth(); - h = raster.getHeight(); - Image img = new Image(h,w); - int pixels[][] = new int[h][w]; - - //read the pixels from the input image - for (int i = 0; i < h; i++) { - for (int j = 0; j < w; j++) { - pixels[i][j] = raster.getSample(j, i, 0); - } - } - - img.setByteImage(pixels); - img.setByteImage2(pixels); - - img.setName(inputFilename.substring(inputFilename.lastIndexOf("/")+1)); - - return img; - } - - /** - * A helper method for grouping together the function calls for image segmentation. 
- * @param img - */ - public static void segmentImage(Image img, boolean shouldShowColumns){ - ImageBlurrer imb = new ImageBlurrer(); + if(!output.exists()) + output.createNewFile(); + FileWriter writer = new FileWriter(output,true); + writer.write("Runtime Exception: "+img.getName()+"\n"); + writer.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + }catch(Exception e){ + System.out.println(); + System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast"); + + File output = new File(Constants.data,"FailedList.txt"); + try { + if(!output.exists()) + output.createNewFile(); + FileWriter writer = new FileWriter(output,true); + writer.write("Normal Exception: "+img.getName()+"\n"); + writer.close(); + } catch (IOException i) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + } + } + } + } + try{ + successStream.append(sb.toString()); + successStream.flush(); + successStream.close(); + }catch(Exception e){ + System.out.println("Writing to file Failed. Unexpected error"); + } + } + } + + + /** + * This method imports the image whose filepath is passed as a parameter and returns an models.Image object. 
+ * @param inputFilename + * @return models.Image + */ + public static Image importImage(String inputFilename){ + BufferedImage inputImage = null; + int w=0,h=0; + try { + //System.out.println("Loading Image.."); + File inputImageFile = new File(inputFilename); + inputImage = ImageIO.read(inputImageFile); + + } catch (IOException e) { + e.printStackTrace(); + } + Raster raster = inputImage.getData(); + w = raster.getWidth(); + h = raster.getHeight(); + Image img = new Image(h,w); + int pixels[][] = new int[h][w]; + + //read the pixels from the input image + for (int i = 0; i < h; i++) { + for (int j = 0; j < w; j++) { + pixels[i][j] = raster.getSample(j, i, 0); + } + } + + // Copy Yi's code + /***start***/ + Boolean isBin = true; + /***Safety check***/ + for(int i = 0; i < h; i++){ + for(int j = 0; j < w; j++) { + if(pixels[i][j] != 1) { + isBin = false; + } + if(pixels[i][j] == 0) { + isBin = true; + } + if(!isBin) { break; } + } + if(!isBin) { break; } + } + if(isBin) { + for(int i = 0; i < h; i++){ + for(int j = 0; j < w; j++) { + if(pixels[i][j] == 1){ + pixels[i][j] = 255; + } + } + } + } + /***end***/ + img.setByteImage(pixels); + img.setByteImage2(pixels); + + img.setName(inputFilename.substring(inputFilename.lastIndexOf("/")+1)); + + return img; + } + + /** + * A helper method for grouping together the function calls for image segmentation. + * @param img + */ + public static void segmentImage(Image img, boolean shouldShowColumns){ + + ImageBlurrer imb = new ImageBlurrer(); + ReadIni myConfig = new ReadIni(); + EnumCollection myEnums = new EnumCollection(); //boolean values indicate if we want to output the intermediate stages of binarizing the image //Stages: contrasted, binary, binary with Morphology - imb.binarizeSegment(img, false, false, false); - - int shouldContinue = img.findColumnBreaks(); + + // 9.17.2017. 
Added this branch to skip binarization if image is already binarized + if(myConfig.GetNeedBinarizing() == myEnums.GetIntOfTrueFalse("TRUE")){ + imb.binarizeSegment(img, false, false, false); + } + + int shouldContinue = img.findColumnBreaks(); System.out.println(shouldContinue); //Continue the process if image exited with no error @@ -257,8 +292,8 @@ public static void segmentImage(Image img, boolean shouldShowColumns){ error = "Columns are only on half of the page, "+img.getColumnBreaks(); break; case 4: - error = "Std Dev Above 150, "+img.getColumnBreaks(); - break; + error = "Std Dev Above 150, "+img.getColumnBreaks(); + break; } File output = new File(Constants.data, "imageFailedNeedHuman.txt"); try { @@ -272,5 +307,5 @@ public static void segmentImage(Image img, boolean shouldShowColumns){ ioe.printStackTrace(); } } - } + } } diff --git a/src/execute/RunProgram.java b/src/execute/RunProgram.java index 0a3ff7f..452fb7c 100644 --- a/src/execute/RunProgram.java +++ b/src/execute/RunProgram.java @@ -17,6 +17,8 @@ import models.BlurredImage; import models.Image; import blurring.ImageBlurrer; +import models.EnumCollection; +import models.ReadIni; /** * The method that is responsible for all processes. It calls methods from blurring.ImageBlurrer to blur, binarize and use @@ -28,6 +30,9 @@ */ public class RunProgram { public static void main(String args[]) throws IOException{ + ReadIni myConfig = new ReadIni(); + EnumCollection myEnums = new EnumCollection(); + int d = Integer.parseInt(args[0]); final int tripleRegular = 1,consolidated = 2,train = 3,test = 4; @@ -47,7 +52,7 @@ public static void main(String args[]) throws IOException{ // Manual Controls int blurMode = consolidated; // tripleRegular or consolidated - int whatSet = test; // 'test' for testing set of 7500+ snippets. 'train' for 400 training snippets + int whatSet = train; // 'test' for testing set of 7500+ snippets. 
'train' for 400 training snippets boolean outCustom = true; // Set true output the image, false when output of images themselves is not necessary boolean outBinary = true; // Set true to output binary images boolean outBlurred= false; // Set true to output blurred images. @@ -128,7 +133,11 @@ public static void main(String args[]) throws IOException{ // --------------------------------------------------------------------------- imb.blurImage(img5 ,outBlurred); - imb.binarizeImage(img5, outBinary); + + // 9.17.2017. Added this branch to skip binarization if image is already binarized + if(myConfig.GetNeedBinarizing() == myEnums.GetIntOfTrueFalse("FALSE")){ + imb.binarizeImage(img5, outBinary); + } if(isCustom){ imb.customBlur(img5,outCustom); } @@ -222,7 +231,8 @@ public static Image importImage(String inputFilename){ int w=0,h=0; try { System.out.println("Loading Image.."); - File inputImageFile = new File(Constants.data, inputFilename); + //File inputImageFile = new File(Constants.data, inputFilename); + File inputImageFile = new File("/Users/Mike/aida/data/", inputFilename); BufferedImage inputImage = ImageIO.read(inputImageFile); Raster raster = inputImage.getData(); diff --git a/src/models/EnumCollection.java b/src/models/EnumCollection.java new file mode 100644 index 0000000..ffae9c5 --- /dev/null +++ b/src/models/EnumCollection.java @@ -0,0 +1,34 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package models; + +/** + * + * @author Mike + */ +public class EnumCollection { + // Enumurator for the Image Types + private enum EnumImageType{ + TIF, JPG + } + // Enumurator for the T/F + private enum EnumTrueFalse{ + FALSE, TRUE + } + + public int GetIntOfImageType(String s){ + return EnumImageType.valueOf(s.toUpperCase()).ordinal(); + } + public int GetIntOfTrueFalse(String s){ + return EnumTrueFalse.valueOf(s.toUpperCase()).ordinal(); + } + public static boolean ContainsImageType(String s){ + for (EnumImageType enumImage: EnumImageType.values()){ + return (enumImage.name().equalsIgnoreCase(s)) ? true : false; + } + return false; + } +} diff --git a/src/models/Image.java b/src/models/Image.java index 83fd751..f20bcbc 100644 --- a/src/models/Image.java +++ b/src/models/Image.java @@ -22,388 +22,388 @@ public class Image { private int BLACK_COLUMN_SEPARATION_MIN = 125; private int EDGE_COLUMN_DISTANCE_MAX = 100; private int COLUMN_SEPARATION_MIN = 75; - - public int[][] byteImage; - protected int[][] byteImage2; - - protected String name; - protected String parentName; - protected boolean containsPoem; - protected boolean checkValue; - protected int vertical; - protected int horizontal; - - protected double stanzaMean; - protected double stanzaStandardDeviation; - protected double stanzaMin; - protected double stanzaMax; - protected double stanzaRange; - - protected double jaggedLineMean; - protected double jaggedLineStandardDeviation; - protected double jaggedMin; - protected double jaggedMax; - protected double jaggedRange; - - protected double marginMean; - protected double marginStdDev; - protected double marginMin; - protected double marginMax; - protected double marginRange; - - protected double lengthMean; - protected double lengthStdDev; - protected double lengthMin; - protected double lengthMax; - protected double lengthRange; - - protected ArrayList columnBreaks; - - - - public double getLengthMean() { - return lengthMean; - } - - public void 
setLengthMean(double lengthMean) { - this.lengthMean = lengthMean; - } - - public double getLengthStdDev() { - return lengthStdDev; - } - - public void setLengthStdDev(double lengthStdDev) { - this.lengthStdDev = lengthStdDev; - } - - public double getLengthMin() { - return lengthMin; - } - - public void setLengthMin(double lengthMin) { - this.lengthMin = lengthMin; - } - - public double getLengthMax() { - return lengthMax; - } - - public void setLengthMax(double lengthMax) { - this.lengthMax = lengthMax; - } - - public double getLengthRange() { - return lengthRange; - } - - public void setLengthRange(double lengthRange) { - this.lengthRange = lengthRange; - } - protected int blurLevel; - - public double getStanzaMin() { - return stanzaMin; - } - - public void setStanzaMin(double stanzaMin) { - this.stanzaMin = stanzaMin; - } - - public double getStanzaMax() { - return stanzaMax; - } - - public void setStanzaMax(double stanzaMax) { - this.stanzaMax = stanzaMax; - } - - public double getStanzaRange() { - return stanzaRange; - } - - public void setStanzaRange(double stanzaRange) { - this.stanzaRange = stanzaRange; - } - - public double getJaggedMin() { - return jaggedMin; - } - - public void setJaggedMin(double jaggedMin) { - this.jaggedMin = jaggedMin; - } - - public double getJaggedMax() { - return jaggedMax; - } - - public void setJaggedMax(double jaggedMax) { - this.jaggedMax = jaggedMax; - } - - public double getJaggedRange() { - return jaggedRange; - } - - public void setJaggedRange(double jaggedRange) { - this.jaggedRange = jaggedRange; - } - - public double getMarginMin() { - return marginMin; - } - - public void setMarginMin(double marginMin) { - this.marginMin = marginMin; - } - - public double getMarginMax() { - return marginMax; - } - - public void setMarginMax(double marginMax) { - this.marginMax = marginMax; - } - - public double getMarginRange() { - return marginRange; - } - - public void setMarginRange(double marginRange) { - this.marginRange = 
marginRange; - } - - public double getMarginMean() { - return marginMean; - } - - public void setMarginMean(double marginMean) { - this.marginMean = marginMean; - } - - public double getMarginStdDev() { - return marginStdDev; - } - - public void setMarginStdDev(double marginStdDev) { - this.marginStdDev = marginStdDev; - } - protected double leftMarginSize; - protected double rightMarginSize; - - public void setParentName(String s){ - this.parentName = s; - } - - public String getParentName(){ - return this.parentName; - } - - public Image(){ - this.name = ""; - this.containsPoem = false; - this.checkValue = false; - this.vertical = 0; - this.horizontal = 0; - this.stanzaMean = 0; - this.stanzaStandardDeviation = 0; - this.blurLevel = 3; - this.jaggedLineMean = 0; - this.jaggedLineStandardDeviation = 0; - this.leftMarginSize = 0; - this.rightMarginSize = 0; - - } - - public Image(int h, int w){ - this.name = ""; - this.containsPoem = false; - this.checkValue = false; - this.vertical = h; - this.horizontal = w; - this.stanzaMean = 0; - this.stanzaStandardDeviation = 0; - this.blurLevel = 3; - this.jaggedLineMean = 0; - this.jaggedLineStandardDeviation = 0; - this.leftMarginSize = 0; - this.rightMarginSize = 0; - this.byteImage = new int[h][w]; - this.byteImage2 = new int[h][w]; - } - - public int[][] getByteImage2() { - return byteImage2; - } - - public void setByteImage2(int[][] byteImage2) { - this.byteImage2 = byteImage2; - } - - public double getLeftMarginSize() { - return leftMarginSize; - } - - public void setLeftMarginSize(double leftMarginSize) { - this.leftMarginSize = leftMarginSize; - } - - public double getRightMarginSize() { - return rightMarginSize; - } - - public void setRightMarginSize(double rightMarginSize) { - this.rightMarginSize = rightMarginSize; - } - - public double getJaggedLineMean() { - return jaggedLineMean; - } - - public void setJaggedLineMean(double jaggedLineMean) { - this.jaggedLineMean = jaggedLineMean; - } - - public double 
getJaggedLineStandardDeviation() { - return jaggedLineStandardDeviation; - } - - public void setJaggedLineStandardDeviation(double jaggedLineStandardDeviation) { - this.jaggedLineStandardDeviation = jaggedLineStandardDeviation; - } - - public int getVertical() { - return vertical; - } - - public void setVertical(int vertical) { - this.vertical = vertical; - } - - public int getHorizontal() { - return horizontal; - } - - public void setHorizontal(int horizontal) { - this.horizontal = horizontal; - } - - - - public int getBlurLevel() { - return blurLevel; - } - - public void setBlurLevel(int blurLevel) { - this.blurLevel = blurLevel; - } - - public double getStanzaMean() { - return stanzaMean; - } - - public void setStanzaMean(double stanzaMean) { - this.stanzaMean = stanzaMean; - } - - public double getStanzaStdDev() { - return stanzaStandardDeviation; - } - - public void setStanzaStandardDeviation(double stanzaStandardDeviation) { - this.stanzaStandardDeviation = stanzaStandardDeviation; - } - - public Image(String fileName, boolean checkFigure){ - this.name = fileName; - this.checkValue = checkFigure; - - } - - public boolean getCheckValue() { - return checkValue; - } - public void setCheckValue(boolean checkValue) { - this.checkValue = checkValue; - } - public int[][] getByteImage() { - return byteImage; - } - public void setByteImage(int[][] byteImage) { - this.byteImage = byteImage; - } - public String getName() { - return name; - } - public void setName(String name) { - this.name = name; - } - public boolean isContainsPoem() { - return containsPoem; - } - public void setContainsPoem(boolean containsPoem) { - this.containsPoem = containsPoem; - } + + public int[][] byteImage; + protected int[][] byteImage2; + + protected String name; + protected String parentName; + protected boolean containsPoem; + protected boolean checkValue; + protected int vertical; + protected int horizontal; + + protected double stanzaMean; + protected double stanzaStandardDeviation; + 
protected double stanzaMin; + protected double stanzaMax; + protected double stanzaRange; + + protected double jaggedLineMean; + protected double jaggedLineStandardDeviation; + protected double jaggedMin; + protected double jaggedMax; + protected double jaggedRange; + + protected double marginMean; + protected double marginStdDev; + protected double marginMin; + protected double marginMax; + protected double marginRange; + + protected double lengthMean; + protected double lengthStdDev; + protected double lengthMin; + protected double lengthMax; + protected double lengthRange; + + protected ArrayList columnBreaks; + + + + public double getLengthMean() { + return lengthMean; + } + + public void setLengthMean(double lengthMean) { + this.lengthMean = lengthMean; + } + + public double getLengthStdDev() { + return lengthStdDev; + } + + public void setLengthStdDev(double lengthStdDev) { + this.lengthStdDev = lengthStdDev; + } + + public double getLengthMin() { + return lengthMin; + } + + public void setLengthMin(double lengthMin) { + this.lengthMin = lengthMin; + } + + public double getLengthMax() { + return lengthMax; + } + + public void setLengthMax(double lengthMax) { + this.lengthMax = lengthMax; + } + + public double getLengthRange() { + return lengthRange; + } + + public void setLengthRange(double lengthRange) { + this.lengthRange = lengthRange; + } + protected int blurLevel; + + public double getStanzaMin() { + return stanzaMin; + } + + public void setStanzaMin(double stanzaMin) { + this.stanzaMin = stanzaMin; + } + + public double getStanzaMax() { + return stanzaMax; + } + + public void setStanzaMax(double stanzaMax) { + this.stanzaMax = stanzaMax; + } + + public double getStanzaRange() { + return stanzaRange; + } + + public void setStanzaRange(double stanzaRange) { + this.stanzaRange = stanzaRange; + } + + public double getJaggedMin() { + return jaggedMin; + } + + public void setJaggedMin(double jaggedMin) { + this.jaggedMin = jaggedMin; + } + + public double 
getJaggedMax() { + return jaggedMax; + } + + public void setJaggedMax(double jaggedMax) { + this.jaggedMax = jaggedMax; + } + + public double getJaggedRange() { + return jaggedRange; + } + + public void setJaggedRange(double jaggedRange) { + this.jaggedRange = jaggedRange; + } + + public double getMarginMin() { + return marginMin; + } + + public void setMarginMin(double marginMin) { + this.marginMin = marginMin; + } + + public double getMarginMax() { + return marginMax; + } + + public void setMarginMax(double marginMax) { + this.marginMax = marginMax; + } + + public double getMarginRange() { + return marginRange; + } + + public void setMarginRange(double marginRange) { + this.marginRange = marginRange; + } + + public double getMarginMean() { + return marginMean; + } + + public void setMarginMean(double marginMean) { + this.marginMean = marginMean; + } + + public double getMarginStdDev() { + return marginStdDev; + } + + public void setMarginStdDev(double marginStdDev) { + this.marginStdDev = marginStdDev; + } + protected double leftMarginSize; + protected double rightMarginSize; + + public void setParentName(String s){ + this.parentName = s; + } + + public String getParentName(){ + return this.parentName; + } + + public Image(){ + this.name = ""; + this.containsPoem = false; + this.checkValue = false; + this.vertical = 0; + this.horizontal = 0; + this.stanzaMean = 0; + this.stanzaStandardDeviation = 0; + this.blurLevel = 3; + this.jaggedLineMean = 0; + this.jaggedLineStandardDeviation = 0; + this.leftMarginSize = 0; + this.rightMarginSize = 0; + + } + + public Image(int h, int w){ + this.name = ""; + this.containsPoem = false; + this.checkValue = false; + this.vertical = h; + this.horizontal = w; + this.stanzaMean = 0; + this.stanzaStandardDeviation = 0; + this.blurLevel = 3; + this.jaggedLineMean = 0; + this.jaggedLineStandardDeviation = 0; + this.leftMarginSize = 0; + this.rightMarginSize = 0; + this.byteImage = new int[h][w]; + this.byteImage2 = new int[h][w]; + 
} + + public int[][] getByteImage2() { + return byteImage2; + } + + public void setByteImage2(int[][] byteImage2) { + this.byteImage2 = byteImage2; + } + + public double getLeftMarginSize() { + return leftMarginSize; + } + + public void setLeftMarginSize(double leftMarginSize) { + this.leftMarginSize = leftMarginSize; + } + + public double getRightMarginSize() { + return rightMarginSize; + } + + public void setRightMarginSize(double rightMarginSize) { + this.rightMarginSize = rightMarginSize; + } + + public double getJaggedLineMean() { + return jaggedLineMean; + } + + public void setJaggedLineMean(double jaggedLineMean) { + this.jaggedLineMean = jaggedLineMean; + } + + public double getJaggedLineStandardDeviation() { + return jaggedLineStandardDeviation; + } + + public void setJaggedLineStandardDeviation(double jaggedLineStandardDeviation) { + this.jaggedLineStandardDeviation = jaggedLineStandardDeviation; + } + + public int getVertical() { + return vertical; + } + + public void setVertical(int vertical) { + this.vertical = vertical; + } + + public int getHorizontal() { + return horizontal; + } + + public void setHorizontal(int horizontal) { + this.horizontal = horizontal; + } + + + + public int getBlurLevel() { + return blurLevel; + } + + public void setBlurLevel(int blurLevel) { + this.blurLevel = blurLevel; + } + + public double getStanzaMean() { + return stanzaMean; + } + + public void setStanzaMean(double stanzaMean) { + this.stanzaMean = stanzaMean; + } + + public double getStanzaStdDev() { + return stanzaStandardDeviation; + } + + public void setStanzaStandardDeviation(double stanzaStandardDeviation) { + this.stanzaStandardDeviation = stanzaStandardDeviation; + } + + public Image(String fileName, boolean checkFigure){ + this.name = fileName; + this.checkValue = checkFigure; + + } + + public boolean getCheckValue() { + return checkValue; + } + public void setCheckValue(boolean checkValue) { + this.checkValue = checkValue; + } + public int[][] getByteImage() { 
+ return byteImage; + } + public void setByteImage(int[][] byteImage) { + this.byteImage = byteImage; + } + public String getName() { + return name; + } + public void setName(String name) { + this.name = name; + } + public boolean isContainsPoem() { + return containsPoem; + } + public void setContainsPoem(boolean containsPoem) { + this.containsPoem = containsPoem; + } public ArrayList getColumnBreaks(){ - return columnBreaks; - } - public void setColumnBreaks(ArrayList columns){ - this.columnBreaks = columns; - } - /** - * Method used to find the separation columns of a newspaper. - * These columns can be represented visually by either whitespace separating text columns or - * by continuous, straight black lines. - */ - public int findColumnBreaks(){ - - ArrayList whiteColumns = new ArrayList(); - int whiteCount; - int columnCount = 0; - for(int j = 0; j < this.horizontal; j++){ - whiteCount = 0; - for(int i = 0; i < this.vertical; i++){ - if(this.byteImage[i][j] == 255){ - whiteCount++; - } - } - if(whiteCount >= (this.vertical*.9)){ - columnCount++; - if(columnCount < COLUMN_COUNT_MAX){ - whiteColumns.add(j); - } - }else if(whiteCount < (this.vertical*.9)){ - if(whiteColumns.contains(j-1)){ - columnCount = 0; - }else if(!whiteColumns.contains(j-1) && columnCount >= COLUMN_COUNT_MAX){ - columnCount = 0; - } - } - } - - - ArrayList columns = new ArrayList(); - int marker = 0; - /*Search for the part of the array that makes a large jump, this indicates the end of - *a section of white columns and the beginning of a new section. Once found we find the - *middle index of the section of white columns and then set the marker to be the first - *white column of the next section of white columns. 
- */ - for(int k = 0; k < whiteColumns.size()-1; k++){ - if(whiteColumns.get(k+1) - whiteColumns.get(k) > WHITE_COLUMN_SEPARATION_MIN){ - int index = k-((k - marker)/2); - columns.add(whiteColumns.get(index)); - marker = k+1; - } - //Special condition used to find middle point of the final batch of white columns - //(the far right hand side of the newspaper) - if(k+1 == whiteColumns.size()-1){ - int index = (k+1)-(((k+1) - marker)/2); - columns.add(whiteColumns.get(index)); - } - } - + return columnBreaks; + } + public void setColumnBreaks(ArrayList columns){ + this.columnBreaks = columns; + } + /** + * Method used to find the separation columns of a newspaper. + * These columns can be represented visually by either whitespace separating text columns or + * by continuous, straight black lines. + */ + public int findColumnBreaks(){ + + ArrayList whiteColumns = new ArrayList(); + int whiteCount; + int columnCount = 0; + for(int j = 0; j < this.horizontal; j++){ + whiteCount = 0; + for(int i = 0; i < this.vertical; i++){ + if(this.byteImage[i][j] == 255){ + whiteCount++; + } + } + if(whiteCount >= (this.vertical*.9)){ + columnCount++; + if(columnCount < COLUMN_COUNT_MAX){ + whiteColumns.add(j); + } + }else if(whiteCount < (this.vertical*.9)){ + if(whiteColumns.contains(j-1)){ + columnCount = 0; + }else if(!whiteColumns.contains(j-1) && columnCount >= COLUMN_COUNT_MAX){ + columnCount = 0; + } + } + } + + + ArrayList columns = new ArrayList(); + int marker = 0; + /*Search for the part of the array that makes a large jump, this indicates the end of + *a section of white columns and the beginning of a new section. Once found we find the + *middle index of the section of white columns and then set the marker to be the first + *white column of the next section of white columns. 
+ */ + for(int k = 0; k < whiteColumns.size()-1; k++){ + if(whiteColumns.get(k+1) - whiteColumns.get(k) > WHITE_COLUMN_SEPARATION_MIN){ + int index = k-((k - marker)/2); + columns.add(whiteColumns.get(index)); + marker = k+1; + } + //Special condition used to find middle point of the final batch of white columns + //(the far right hand side of the newspaper) + if(k+1 == whiteColumns.size()-1){ + int index = (k+1)-(((k+1) - marker)/2); + columns.add(whiteColumns.get(index)); + } + } + //Using white columns often isn't enough, so we check for continuous //black lines as well to indictate a column break. ArrayList blackColumns = new ArrayList(); @@ -439,167 +439,187 @@ public int findColumnBreaks(){ } Collections.sort(columns); - ArrayList columnsToAdd = new ArrayList(); - ArrayList columnsToRemove = new ArrayList(); - + ArrayList columnsToAdd = new ArrayList(); + ArrayList columnsToRemove = new ArrayList(); + //Determine extraneuous columns to remove - int index = 0; - while(columns.get(index) < EDGE_COLUMN_DISTANCE_MAX){ - columnsToRemove.add(columns.get(index)); - index++; - } - index = 1; - while(this.horizontal-columns.get(columns.size()-index) < EDGE_COLUMN_DISTANCE_MAX){ - columnsToRemove.add(columns.get(columns.size()-index)); - index++; - } - columns.removeAll(columnsToRemove); - columnsToRemove.clear(); + int index = 0; + while(columns.get(index) < EDGE_COLUMN_DISTANCE_MAX){ + columnsToRemove.add(columns.get(index)); + index++; + } + index = 1; + while(this.horizontal-columns.get(columns.size()-index) < EDGE_COLUMN_DISTANCE_MAX){ + columnsToRemove.add(columns.get(columns.size()-index)); + index++; + } + columns.removeAll(columnsToRemove); + columnsToRemove.clear(); //If columns are very close together remove the two columns and use //the average of the two for the final list of columns - for(int p = 0; p < columns.size()-1; p++){ - if(columns.get(p+1)-columns.get(p) < COLUMN_SEPARATION_MIN){ - columnsToRemove.add(columns.get(p)); - 
columnsToRemove.add(columns.get(p+1)); - int middle = columns.get(p+1) - ((columns.get(p+1)-columns.get(p))/2); - columnsToAdd.add(middle); - } - } - - columns.removeAll(columnsToRemove); - columns.addAll(columnsToAdd); - Collections.sort(columns); - columnsToRemove.clear(); - - //Rule 1: check for no columns found - if(columns.size() == 0){ - this.setColumnBreaks(columns); - return 1; + for(int p = 0; p < columns.size()-1; p++){ + if(columns.get(p+1)-columns.get(p) < COLUMN_SEPARATION_MIN){ + columnsToRemove.add(columns.get(p)); + columnsToRemove.add(columns.get(p+1)); + int middle = columns.get(p+1) - ((columns.get(p+1)-columns.get(p))/2); + columnsToAdd.add(middle); + } } - ArrayList columnWidth = new ArrayList(); - for(int p = 0; p < columns.size()-1; p++){ - columnWidth.add(columns.get(p+1)-columns.get(p)); - } - Collections.sort(columnWidth); + columns.removeAll(columnsToRemove); + columns.addAll(columnsToAdd); + Collections.sort(columns); + columnsToRemove.clear(); - //Rule 2,3: check for less than three columns and check if columns are on more than half of the page - if(columns.size()<3){ - this.setColumnBreaks(columns); - return 2; + //Rule 1: check if there is only on column + if(columns.size() == 0){ + columns.add(0,0); + columns.add(this.horizontal); } - - if(columns.get(0)>this.horizontal/2 || columns.get(columns.size()-1) < horizontal/2){ - this.setColumnBreaks(columns); - return 3; + //Rule 2: check for 2 columns + else if(columns.size()==1){ + //columnWidth.add(columns.get(0)); + //columnWidth.add(this.horizontal - columns.get(0)); + columns.add(0,0); + columns.add(this.horizontal); } - - int numOfColumnWidths = columnWidth.size(); - - int columnWidthMean = 0; - for(int width : columnWidth){ - columnWidthMean += width; + // columns.size() is 2 + else if (columns.size()==2){ + // columns(0) is around the center + if((this.horizontal/2)*(9/10) < columns.get(0) && columns.get(0) < (this.horizontal/2)*(11/10)){ + columns.add(0,0); + } + // columns(1) is 
around the center + else{ + columns.add(this.horizontal); + } } - columnWidthMean = columnWidthMean/numOfColumnWidths; - int columnWidthVarience = 0; - for(int width : columnWidth){ - int temp = width - columnWidthMean; - columnWidthVarience += Math.pow(temp,2); + else{ + // Collect column info + ArrayList columnWidth = new ArrayList(); + for(int p = 0; p < columns.size()-1; p++){ + columnWidth.add(columns.get(p+1)-columns.get(p)); + } + Collections.sort(columnWidth); + + for(int p = 0; p < columns.size()-1; p++){ + columnWidth.add(columns.get(p+1)-columns.get(p)); + } + Collections.sort(columnWidth); + //Rule 3: check if columns are on more than half of the page + + if(columns.get(0)>this.horizontal/2 || columns.get(columns.size()-1) < horizontal/2){ + this.setColumnBreaks(columns); + return 3; + } + + int numOfColumnWidths = columnWidth.size(); + + int columnWidthMean = 0; + for(int width : columnWidth){ + columnWidthMean += width; + } + columnWidthMean = columnWidthMean/numOfColumnWidths; + int columnWidthVarience = 0; + for(int width : columnWidth){ + int temp = width - columnWidthMean; + columnWidthVarience += Math.pow(temp,2); + } + columnWidthVarience = columnWidthVarience/numOfColumnWidths; + double columnWidthStdDev = Math.ceil(Math.sqrt(columnWidthVarience)); + System.out.println("Std Dev: "+columnWidthStdDev); + + //Rule 4: Check column width Std Dev. Good images were experimentally determined to be below 150 Std Dev. 
+ if(columnWidthStdDev > 150) { + this.setColumnBreaks(columns); + return 4; + } + + //Add in columns based on the average width, columns added from the right hand side + int averageWidth = columnWidth.get((int) Math.floor(columnWidth.size()/2)); + for(int p = columns.size()-1; p >= 1; p--){ + if(columns.get(p-1) < columns.get(p)-averageWidth-COLUMN_SEPARATION_MIN){ + columns.add(p, columns.get(p)-averageWidth); + p++; + }else if(columns.get(p-1) > columns.get(p)-averageWidth+COLUMN_SEPARATION_MIN){ + columns.remove(p-1); + if(columns.get(p-1)-averageWidth > 0){ + columns.add(p-1, columns.get(p-1)-averageWidth); + } + } + } + + //If no edge column is found, insert the column + while(columns.get(0) > this.horizontal*.1 && columns.get(0)-averageWidth > 0){ + columns.add(0, columns.get(0)-averageWidth); + } + while(columns.get(columns.size()-1) < this.horizontal-(this.horizontal*.1) && columns.get(columns.size()-1)+averageWidth < this.horizontal){ + columns.add(columns.get(columns.size()-1)+averageWidth); + } } - columnWidthVarience = columnWidthVarience/numOfColumnWidths; - double columnWidthStdDev = Math.ceil(Math.sqrt(columnWidthVarience)); - System.out.println("Std Dev: "+columnWidthStdDev); - //Rule 4: Check column width Std Dev. Good images were experimentally determined to be below 150 Std Dev. 
- if(columnWidthStdDev > 150) { - this.setColumnBreaks(columns); - return 4; - } + System.out.println(columns); - //Add in columns based on the average width, columns added from the right hand side - int averageWidth = columnWidth.get((int) Math.floor(columnWidth.size()/2)); - for(int p = columns.size()-1; p >= 1; p--){ - if(columns.get(p-1) < columns.get(p)-averageWidth-COLUMN_SEPARATION_MIN){ - columns.add(p, columns.get(p)-averageWidth); - p++; - }else if(columns.get(p-1) > columns.get(p)-averageWidth+COLUMN_SEPARATION_MIN){ - columns.remove(p-1); - if(columns.get(p-1)-averageWidth > 0){ - columns.add(p-1, columns.get(p-1)-averageWidth); - } - } - } - - //If no edge column is found, insert the column - while(columns.get(0) > this.horizontal*.1 && columns.get(0)-averageWidth > 0){ - columns.add(0, columns.get(0)-averageWidth); + //stores the column breaks list in the class Image + this.setColumnBreaks(columns); + return 0; + } + /** + * Method to output an image with red lines indicating the column breaks + */ + public void showColumnBreaks(){ + BufferedImage OutputImage = new BufferedImage(this.horizontal,this.vertical,BufferedImage.TYPE_INT_RGB); + int i = 1; + int marker = this.columnBreaks.get(0); + int value = 0; + for (int x = 0; x < this.getHorizontal(); x++) { + //iterate and acquire column break marker at the correct time + if(i < this.columnBreaks.size()){ + if(x > marker){ + marker = this.columnBreaks.get(i); + i++; + } + } + for (int y = 0; y < this.getVertical(); y++) { + if(x == marker){ + value = 0xFF0000;//hexadecimal code for the color red + }else{ + //The following line offsets the pixels' values to fix the 'blue problem' + value = this.byteImage2[y][x] << 16 | this.byteImage2[y][x] << 8 | this.byteImage2[y][x]; + } + OutputImage.setRGB(x, y, value); + } } - while(columns.get(columns.size()-1) < this.horizontal-(this.horizontal*.1) && columns.get(columns.size()-1)+averageWidth < this.horizontal){ - 
columns.add(columns.get(columns.size()-1)+averageWidth); + //Output the image to a file of our choosing + File outputFile = new File(Constants.customOutput,this.name); + try { + ImageIO.write(OutputImage, "jpg", outputFile); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } - - System.out.println(columns); - - //stores the column breaks list in the class Image - this.setColumnBreaks(columns); - return 0; - } - /** - * Method to output an image with red lines indicating the column breaks - */ - public void showColumnBreaks(){ - BufferedImage OutputImage = new BufferedImage(this.horizontal,this.vertical,BufferedImage.TYPE_INT_RGB); - int i = 1; - int marker = this.columnBreaks.get(0); - int value = 0; - for (int x = 0; x < this.getHorizontal(); x++) { - //iterate and acquire column break marker at the correct time - if(i < this.columnBreaks.size()){ - if(x > marker){ - marker = this.columnBreaks.get(i); - i++; - } - } - for (int y = 0; y < this.getVertical(); y++) { - if(x == marker){ - value = 0xFF0000;//hexadecimal code for the color red - }else{ - //The following line offsets the pixels' values to fix the 'blue problem' - value = this.byteImage2[y][x] << 16 | this.byteImage2[y][x] << 8 | this.byteImage2[y][x]; - } - OutputImage.setRGB(x, y, value); - } - } - //Output the image to a file of our choosing - File outputFile = new File(Constants.customOutput,this.name); - try { - ImageIO.write(OutputImage, "jpg", outputFile); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - /** - * Helper method that will compare matricies to ensure that they are the same. - * Used for debugging purposes. 
- * @param A matrix - * @param B matrix - * @return number of different pixels - */ - public int compareMatrices(int[][] A, int[][] B){ - int difference = 0; - for(int i = 0; i < A.length; i++){ - for(int j = 0; j < A[0].length; j++){ - if(A[i][j] != B[i][j]){ - difference++; - } - } - } - return difference; - } - + } + + /** + * Helper method that will compare matricies to ensure that they are the same. + * Used for debugging purposes. + * @param A matrix + * @param B matrix + * @return number of different pixels + */ + public int compareMatrices(int[][] A, int[][] B){ + int difference = 0; + for(int i = 0; i < A.length; i++){ + for(int j = 0; j < A[0].length; j++){ + if(A[i][j] != B[i][j]){ + difference++; + } + } + } + return difference; + } + private int snippetHeight(){ int sum = 0; for(int i = 0; i < this.columnBreaks.size()-1; i++){ @@ -609,158 +629,158 @@ private int snippetHeight(){ int avgHeight = (int) ((14.0/9.0)*avgWidth); return avgHeight; } - - /** - * Final step in segmentation algorithm. Once column breaks have been found this method - * will use those breakpoints to dynamically create snippets of varying width and height (a 14/9 ratio). - * Snippets are outputted to the directory noted by Constants.Snippets and are grouped together by the full page they came from. - */ - public void convertPageToSnippets(boolean scaleDown){ - int height = snippetHeight(); - int nextBegin = 0; - int nextEnd = height; - int snippetRow = 0; - int snippetColumn = 0; - //Identify the parent image name. - String snippetSubName = this.getName().substring(0, this.getName().lastIndexOf('.')); - String issueName = snippetSubName.substring(0, snippetSubName.lastIndexOf('_')); - String parentName = this.getName().substring(0, this.getName().indexOf('_')); - - //populate snippet matrix with pixels from full page. 
- for(int i = 0; i < columnBreaks.size()-1; i++){ - int width = columnBreaks.get(i+1) - columnBreaks.get(i); - int[][] snippet = new int[height][width]; - int c = 0; - for(int j = columnBreaks.get(i); j < columnBreaks.get(i+1); j++){ - int r = 0; - for(int k = nextBegin; k < nextEnd; k++){ - snippet[r][c] = this.byteImage2[k][j]; - r++; - } - c++; - } - - //identify the location of snippet in the full page. Use as name of the snippet - String snippetName = snippetSubName+"_"+snippetRow+"_"+snippetColumn+".jpg"; - - //Create BufferedImage for file writing. If scale down is needed perform that - //first. For the time being scale is hard coded at 4x4. - BufferedImage OutputImage; - if(scaleDown){ - int scale = 4; - int[][] scaledSnippet = scaleDownSnippet(scale, snippet, height, width); - - OutputImage = new BufferedImage(width/scale, height/scale, BufferedImage.TYPE_INT_RGB); - for (int y = 0; y < height/scale; y++) { - for (int x = 0; x < width/scale; x++) { - //The following line offsets the pixels' values to fix the 'blue problem' - int value = scaledSnippet[y][x] << 16 | scaledSnippet[y][x] << 8 | scaledSnippet[y][x]; - OutputImage.setRGB(x, y, value); - } - } - }else{ - OutputImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - //The following line offsets the pixels' values to fix the 'blue problem' - int value = snippet[y][x] << 16 | snippet[y][x] << 8 | snippet[y][x]; - OutputImage.setRGB(x, y, value); - } - } - } - - //Output the snippet to a file of our choosing - File outputFile = new File(Constants.Snippets+parentName+"/"+issueName+"/"+snippetSubName+"/",snippetName); - outputFile.mkdirs(); - try { - ImageIO.write(OutputImage, "jpg", outputFile); - } catch (IOException e) { - e.printStackTrace(); - } - snippetColumn++; - - //determine if row is complete, if yes move to next row, and if not move to next column - if(i == columnBreaks.size() - 2){ - nextBegin = 
nextEnd - (height/2); - nextEnd = nextBegin + height; - if(nextEnd <= this.getVertical()){ - i = -1; - snippetRow++; - snippetColumn = 0; - } - } - } - } - - /** - * When given an integer to represent the scale (3 for 3x3, 4 for 4x4, 5 for 5x5, etc) - * this function will average the pixels of the snippet based on the given scale. - * @param scale - * @param snippet - * @param height - * @param width - * @return - */ - private int[][] scaleDownSnippet(int scale, int[][] snippet, int height, int width){ - int r = 0,s = 0; - int[][] scaledImage = new int[height/scale][width/scale]; - for(int i = scale/2; i < height - (scale/2); i = i+scale){ - s = 0; - for(int j = scale/2; j < width - (scale/2); j = j+scale){ - scaledImage[r][s] = average(scale, snippet, i, j); - s++; - } - r++; - } - return scaledImage; - } - - /** - * returns the average pixel value of a pixels scale x scale area. - * This function can use both odd and even numbers. - * @param scale - * @param snippet - * @param i - * @param j - * @return - */ - private int average(int scale, int[][] snippet, int i, int j){ - int sum = 0; - if(scale%2 == 0){ - for(int a = i-(scale/2); a < i+(scale/2); a++){ - for(int b = j-(scale/2); b < j+(scale/2); b++){ - sum+=snippet[i][j]; - } - } - }else{ - for(int a = i-(scale/2); a <= i+(scale/2); a++){ - for(int b = j-(scale/2); b <= j+(scale/2); b++){ - sum+=snippet[i][j]; - } - } - } - return sum/(scale*scale); - } - - public void printImage(String filePath){ - int w = this.getHorizontal(),h = this.getVertical(); - BufferedImage OutputImage = new BufferedImage(w,h,BufferedImage.TYPE_INT_RGB); - int[][] pixels3 = this.getByteImage(); - for (int y = 0; y < this.getVertical(); y++) { - for (int x = 0; x < this.getHorizontal(); x++) { - //The following line offsets the pixels' values to fix the 'blue problem' - int value = pixels3[y][x] << 16 | pixels3[y][x] << 8 | pixels3[y][x]; - OutputImage.setRGB(x, y, value); - } - } - - File outputFile = new File(filePath,this.name); - 
try { - ImageIO.write(OutputImage, "jpg", outputFile); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - + + /** + * Final step in segmentation algorithm. Once column breaks have been found this method + * will use those breakpoints to dynamically create snippets of varying width and height (a 14/9 ratio). + * Snippets are outputted to the directory noted by Constants.Snippets and are grouped together by the full page they came from. + */ + public void convertPageToSnippets(boolean scaleDown){ + int height = snippetHeight(); + int nextBegin = 0; + int nextEnd = height; + int snippetRow = 0; + int snippetColumn = 0; + //Identify the parent image name. + String snippetSubName = this.getName().substring(0, this.getName().lastIndexOf('.')); + String issueName = snippetSubName.substring(0, snippetSubName.lastIndexOf('_')); + String parentName = this.getName().substring(0, this.getName().indexOf('_')); + + //populate snippet matrix with pixels from full page. + for(int i = 0; i < columnBreaks.size()-1; i++){ + int width = columnBreaks.get(i+1) - columnBreaks.get(i); + int[][] snippet = new int[height][width]; + int c = 0; + for(int j = columnBreaks.get(i); j < columnBreaks.get(i+1); j++){ + int r = 0; + for(int k = nextBegin; k < nextEnd; k++){ + snippet[r][c] = this.byteImage2[k][j]; + r++; + } + c++; + } + + //identify the location of snippet in the full page. Use as name of the snippet + String snippetName = snippetSubName+"_"+snippetRow+"_"+snippetColumn+".jpg"; + + //Create BufferedImage for file writing. If scale down is needed perform that + //first. For the time being scale is hard coded at 4x4. 
+ BufferedImage OutputImage; + if(scaleDown){ + int scale = 4; + int[][] scaledSnippet = scaleDownSnippet(scale, snippet, height, width); + + OutputImage = new BufferedImage(width/scale, height/scale, BufferedImage.TYPE_INT_RGB); + for (int y = 0; y < height/scale; y++) { + for (int x = 0; x < width/scale; x++) { + //The following line offsets the pixels' values to fix the 'blue problem' + int value = scaledSnippet[y][x] << 16 | scaledSnippet[y][x] << 8 | scaledSnippet[y][x]; + OutputImage.setRGB(x, y, value); + } + } + }else{ + OutputImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + //The following line offsets the pixels' values to fix the 'blue problem' + int value = snippet[y][x] << 16 | snippet[y][x] << 8 | snippet[y][x]; + OutputImage.setRGB(x, y, value); + } + } + } + + //Output the snippet to a file of our choosing + File outputFile = new File(Constants.Snippets+parentName+"/"+issueName+"/"+snippetSubName+"/",snippetName); + outputFile.mkdirs(); + try { + ImageIO.write(OutputImage, "jpg", outputFile); + } catch (IOException e) { + e.printStackTrace(); + } + snippetColumn++; + + //determine if row is complete, if yes move to next row, and if not move to next column + if(i == columnBreaks.size() - 2){ + nextBegin = nextEnd - (height/2); + nextEnd = nextBegin + height; + if(nextEnd <= this.getVertical()){ + i = -1; + snippetRow++; + snippetColumn = 0; + } + } + } + } + + /** + * When given an integer to represent the scale (3 for 3x3, 4 for 4x4, 5 for 5x5, etc) + * this function will average the pixels of the snippet based on the given scale. 
+ * @param scale + * @param snippet + * @param height + * @param width + * @return + */ + private int[][] scaleDownSnippet(int scale, int[][] snippet, int height, int width){ + int r = 0,s = 0; + int[][] scaledImage = new int[height/scale][width/scale]; + for(int i = scale/2; i < height - (scale/2); i = i+scale){ + s = 0; + for(int j = scale/2; j < width - (scale/2); j = j+scale){ + scaledImage[r][s] = average(scale, snippet, i, j); + s++; + } + r++; + } + return scaledImage; + } + + /** + * returns the average pixel value of a pixels scale x scale area. + * This function can use both odd and even numbers. + * @param scale + * @param snippet + * @param i + * @param j + * @return + */ + private int average(int scale, int[][] snippet, int i, int j){ + int sum = 0; + if(scale%2 == 0){ + for(int a = i-(scale/2); a < i+(scale/2); a++){ + for(int b = j-(scale/2); b < j+(scale/2); b++){ + sum+=snippet[i][j]; + } + } + }else{ + for(int a = i-(scale/2); a <= i+(scale/2); a++){ + for(int b = j-(scale/2); b <= j+(scale/2); b++){ + sum+=snippet[i][j]; + } + } + } + return sum/(scale*scale); + } + + public void printImage(String filePath){ + int w = this.getHorizontal(),h = this.getVertical(); + BufferedImage OutputImage = new BufferedImage(w,h,BufferedImage.TYPE_INT_RGB); + int[][] pixels3 = this.getByteImage(); + for (int y = 0; y < this.getVertical(); y++) { + for (int x = 0; x < this.getHorizontal(); x++) { + //The following line offsets the pixels' values to fix the 'blue problem' + int value = pixels3[y][x] << 16 | pixels3[y][x] << 8 | pixels3[y][x]; + OutputImage.setRGB(x, y, value); + } + } + + File outputFile = new File(filePath,this.name); + try { + ImageIO.write(OutputImage, "jpg", outputFile); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } diff --git a/src/models/ReadIni.java b/src/models/ReadIni.java new file mode 100644 index 0000000..97eec01 --- /dev/null +++ b/src/models/ReadIni.java @@ -0,0 +1,51 @@ +/* + * 
To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package models; + +import java.util.*; +import java.io.*; +/** + * + * @author Mike + */ +public class ReadIni { + private EnumCollection myEnums = new EnumCollection(); + // Members + private int ImageType; + private int NeedBinarizing; + private int NeedBlurring; + private int NeedConsolidating; + + // Getters + public int GetImageType(){ + return this.ImageType; + } + public int GetNeedBinarizing(){ + return this.NeedBinarizing; + } + public int GetNeedBlurring(){ + return this.NeedBlurring; + } + public int GetNeedConsolidating(){ + return this.NeedConsolidating; + } + + // Load Init Info + public ReadIni(){ + Properties p = new Properties(); + try{ + p.load(new FileInputStream("../user.ini")); + // Load members' info from user.ini file + this.ImageType = myEnums.GetIntOfImageType(p.getProperty("ImageType").toUpperCase()); + this.NeedBinarizing = myEnums.GetIntOfTrueFalse(p.getProperty("NeedBinarizing").toUpperCase()); + this.NeedBlurring = myEnums.GetIntOfTrueFalse(p.getProperty("NeedBlurring").toUpperCase()); + this.NeedConsolidating = myEnums.GetIntOfTrueFalse(p.getProperty("NeedConsolidating").toUpperCase()); + } + catch (Exception e){ + System.out.println(e); + } + } +} diff --git a/tif_jar/jai_imageio.jar b/tif_jar/jai_imageio.jar new file mode 100644 index 0000000..359551e Binary files /dev/null and b/tif_jar/jai_imageio.jar differ diff --git a/user.ini b/user.ini new file mode 100644 index 0000000..13b92ee --- /dev/null +++ b/user.ini @@ -0,0 +1,4 @@ +ImageType=TIF +NeedBinarizing=TRUE +NeedBlurring=TRUE +NeedConsolidating=TRUE \ No newline at end of file