diff --git a/PageSegmentation_Workflow.html b/PageSegmentation_Workflow.html
new file mode 100644
index 0000000..e573976
--- /dev/null
+++ b/PageSegmentation_Workflow.html
@@ -0,0 +1,11 @@
+
+
+
+
+Untitled Diagram
+
+
+
+
+
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 274832a..37fa0db 100755
--- a/README.md
+++ b/README.md
@@ -111,3 +111,4 @@ All output files created by the program are created in the data directory:
## Notes ##
* If using IDE, run the command line script first before running the file in
the IDE.
+* If you want to skip the binarization step because the image is already binarized (such as a .tiff file), set NeedBinarizing=FALSE in the user.ini file.
diff --git a/RunAll.sh b/RunAll.sh
index 2b688bf..d56371a 100755
--- a/RunAll.sh
+++ b/RunAll.sh
@@ -5,7 +5,7 @@ echo "AIDA: Poem Identifier"
cd data/
find Output_Snippets/ -type f > snippetPathList.txt
-find Output_Snippets/ -not -path '*/\.*' -type f -printf "%f\n" > SnippetNameList.txt
+find Output_Snippets/ -not -path '*/\.*' -type f -name '*.jpg' -exec basename {} \; > SnippetNameList.txt;
cd ../
NOME=$1
c=0
diff --git a/RunPageSegmentation.sh b/RunPageSegmentation.sh
index cb6f6aa..e7a0c84 100755
--- a/RunPageSegmentation.sh
+++ b/RunPageSegmentation.sh
@@ -1,6 +1,6 @@
#!/bin/bash
cd src/
-javac execute/RunPageSegmentation.java
+javac -cp ../tif_jar/jai_imageio.jar:./ execute/RunPageSegmentation.java
cd ../
beginYear=$1
@@ -22,7 +22,8 @@ for i in $( ls ); do
totalCount=$((totalCount + 1))
current=$(pwd)
cd $src
- java execute/RunPageSegmentation $current/$k
+#java execute/RunPageSegmentation $current/$k
+ java -cp ../tif_jar/jai_imageio.jar:./ execute/RunPageSegmentation $current/$k
cd $current
done
cd ..
diff --git a/data/FullPages/test1/test2/WO2_B0001ORIWEEJO_1718_12_20-0001.tif b/data/FullPages/test1/test2/WO2_B0001ORIWEEJO_1718_12_20-0001.tif
new file mode 100755
index 0000000..30b9b84
Binary files /dev/null and b/data/FullPages/test1/test2/WO2_B0001ORIWEEJO_1718_12_20-0001.tif differ
diff --git a/data/SnippetNameList.txt b/data/SnippetNameList.txt
new file mode 100644
index 0000000..e69de29
diff --git a/data/snippetPathList.txt b/data/snippetPathList.txt
new file mode 100644
index 0000000..20e5444
--- /dev/null
+++ b/data/snippetPathList.txt
@@ -0,0 +1 @@
+Output_Snippets//.keep
diff --git a/src/execute/RunPageSegmentation.java b/src/execute/RunPageSegmentation.java
index db850d7..e43b76a 100644
--- a/src/execute/RunPageSegmentation.java
+++ b/src/execute/RunPageSegmentation.java
@@ -20,205 +20,240 @@
import models.Image;
import blurring.ImageBlurrer;
-public class RunPageSegmentation {
+import models.ReadIni;
+import models.EnumCollection;
- /**
- * This main function is responsible for running the Full-page segmentation algorithm.
- * It reads in either a single .jpg image or a text list of .jpg images and performs segmentation.
- * If no argument given it runs on all images. Placement of these images is different from .txt or .jpg options.
- * @param args
- */
- public static void main(String[] args) {
- if(args.length > 0){
- //process images from a text file list
- if(args[0].contains(".txt")){
- String imageList = args[0];
- File inputImages = new File(Constants.imageLists,imageList);
- BufferedReader br = null;
- try {
- br = new BufferedReader(new FileReader(inputImages));
- } catch (FileNotFoundException e1) {
- e1.printStackTrace();
- }
- try {
- String line = br.readLine();
- int i = 1;
- while(line != null){
- System.out.println("Image "+i);
- Image img = importImage(line);
- try{
- segmentImage(img, false);
- }catch(RuntimeException r){
- System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast");
- }catch(Exception e){
- System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast");
- }
- line = br.readLine();
- i++;
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- //process only one image
- }else if(args[0].contains(".jpg")){
- Image img = importImage(args[0]);
- try{
- segmentImage(img, false);
- } catch(Exception e){
- e.printStackTrace();
- //System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated");
- }
- }
- //Process all images in the AIDA file structure created by the image retrieval script.
- //Currently legacy code as we now have a script that will do this process using a bash script
- //that repeatedly calls the process single image option.
- }else{
- File start = new File(Constants.fullPagePath);
- File successFile = new File(Constants.successSegment);
- BufferedWriter successStream = null;
- try{
- successStream = new BufferedWriter(new FileWriter(successFile,false));
- }catch(Exception e){
- System.out.println("Failed to create BufferedWriter");
- }
- StringBuilder sb = new StringBuilder();
- File[] newspapers = start.listFiles(new FileFilter(){
- @Override
- public boolean accept(File file){
- return !file.isHidden();
- }
- });
- int numOfNewspapers = newspapers.length;
- int currentPaper = 0;
- System.out.println("Segmenting Images...");
- for(File file : newspapers){
- currentPaper++;
- File[] issues = file.listFiles(new FileFilter(){
- @Override
- public boolean accept(File file){
- return !file.isHidden();
- }
- });
- int numOfIssues = issues.length;
- int currentIssue = 0;
- for(File issue : issues){
- currentIssue++;
- File[] images = issue.listFiles(new FileFilter(){
- @Override
- public boolean accept(File file){
- return !file.isHidden();
- }
- });
- int numOfImages = images.length;
- int currentImage = 0;
- for(File image : images){
- if(image.getName().contains(".jpg")){
- currentImage++;
- String path = image.getAbsolutePath();
- Image img = importImage(path);
- try{
- segmentImage(img, false);
- System.out.print("\rSegmented: Newspaper "+currentPaper+"/"+numOfNewspapers+" Issue "+currentIssue+"/"+numOfIssues+" Image "+currentImage+"/"+numOfImages+" in "+file.getName()+" ");
- sb.append(img.getName()+"\n");
- }catch(RuntimeException r){
- System.out.println();
- System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast");
- r.printStackTrace();
- File output = new File(Constants.data,"FailedList.txt");
+public class RunPageSegmentation {
+ /**
+ * This main function is responsible for running the Full-page segmentation algorithm.
+ * It reads in either a single .jpg or .tif image or a .txt list of images and performs segmentation.
+ * If no argument given it runs on all images. Placement of these images is different from .txt or .jpg options.
+ * @param args
+ */
+ public static void main(String[] args) {
+ if(args.length > 0){
+ //process images from a text file list
+ if(args[0].contains(".txt")){
+ String imageList = args[0];
+ File inputImages = new File(Constants.imageLists,imageList);
+ BufferedReader br = null;
+ try {
+ br = new BufferedReader(new FileReader(inputImages));
+ } catch (FileNotFoundException e1) {
+ e1.printStackTrace();
+ }
+
+ try {
+ String line = br.readLine();
+ int i = 1;
+ while(line != null){
+ System.out.println("Image "+i);
+ Image img = importImage(line);
+ try{
+ segmentImage(img, false);
+ }catch(RuntimeException r){
+ System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast");
+ }catch(Exception e){
+ System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast");
+ }
+ line = br.readLine();
+ i++;
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ //process only one image
+ }else if(args[0].contains(".jpg") || args[0].contains(".tif")){
+ Image img = importImage(args[0]);
+ try{
+ segmentImage(img, true);
+ } catch(Exception e){
+ e.printStackTrace();
+ //System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated");
+ }
+ }
+ //Process all images in the AIDA file structure created by the image retrieval script.
+ //Currently legacy code as we now have a script that will do this process using a bash script
+ //that repeatedly calls the process single image option.
+ }else{
+ File start = new File(Constants.fullPagePath);
+ File successFile = new File(Constants.successSegment);
+ BufferedWriter successStream = null;
+ try{
+ successStream = new BufferedWriter(new FileWriter(successFile,false));
+ }catch(Exception e){
+ System.out.println("Failed to create BufferedWriter");
+ }
+ StringBuilder sb = new StringBuilder();
+ File[] newspapers = start.listFiles(new FileFilter(){
+ @Override
+ public boolean accept(File file){
+ return !file.isHidden();
+ }
+ });
+ int numOfNewspapers = newspapers.length;
+ int currentPaper = 0;
+ System.out.println("Segmenting Images...");
+ for(File file : newspapers){
+ currentPaper++;
+ File[] issues = file.listFiles(new FileFilter(){
+ @Override
+ public boolean accept(File file){
+ return !file.isHidden();
+ }
+ });
+ int numOfIssues = issues.length;
+ int currentIssue = 0;
+ for(File issue : issues){
+ currentIssue++;
+ File[] images = issue.listFiles(new FileFilter(){
+ @Override
+ public boolean accept(File file){
+ return !file.isHidden();
+ }
+ });
+ int numOfImages = images.length;
+ int currentImage = 0;
+ for(File image : images){
+ if(image.getName().contains(".jpg") || image.getName().contains(".tif")){
+ currentImage++;
+ String path = image.getAbsolutePath();
+ Image img = importImage(path);
+ try{
+ segmentImage(img, false);
+ System.out.print("\rSegmented: Newspaper "+currentPaper+"/"+numOfNewspapers+" Issue "+currentIssue+"/"+numOfIssues+" Image "+currentImage+"/"+numOfImages+" in "+file.getName()+" ");
+ sb.append(img.getName()+"\n");
+ }catch(RuntimeException r){
+ System.out.println();
+ System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the image isn't rotated and has good contrast");
+ r.printStackTrace();
+ File output = new File(Constants.data,"FailedList.txt");
try {
- if(!output.exists())
- output.createNewFile();
- FileWriter writer = new FileWriter(output,true);
- writer.write("Runtime Exception: "+img.getName()+"\n");
- writer.close();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
-
- }catch(Exception e){
- System.out.println();
- System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast");
-
- File output = new File(Constants.data,"FailedList.txt");
- try {
- if(!output.exists())
- output.createNewFile();
- FileWriter writer = new FileWriter(output,true);
- writer.write("Normal Exception: "+img.getName()+"\n");
- writer.close();
- } catch (IOException i) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
-
- }
- }
- }
- }
- }
- try{
- successStream.append(sb.toString());
- successStream.flush();
- successStream.close();
- }catch(Exception e){
- System.out.println("Writing to file Failed. Unexpected error");
- }
- }
- }
-
-
- /**
- * This method imports the image whose filepath is passed as a parameter and returns an models.Image object.
- * @param inputFilename
- * @return models.Image
- */
- public static Image importImage(String inputFilename){
- BufferedImage inputImage = null;
- int w=0,h=0;
- try {
- //System.out.println("Loading Image..");
- File inputImageFile = new File(inputFilename);
- inputImage = ImageIO.read(inputImageFile);
-
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- Raster raster = inputImage.getData();
- w = raster.getWidth();
- h = raster.getHeight();
- Image img = new Image(h,w);
- int pixels[][] = new int[h][w];
-
- //read the pixels from the input image
- for (int i = 0; i < h; i++) {
- for (int j = 0; j < w; j++) {
- pixels[i][j] = raster.getSample(j, i, 0);
- }
- }
-
- img.setByteImage(pixels);
- img.setByteImage2(pixels);
-
- img.setName(inputFilename.substring(inputFilename.lastIndexOf("/")+1));
-
- return img;
- }
-
- /**
- * A helper method for grouping together the function calls for image segmentation.
- * @param img
- */
- public static void segmentImage(Image img, boolean shouldShowColumns){
- ImageBlurrer imb = new ImageBlurrer();
+ if(!output.exists())
+ output.createNewFile();
+ FileWriter writer = new FileWriter(output,true);
+ writer.write("Runtime Exception: "+img.getName()+"\n");
+ writer.close();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ }catch(Exception e){
+ System.out.println();
+ System.out.println("ERROR: Unable to segment "+img.getName()+"\nPlease make sure that the page isn't rotated and has good contrast");
+
+ File output = new File(Constants.data,"FailedList.txt");
+ try {
+ if(!output.exists())
+ output.createNewFile();
+ FileWriter writer = new FileWriter(output,true);
+ writer.write("Normal Exception: "+img.getName()+"\n");
+ writer.close();
+ } catch (IOException i) {
+ // Report the IOException raised while writing FailedList.txt (i), not the outer segmentation exception (e).
+ i.printStackTrace();
+ }
+
+ }
+ }
+ }
+ }
+ }
+ try{
+ successStream.append(sb.toString());
+ successStream.flush();
+ successStream.close();
+ }catch(Exception e){
+ System.out.println("Writing to file Failed. Unexpected error");
+ }
+ }
+ }
+
+
+ /**
+  * Imports the image at the given file path and returns it as a models.Image object.
+  * Band 0 of every pixel is copied into a height-by-width int array. When every sample is
+  * 0 or 1, the 1s are rescaled to 255 so the image uses 0/255 values (Image.findColumnBreaks
+  * counts 255 as white).
+  * @param inputFilename path of the image file; the text after the last "/" becomes the image name
+  * @return models.Image whose byteImage and byteImage2 are both set to the same pixel array
+  * @throws IllegalArgumentException if the file cannot be read or no ImageIO reader can decode it
+  */
+ public static Image importImage(String inputFilename){
+     BufferedImage inputImage = null;
+     try {
+         inputImage = ImageIO.read(new File(inputFilename));
+     } catch (IOException e) {
+         // Fail here with the file name instead of printing the trace and then hitting a
+         // NullPointerException on inputImage.getData() below.
+         throw new IllegalArgumentException("Unable to read image file: "+inputFilename, e);
+     }
+     // ImageIO.read returns null, without throwing, when no registered ImageReader can decode
+     // the input (for example a .tif when no TIFF reader plugin is on the classpath).
+     if(inputImage == null){
+         throw new IllegalArgumentException("No ImageIO reader could decode image file: "+inputFilename);
+     }
+
+     Raster raster = inputImage.getData();
+     int w = raster.getWidth();
+     int h = raster.getHeight();
+     Image img = new Image(h,w);
+     int pixels[][] = new int[h][w];
+
+     //read the pixels from the input image, noting whether every sample is 0 or 1
+     boolean isBin = true;
+     for (int i = 0; i < h; i++) {
+         for (int j = 0; j < w; j++) {
+             int sample = raster.getSample(j, i, 0);
+             pixels[i][j] = sample;
+             if(sample != 0 && sample != 1) {
+                 isBin = false;
+             }
+         }
+     }
+
+     //a 0/1 image is rescaled to 0/255; any other image is left untouched
+     if(isBin) {
+         for(int i = 0; i < h; i++){
+             for(int j = 0; j < w; j++) {
+                 if(pixels[i][j] == 1){
+                     pixels[i][j] = 255;
+                 }
+             }
+         }
+     }
+
+     //both byteImage and byteImage2 refer to this one array
+     img.setByteImage(pixels);
+     img.setByteImage2(pixels);
+
+     //the image name is the file name without its directory part
+     img.setName(inputFilename.substring(inputFilename.lastIndexOf("/")+1));
+
+     return img;
+ }
+
+ /**
+ * A helper method for grouping together the function calls for image segmentation.
+ * @param img
+ */
+ public static void segmentImage(Image img, boolean shouldShowColumns){
+
+ ImageBlurrer imb = new ImageBlurrer();
+ ReadIni myConfig = new ReadIni();
+ EnumCollection myEnums = new EnumCollection();
//boolean values indicate if we want to output the intermediate stages of binarizing the image
//Stages: contrasted, binary, binary with Morphology
- imb.binarizeSegment(img, false, false, false);
-
- int shouldContinue = img.findColumnBreaks();
+
+ // 9.17.2017. Added this branch to skip binarization if image is already binarized
+ if(myConfig.GetNeedBinarizing() == myEnums.GetIntOfTrueFalse("TRUE")){
+ imb.binarizeSegment(img, false, false, false);
+ }
+
+ int shouldContinue = img.findColumnBreaks();
System.out.println(shouldContinue);
//Continue the process if image exited with no error
@@ -257,8 +292,8 @@ public static void segmentImage(Image img, boolean shouldShowColumns){
error = "Columns are only on half of the page, "+img.getColumnBreaks();
break;
case 4:
- error = "Std Dev Above 150, "+img.getColumnBreaks();
- break;
+ error = "Std Dev Above 150, "+img.getColumnBreaks();
+ break;
}
File output = new File(Constants.data, "imageFailedNeedHuman.txt");
try {
@@ -272,5 +307,5 @@ public static void segmentImage(Image img, boolean shouldShowColumns){
ioe.printStackTrace();
}
}
- }
+ }
}
diff --git a/src/execute/RunProgram.java b/src/execute/RunProgram.java
index 0a3ff7f..452fb7c 100644
--- a/src/execute/RunProgram.java
+++ b/src/execute/RunProgram.java
@@ -17,6 +17,8 @@
import models.BlurredImage;
import models.Image;
import blurring.ImageBlurrer;
+import models.EnumCollection;
+import models.ReadIni;
/**
* The method that is responsible for all processes. It calls methods from blurring.ImageBlurrer to blur, binarize and use
@@ -28,6 +30,9 @@
*/
public class RunProgram {
public static void main(String args[]) throws IOException{
+ ReadIni myConfig = new ReadIni();
+ EnumCollection myEnums = new EnumCollection();
+
int d = Integer.parseInt(args[0]);
final int tripleRegular = 1,consolidated = 2,train = 3,test = 4;
@@ -47,7 +52,7 @@ public static void main(String args[]) throws IOException{
// Manual Controls
int blurMode = consolidated; // tripleRegular or consolidated
- int whatSet = test; // 'test' for testing set of 7500+ snippets. 'train' for 400 training snippets
+ int whatSet = train; // 'test' for testing set of 7500+ snippets. 'train' for 400 training snippets
boolean outCustom = true; // Set true output the image, false when output of images themselves is not necessary
boolean outBinary = true; // Set true to output binary images
boolean outBlurred= false; // Set true to output blurred images.
@@ -128,7 +133,11 @@ public static void main(String args[]) throws IOException{
// ---------------------------------------------------------------------------
imb.blurImage(img5 ,outBlurred);
- imb.binarizeImage(img5, outBinary);
+
+ // 9.17.2017. Binarize only when NeedBinarizing is TRUE; FALSE means the image is already binarized and the step is skipped (same test as RunPageSegmentation.segmentImage).
+ if(myConfig.GetNeedBinarizing() == myEnums.GetIntOfTrueFalse("TRUE")){
+ imb.binarizeImage(img5, outBinary);
+ }
if(isCustom){
imb.customBlur(img5,outCustom);
}
@@ -222,7 +231,8 @@ public static Image importImage(String inputFilename){
int w=0,h=0;
try {
System.out.println("Loading Image..");
- File inputImageFile = new File(Constants.data, inputFilename);
+ // Resolve against Constants.data, as the code did before this change, instead of an absolute path that exists only on one developer's machine.
+ File inputImageFile = new File(Constants.data, inputFilename);
BufferedImage inputImage = ImageIO.read(inputImageFile);
Raster raster = inputImage.getData();
diff --git a/src/models/EnumCollection.java b/src/models/EnumCollection.java
new file mode 100644
index 0000000..ffae9c5
--- /dev/null
+++ b/src/models/EnumCollection.java
@@ -0,0 +1,34 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package models;
+
+/**
+ * Converts image-type and TRUE/FALSE names to the ordinal of the matching enum constant.
+ * @author Mike
+ */
+public class EnumCollection {
+    // Enumerator for the image types (ordinals: TIF=0, JPG=1)
+    private enum EnumImageType{ TIF, JPG }
+    // Enumerator for the T/F values (ordinals: FALSE=0, TRUE=1)
+    private enum EnumTrueFalse{ FALSE, TRUE }
+
+    /** Returns the ordinal of the image type named s (case-insensitive); throws IllegalArgumentException if unknown. */
+    public int GetIntOfImageType(String s){
+        return EnumImageType.valueOf(s.toUpperCase(java.util.Locale.ROOT)).ordinal();
+    }
+    /** Returns 0 for "FALSE" and 1 for "TRUE" (case-insensitive); throws IllegalArgumentException otherwise. */
+    public int GetIntOfTrueFalse(String s){
+        return EnumTrueFalse.valueOf(s.toUpperCase(java.util.Locale.ROOT)).ordinal();
+    }
+    /** Returns true when s names any supported image type, ignoring case; false otherwise (including null). */
+    public static boolean ContainsImageType(String s){
+        for (EnumImageType enumImage: EnumImageType.values()){
+            // Only a match may return early; the old unconditional return tested TIF alone.
+            if (enumImage.name().equalsIgnoreCase(s)) { return true; }
+        }
+        return false;
+    }
+}
diff --git a/src/models/Image.java b/src/models/Image.java
index 83fd751..f20bcbc 100644
--- a/src/models/Image.java
+++ b/src/models/Image.java
@@ -22,388 +22,388 @@ public class Image {
private int BLACK_COLUMN_SEPARATION_MIN = 125;
private int EDGE_COLUMN_DISTANCE_MAX = 100;
private int COLUMN_SEPARATION_MIN = 75;
-
- public int[][] byteImage;
- protected int[][] byteImage2;
-
- protected String name;
- protected String parentName;
- protected boolean containsPoem;
- protected boolean checkValue;
- protected int vertical;
- protected int horizontal;
-
- protected double stanzaMean;
- protected double stanzaStandardDeviation;
- protected double stanzaMin;
- protected double stanzaMax;
- protected double stanzaRange;
-
- protected double jaggedLineMean;
- protected double jaggedLineStandardDeviation;
- protected double jaggedMin;
- protected double jaggedMax;
- protected double jaggedRange;
-
- protected double marginMean;
- protected double marginStdDev;
- protected double marginMin;
- protected double marginMax;
- protected double marginRange;
-
- protected double lengthMean;
- protected double lengthStdDev;
- protected double lengthMin;
- protected double lengthMax;
- protected double lengthRange;
-
- protected ArrayList columnBreaks;
-
-
-
- public double getLengthMean() {
- return lengthMean;
- }
-
- public void setLengthMean(double lengthMean) {
- this.lengthMean = lengthMean;
- }
-
- public double getLengthStdDev() {
- return lengthStdDev;
- }
-
- public void setLengthStdDev(double lengthStdDev) {
- this.lengthStdDev = lengthStdDev;
- }
-
- public double getLengthMin() {
- return lengthMin;
- }
-
- public void setLengthMin(double lengthMin) {
- this.lengthMin = lengthMin;
- }
-
- public double getLengthMax() {
- return lengthMax;
- }
-
- public void setLengthMax(double lengthMax) {
- this.lengthMax = lengthMax;
- }
-
- public double getLengthRange() {
- return lengthRange;
- }
-
- public void setLengthRange(double lengthRange) {
- this.lengthRange = lengthRange;
- }
- protected int blurLevel;
-
- public double getStanzaMin() {
- return stanzaMin;
- }
-
- public void setStanzaMin(double stanzaMin) {
- this.stanzaMin = stanzaMin;
- }
-
- public double getStanzaMax() {
- return stanzaMax;
- }
-
- public void setStanzaMax(double stanzaMax) {
- this.stanzaMax = stanzaMax;
- }
-
- public double getStanzaRange() {
- return stanzaRange;
- }
-
- public void setStanzaRange(double stanzaRange) {
- this.stanzaRange = stanzaRange;
- }
-
- public double getJaggedMin() {
- return jaggedMin;
- }
-
- public void setJaggedMin(double jaggedMin) {
- this.jaggedMin = jaggedMin;
- }
-
- public double getJaggedMax() {
- return jaggedMax;
- }
-
- public void setJaggedMax(double jaggedMax) {
- this.jaggedMax = jaggedMax;
- }
-
- public double getJaggedRange() {
- return jaggedRange;
- }
-
- public void setJaggedRange(double jaggedRange) {
- this.jaggedRange = jaggedRange;
- }
-
- public double getMarginMin() {
- return marginMin;
- }
-
- public void setMarginMin(double marginMin) {
- this.marginMin = marginMin;
- }
-
- public double getMarginMax() {
- return marginMax;
- }
-
- public void setMarginMax(double marginMax) {
- this.marginMax = marginMax;
- }
-
- public double getMarginRange() {
- return marginRange;
- }
-
- public void setMarginRange(double marginRange) {
- this.marginRange = marginRange;
- }
-
- public double getMarginMean() {
- return marginMean;
- }
-
- public void setMarginMean(double marginMean) {
- this.marginMean = marginMean;
- }
-
- public double getMarginStdDev() {
- return marginStdDev;
- }
-
- public void setMarginStdDev(double marginStdDev) {
- this.marginStdDev = marginStdDev;
- }
- protected double leftMarginSize;
- protected double rightMarginSize;
-
- public void setParentName(String s){
- this.parentName = s;
- }
-
- public String getParentName(){
- return this.parentName;
- }
-
- public Image(){
- this.name = "";
- this.containsPoem = false;
- this.checkValue = false;
- this.vertical = 0;
- this.horizontal = 0;
- this.stanzaMean = 0;
- this.stanzaStandardDeviation = 0;
- this.blurLevel = 3;
- this.jaggedLineMean = 0;
- this.jaggedLineStandardDeviation = 0;
- this.leftMarginSize = 0;
- this.rightMarginSize = 0;
-
- }
-
- public Image(int h, int w){
- this.name = "";
- this.containsPoem = false;
- this.checkValue = false;
- this.vertical = h;
- this.horizontal = w;
- this.stanzaMean = 0;
- this.stanzaStandardDeviation = 0;
- this.blurLevel = 3;
- this.jaggedLineMean = 0;
- this.jaggedLineStandardDeviation = 0;
- this.leftMarginSize = 0;
- this.rightMarginSize = 0;
- this.byteImage = new int[h][w];
- this.byteImage2 = new int[h][w];
- }
-
- public int[][] getByteImage2() {
- return byteImage2;
- }
-
- public void setByteImage2(int[][] byteImage2) {
- this.byteImage2 = byteImage2;
- }
-
- public double getLeftMarginSize() {
- return leftMarginSize;
- }
-
- public void setLeftMarginSize(double leftMarginSize) {
- this.leftMarginSize = leftMarginSize;
- }
-
- public double getRightMarginSize() {
- return rightMarginSize;
- }
-
- public void setRightMarginSize(double rightMarginSize) {
- this.rightMarginSize = rightMarginSize;
- }
-
- public double getJaggedLineMean() {
- return jaggedLineMean;
- }
-
- public void setJaggedLineMean(double jaggedLineMean) {
- this.jaggedLineMean = jaggedLineMean;
- }
-
- public double getJaggedLineStandardDeviation() {
- return jaggedLineStandardDeviation;
- }
-
- public void setJaggedLineStandardDeviation(double jaggedLineStandardDeviation) {
- this.jaggedLineStandardDeviation = jaggedLineStandardDeviation;
- }
-
- public int getVertical() {
- return vertical;
- }
-
- public void setVertical(int vertical) {
- this.vertical = vertical;
- }
-
- public int getHorizontal() {
- return horizontal;
- }
-
- public void setHorizontal(int horizontal) {
- this.horizontal = horizontal;
- }
-
-
-
- public int getBlurLevel() {
- return blurLevel;
- }
-
- public void setBlurLevel(int blurLevel) {
- this.blurLevel = blurLevel;
- }
-
- public double getStanzaMean() {
- return stanzaMean;
- }
-
- public void setStanzaMean(double stanzaMean) {
- this.stanzaMean = stanzaMean;
- }
-
- public double getStanzaStdDev() {
- return stanzaStandardDeviation;
- }
-
- public void setStanzaStandardDeviation(double stanzaStandardDeviation) {
- this.stanzaStandardDeviation = stanzaStandardDeviation;
- }
-
- public Image(String fileName, boolean checkFigure){
- this.name = fileName;
- this.checkValue = checkFigure;
-
- }
-
- public boolean getCheckValue() {
- return checkValue;
- }
- public void setCheckValue(boolean checkValue) {
- this.checkValue = checkValue;
- }
- public int[][] getByteImage() {
- return byteImage;
- }
- public void setByteImage(int[][] byteImage) {
- this.byteImage = byteImage;
- }
- public String getName() {
- return name;
- }
- public void setName(String name) {
- this.name = name;
- }
- public boolean isContainsPoem() {
- return containsPoem;
- }
- public void setContainsPoem(boolean containsPoem) {
- this.containsPoem = containsPoem;
- }
+
+ public int[][] byteImage;
+ protected int[][] byteImage2;
+
+ protected String name;
+ protected String parentName;
+ protected boolean containsPoem;
+ protected boolean checkValue;
+ protected int vertical;
+ protected int horizontal;
+
+ protected double stanzaMean;
+ protected double stanzaStandardDeviation;
+ protected double stanzaMin;
+ protected double stanzaMax;
+ protected double stanzaRange;
+
+ protected double jaggedLineMean;
+ protected double jaggedLineStandardDeviation;
+ protected double jaggedMin;
+ protected double jaggedMax;
+ protected double jaggedRange;
+
+ protected double marginMean;
+ protected double marginStdDev;
+ protected double marginMin;
+ protected double marginMax;
+ protected double marginRange;
+
+ protected double lengthMean;
+ protected double lengthStdDev;
+ protected double lengthMin;
+ protected double lengthMax;
+ protected double lengthRange;
+
+ protected ArrayList columnBreaks;
+
+
+
+ public double getLengthMean() {
+ return lengthMean;
+ }
+
+ public void setLengthMean(double lengthMean) {
+ this.lengthMean = lengthMean;
+ }
+
+ public double getLengthStdDev() {
+ return lengthStdDev;
+ }
+
+ public void setLengthStdDev(double lengthStdDev) {
+ this.lengthStdDev = lengthStdDev;
+ }
+
+ public double getLengthMin() {
+ return lengthMin;
+ }
+
+ public void setLengthMin(double lengthMin) {
+ this.lengthMin = lengthMin;
+ }
+
+ public double getLengthMax() {
+ return lengthMax;
+ }
+
+ public void setLengthMax(double lengthMax) {
+ this.lengthMax = lengthMax;
+ }
+
+ public double getLengthRange() {
+ return lengthRange;
+ }
+
+ public void setLengthRange(double lengthRange) {
+ this.lengthRange = lengthRange;
+ }
+ protected int blurLevel;
+
+ public double getStanzaMin() {
+ return stanzaMin;
+ }
+
+ public void setStanzaMin(double stanzaMin) {
+ this.stanzaMin = stanzaMin;
+ }
+
+ public double getStanzaMax() {
+ return stanzaMax;
+ }
+
+ public void setStanzaMax(double stanzaMax) {
+ this.stanzaMax = stanzaMax;
+ }
+
+ public double getStanzaRange() {
+ return stanzaRange;
+ }
+
+ public void setStanzaRange(double stanzaRange) {
+ this.stanzaRange = stanzaRange;
+ }
+
+ public double getJaggedMin() {
+ return jaggedMin;
+ }
+
+ public void setJaggedMin(double jaggedMin) {
+ this.jaggedMin = jaggedMin;
+ }
+
+ public double getJaggedMax() {
+ return jaggedMax;
+ }
+
+ public void setJaggedMax(double jaggedMax) {
+ this.jaggedMax = jaggedMax;
+ }
+
+ public double getJaggedRange() {
+ return jaggedRange;
+ }
+
+ public void setJaggedRange(double jaggedRange) {
+ this.jaggedRange = jaggedRange;
+ }
+
+ public double getMarginMin() {
+ return marginMin;
+ }
+
+ public void setMarginMin(double marginMin) {
+ this.marginMin = marginMin;
+ }
+
+ public double getMarginMax() {
+ return marginMax;
+ }
+
+ public void setMarginMax(double marginMax) {
+ this.marginMax = marginMax;
+ }
+
+ public double getMarginRange() {
+ return marginRange;
+ }
+
+ public void setMarginRange(double marginRange) {
+ this.marginRange = marginRange;
+ }
+
+ public double getMarginMean() {
+ return marginMean;
+ }
+
+ public void setMarginMean(double marginMean) {
+ this.marginMean = marginMean;
+ }
+
+ public double getMarginStdDev() {
+ return marginStdDev;
+ }
+
+ public void setMarginStdDev(double marginStdDev) {
+ this.marginStdDev = marginStdDev;
+ }
+ protected double leftMarginSize;
+ protected double rightMarginSize;
+
+ public void setParentName(String s){
+ this.parentName = s;
+ }
+
+ public String getParentName(){
+ return this.parentName;
+ }
+
+ public Image(){
+ this.name = "";
+ this.containsPoem = false;
+ this.checkValue = false;
+ this.vertical = 0;
+ this.horizontal = 0;
+ this.stanzaMean = 0;
+ this.stanzaStandardDeviation = 0;
+ this.blurLevel = 3;
+ this.jaggedLineMean = 0;
+ this.jaggedLineStandardDeviation = 0;
+ this.leftMarginSize = 0;
+ this.rightMarginSize = 0;
+
+ }
+
+ public Image(int h, int w){
+ this.name = "";
+ this.containsPoem = false;
+ this.checkValue = false;
+ this.vertical = h;
+ this.horizontal = w;
+ this.stanzaMean = 0;
+ this.stanzaStandardDeviation = 0;
+ this.blurLevel = 3;
+ this.jaggedLineMean = 0;
+ this.jaggedLineStandardDeviation = 0;
+ this.leftMarginSize = 0;
+ this.rightMarginSize = 0;
+ this.byteImage = new int[h][w];
+ this.byteImage2 = new int[h][w];
+ }
+
+ public int[][] getByteImage2() {
+ return byteImage2;
+ }
+
+ public void setByteImage2(int[][] byteImage2) {
+ this.byteImage2 = byteImage2;
+ }
+
+ public double getLeftMarginSize() {
+ return leftMarginSize;
+ }
+
+ public void setLeftMarginSize(double leftMarginSize) {
+ this.leftMarginSize = leftMarginSize;
+ }
+
+ public double getRightMarginSize() {
+ return rightMarginSize;
+ }
+
+ public void setRightMarginSize(double rightMarginSize) {
+ this.rightMarginSize = rightMarginSize;
+ }
+
+ public double getJaggedLineMean() {
+ return jaggedLineMean;
+ }
+
+ public void setJaggedLineMean(double jaggedLineMean) {
+ this.jaggedLineMean = jaggedLineMean;
+ }
+
+ public double getJaggedLineStandardDeviation() {
+ return jaggedLineStandardDeviation;
+ }
+
+ public void setJaggedLineStandardDeviation(double jaggedLineStandardDeviation) {
+ this.jaggedLineStandardDeviation = jaggedLineStandardDeviation;
+ }
+
+ public int getVertical() {
+ return vertical;
+ }
+
+ public void setVertical(int vertical) {
+ this.vertical = vertical;
+ }
+
+ public int getHorizontal() {
+ return horizontal;
+ }
+
+ public void setHorizontal(int horizontal) {
+ this.horizontal = horizontal;
+ }
+
+
+
+ public int getBlurLevel() {
+ return blurLevel;
+ }
+
+ public void setBlurLevel(int blurLevel) {
+ this.blurLevel = blurLevel;
+ }
+
+ public double getStanzaMean() {
+ return stanzaMean;
+ }
+
+ public void setStanzaMean(double stanzaMean) {
+ this.stanzaMean = stanzaMean;
+ }
+
+ public double getStanzaStdDev() {
+ return stanzaStandardDeviation;
+ }
+
+ public void setStanzaStandardDeviation(double stanzaStandardDeviation) {
+ this.stanzaStandardDeviation = stanzaStandardDeviation;
+ }
+
+ public Image(String fileName, boolean checkFigure){
+ this.name = fileName;
+ this.checkValue = checkFigure;
+
+ }
+
+ public boolean getCheckValue() {
+ return checkValue;
+ }
+ public void setCheckValue(boolean checkValue) {
+ this.checkValue = checkValue;
+ }
+ public int[][] getByteImage() {
+ return byteImage;
+ }
+ public void setByteImage(int[][] byteImage) {
+ this.byteImage = byteImage;
+ }
+ public String getName() {
+ return name;
+ }
+ public void setName(String name) {
+ this.name = name;
+ }
+ public boolean isContainsPoem() {
+ return containsPoem;
+ }
+ public void setContainsPoem(boolean containsPoem) {
+ this.containsPoem = containsPoem;
+ }
public ArrayList getColumnBreaks(){
- return columnBreaks;
- }
- public void setColumnBreaks(ArrayList columns){
- this.columnBreaks = columns;
- }
- /**
- * Method used to find the separation columns of a newspaper.
- * These columns can be represented visually by either whitespace separating text columns or
- * by continuous, straight black lines.
- */
- public int findColumnBreaks(){
-
- ArrayList whiteColumns = new ArrayList();
- int whiteCount;
- int columnCount = 0;
- for(int j = 0; j < this.horizontal; j++){
- whiteCount = 0;
- for(int i = 0; i < this.vertical; i++){
- if(this.byteImage[i][j] == 255){
- whiteCount++;
- }
- }
- if(whiteCount >= (this.vertical*.9)){
- columnCount++;
- if(columnCount < COLUMN_COUNT_MAX){
- whiteColumns.add(j);
- }
- }else if(whiteCount < (this.vertical*.9)){
- if(whiteColumns.contains(j-1)){
- columnCount = 0;
- }else if(!whiteColumns.contains(j-1) && columnCount >= COLUMN_COUNT_MAX){
- columnCount = 0;
- }
- }
- }
-
-
- ArrayList columns = new ArrayList();
- int marker = 0;
- /*Search for the part of the array that makes a large jump, this indicates the end of
- *a section of white columns and the beginning of a new section. Once found we find the
- *middle index of the section of white columns and then set the marker to be the first
- *white column of the next section of white columns.
- */
- for(int k = 0; k < whiteColumns.size()-1; k++){
- if(whiteColumns.get(k+1) - whiteColumns.get(k) > WHITE_COLUMN_SEPARATION_MIN){
- int index = k-((k - marker)/2);
- columns.add(whiteColumns.get(index));
- marker = k+1;
- }
- //Special condition used to find middle point of the final batch of white columns
- //(the far right hand side of the newspaper)
- if(k+1 == whiteColumns.size()-1){
- int index = (k+1)-(((k+1) - marker)/2);
- columns.add(whiteColumns.get(index));
- }
- }
-
+ return columnBreaks;
+ }
+ public void setColumnBreaks(ArrayList columns){
+ this.columnBreaks = columns;
+ }
+ /**
+ * Method used to find the separation columns of a newspaper.
+ * These columns can be represented visually by either whitespace separating text columns or
+ * by continuous, straight black lines.
+ */
+ public int findColumnBreaks(){
+
+ ArrayList whiteColumns = new ArrayList();
+ int whiteCount;
+ int columnCount = 0;
+ for(int j = 0; j < this.horizontal; j++){
+ whiteCount = 0;
+ for(int i = 0; i < this.vertical; i++){
+ if(this.byteImage[i][j] == 255){
+ whiteCount++;
+ }
+ }
+ if(whiteCount >= (this.vertical*.9)){
+ columnCount++;
+ if(columnCount < COLUMN_COUNT_MAX){
+ whiteColumns.add(j);
+ }
+ }else if(whiteCount < (this.vertical*.9)){
+ if(whiteColumns.contains(j-1)){
+ columnCount = 0;
+ }else if(!whiteColumns.contains(j-1) && columnCount >= COLUMN_COUNT_MAX){
+ columnCount = 0;
+ }
+ }
+ }
+
+
+ ArrayList columns = new ArrayList();
+ int marker = 0;
+ /*Search for the part of the array that makes a large jump, this indicates the end of
+ *a section of white columns and the beginning of a new section. Once found we find the
+ *middle index of the section of white columns and then set the marker to be the first
+ *white column of the next section of white columns.
+ */
+ for(int k = 0; k < whiteColumns.size()-1; k++){
+ if(whiteColumns.get(k+1) - whiteColumns.get(k) > WHITE_COLUMN_SEPARATION_MIN){
+ int index = k-((k - marker)/2);
+ columns.add(whiteColumns.get(index));
+ marker = k+1;
+ }
+ //Special condition used to find middle point of the final batch of white columns
+ //(the far right hand side of the newspaper)
+ if(k+1 == whiteColumns.size()-1){
+ int index = (k+1)-(((k+1) - marker)/2);
+ columns.add(whiteColumns.get(index));
+ }
+ }
+
//Using white columns often isn't enough, so we check for continuous
//black lines as well to indictate a column break.
ArrayList blackColumns = new ArrayList();
@@ -439,167 +439,187 @@ public int findColumnBreaks(){
}
Collections.sort(columns);
- ArrayList columnsToAdd = new ArrayList();
- ArrayList columnsToRemove = new ArrayList();
-
+ ArrayList columnsToAdd = new ArrayList();
+ ArrayList columnsToRemove = new ArrayList();
+
//Determine extraneuous columns to remove
- int index = 0;
- while(columns.get(index) < EDGE_COLUMN_DISTANCE_MAX){
- columnsToRemove.add(columns.get(index));
- index++;
- }
- index = 1;
- while(this.horizontal-columns.get(columns.size()-index) < EDGE_COLUMN_DISTANCE_MAX){
- columnsToRemove.add(columns.get(columns.size()-index));
- index++;
- }
- columns.removeAll(columnsToRemove);
- columnsToRemove.clear();
+ int index = 0;
+ while(index < columns.size() && columns.get(index) < EDGE_COLUMN_DISTANCE_MAX){ //size guard: the list may be empty or lie wholly at the left edge
+ columnsToRemove.add(columns.get(index));
+ index++;
+ }
+ index = 1;
+ while(index <= columns.size() && this.horizontal-columns.get(columns.size()-index) < EDGE_COLUMN_DISTANCE_MAX){ //same guard, walking in from the right edge
+ columnsToRemove.add(columns.get(columns.size()-index));
+ index++;
+ }
+ columns.removeAll(columnsToRemove);
+ columnsToRemove.clear();
//If columns are very close together remove the two columns and use
//the average of the two for the final list of columns
- for(int p = 0; p < columns.size()-1; p++){
- if(columns.get(p+1)-columns.get(p) < COLUMN_SEPARATION_MIN){
- columnsToRemove.add(columns.get(p));
- columnsToRemove.add(columns.get(p+1));
- int middle = columns.get(p+1) - ((columns.get(p+1)-columns.get(p))/2);
- columnsToAdd.add(middle);
- }
- }
-
- columns.removeAll(columnsToRemove);
- columns.addAll(columnsToAdd);
- Collections.sort(columns);
- columnsToRemove.clear();
-
- //Rule 1: check for no columns found
- if(columns.size() == 0){
- this.setColumnBreaks(columns);
- return 1;
+ for(int p = 0; p < columns.size()-1; p++){
+ if(columns.get(p+1)-columns.get(p) < COLUMN_SEPARATION_MIN){
+ columnsToRemove.add(columns.get(p));
+ columnsToRemove.add(columns.get(p+1));
+ int middle = columns.get(p+1) - ((columns.get(p+1)-columns.get(p))/2);
+ columnsToAdd.add(middle);
+ }
}
- ArrayList columnWidth = new ArrayList();
- for(int p = 0; p < columns.size()-1; p++){
- columnWidth.add(columns.get(p+1)-columns.get(p));
- }
- Collections.sort(columnWidth);
+ columns.removeAll(columnsToRemove);
+ columns.addAll(columnsToAdd);
+ Collections.sort(columns);
+ columnsToRemove.clear();
- //Rule 2,3: check for less than three columns and check if columns are on more than half of the page
- if(columns.size()<3){
- this.setColumnBreaks(columns);
- return 2;
+ //Rule 1: check if there is only one column
+ if(columns.size() == 0){
+ columns.add(0,0);
+ columns.add(this.horizontal);
}
-
- if(columns.get(0)>this.horizontal/2 || columns.get(columns.size()-1) < horizontal/2){
- this.setColumnBreaks(columns);
- return 3;
+ //Rule 2: check for 2 columns
+ else if(columns.size()==1){
+ //columnWidth.add(columns.get(0));
+ //columnWidth.add(this.horizontal - columns.get(0));
+ columns.add(0,0);
+ columns.add(this.horizontal);
}
-
- int numOfColumnWidths = columnWidth.size();
-
- int columnWidthMean = 0;
- for(int width : columnWidth){
- columnWidthMean += width;
+ // columns.size() is 2: decide which page edge to add as the third break
+ else if (columns.size()==2){
+ // columns(0) is around the center (within 10% of horizontal/2); floating point is needed because 9/10 and 11/10 truncate to 0 and 1
+ if((this.horizontal/2.0)*.9 < columns.get(0) && columns.get(0) < (this.horizontal/2.0)*1.1){
+ columns.add(0,0);
+ }
+ // otherwise columns(0) is not central, so append the right page edge instead
+ else{
+ columns.add(this.horizontal);
+ }
+ }
- columnWidthMean = columnWidthMean/numOfColumnWidths;
- int columnWidthVarience = 0;
- for(int width : columnWidth){
- int temp = width - columnWidthMean;
- columnWidthVarience += Math.pow(temp,2);
+ else{
+ // Collect column info
+ ArrayList columnWidth = new ArrayList();
+ for(int p = 0; p < columns.size()-1; p++){
+ columnWidth.add(columns.get(p+1)-columns.get(p));
+ }
+ Collections.sort(columnWidth);
+
+ for(int p = 0; p < columns.size()-1; p++){
+ columnWidth.add(columns.get(p+1)-columns.get(p));
+ }
+ Collections.sort(columnWidth);
+ //Rule 3: check if columns are on more than half of the page
+
+ if(columns.get(0)>this.horizontal/2 || columns.get(columns.size()-1) < horizontal/2){
+ this.setColumnBreaks(columns);
+ return 3;
+ }
+
+ int numOfColumnWidths = columnWidth.size();
+
+ int columnWidthMean = 0;
+ for(int width : columnWidth){
+ columnWidthMean += width;
+ }
+ columnWidthMean = columnWidthMean/numOfColumnWidths;
+ int columnWidthVarience = 0;
+ for(int width : columnWidth){
+ int temp = width - columnWidthMean;
+ columnWidthVarience += Math.pow(temp,2);
+ }
+ columnWidthVarience = columnWidthVarience/numOfColumnWidths;
+ double columnWidthStdDev = Math.ceil(Math.sqrt(columnWidthVarience));
+ System.out.println("Std Dev: "+columnWidthStdDev);
+
+ //Rule 4: Check column width Std Dev. Good images were experimentally determined to be below 150 Std Dev.
+ if(columnWidthStdDev > 150) {
+ this.setColumnBreaks(columns);
+ return 4;
+ }
+
+ //Add in columns based on the average width, columns added from the right hand side
+ int averageWidth = columnWidth.get((int) Math.floor(columnWidth.size()/2));
+ for(int p = columns.size()-1; p >= 1; p--){
+ if(columns.get(p-1) < columns.get(p)-averageWidth-COLUMN_SEPARATION_MIN){
+ columns.add(p, columns.get(p)-averageWidth);
+ p++;
+ }else if(columns.get(p-1) > columns.get(p)-averageWidth+COLUMN_SEPARATION_MIN){
+ columns.remove(p-1);
+ if(columns.get(p-1)-averageWidth > 0){
+ columns.add(p-1, columns.get(p-1)-averageWidth);
+ }
+ }
+ }
+
+ //If no edge column is found, insert the column
+ while(columns.get(0) > this.horizontal*.1 && columns.get(0)-averageWidth > 0){
+ columns.add(0, columns.get(0)-averageWidth);
+ }
+ while(columns.get(columns.size()-1) < this.horizontal-(this.horizontal*.1) && columns.get(columns.size()-1)+averageWidth < this.horizontal){
+ columns.add(columns.get(columns.size()-1)+averageWidth);
+ }
}
- columnWidthVarience = columnWidthVarience/numOfColumnWidths;
- double columnWidthStdDev = Math.ceil(Math.sqrt(columnWidthVarience));
- System.out.println("Std Dev: "+columnWidthStdDev);
- //Rule 4: Check column width Std Dev. Good images were experimentally determined to be below 150 Std Dev.
- if(columnWidthStdDev > 150) {
- this.setColumnBreaks(columns);
- return 4;
- }
+ System.out.println(columns);
- //Add in columns based on the average width, columns added from the right hand side
- int averageWidth = columnWidth.get((int) Math.floor(columnWidth.size()/2));
- for(int p = columns.size()-1; p >= 1; p--){
- if(columns.get(p-1) < columns.get(p)-averageWidth-COLUMN_SEPARATION_MIN){
- columns.add(p, columns.get(p)-averageWidth);
- p++;
- }else if(columns.get(p-1) > columns.get(p)-averageWidth+COLUMN_SEPARATION_MIN){
- columns.remove(p-1);
- if(columns.get(p-1)-averageWidth > 0){
- columns.add(p-1, columns.get(p-1)-averageWidth);
- }
- }
- }
-
- //If no edge column is found, insert the column
- while(columns.get(0) > this.horizontal*.1 && columns.get(0)-averageWidth > 0){
- columns.add(0, columns.get(0)-averageWidth);
+ //stores the column breaks list in the class Image
+ this.setColumnBreaks(columns);
+ return 0;
+ }
+ /**
+ * Method to output an image with red lines indicating the column breaks
+ */
+ public void showColumnBreaks(){
+ BufferedImage OutputImage = new BufferedImage(this.horizontal,this.vertical,BufferedImage.TYPE_INT_RGB);
+ int i = 1;
+ int marker = this.columnBreaks.get(0);
+ int value = 0;
+ for (int x = 0; x < this.getHorizontal(); x++) {
+ //iterate and acquire column break marker at the correct time
+ if(i < this.columnBreaks.size()){
+ if(x > marker){
+ marker = this.columnBreaks.get(i);
+ i++;
+ }
+ }
+ for (int y = 0; y < this.getVertical(); y++) {
+ if(x == marker){
+ value = 0xFF0000;//hexadecimal code for the color red
+ }else{
+ //The following line offsets the pixels' values to fix the 'blue problem'
+ value = this.byteImage2[y][x] << 16 | this.byteImage2[y][x] << 8 | this.byteImage2[y][x];
+ }
+ OutputImage.setRGB(x, y, value);
+ }
}
- while(columns.get(columns.size()-1) < this.horizontal-(this.horizontal*.1) && columns.get(columns.size()-1)+averageWidth < this.horizontal){
- columns.add(columns.get(columns.size()-1)+averageWidth);
+ //Output the image to a file of our choosing
+ File outputFile = new File(Constants.customOutput,this.name);
+ try {
+ ImageIO.write(OutputImage, "jpg", outputFile);
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
}
-
- System.out.println(columns);
-
- //stores the column breaks list in the class Image
- this.setColumnBreaks(columns);
- return 0;
- }
- /**
- * Method to output an image with red lines indicating the column breaks
- */
- public void showColumnBreaks(){
- BufferedImage OutputImage = new BufferedImage(this.horizontal,this.vertical,BufferedImage.TYPE_INT_RGB);
- int i = 1;
- int marker = this.columnBreaks.get(0);
- int value = 0;
- for (int x = 0; x < this.getHorizontal(); x++) {
- //iterate and acquire column break marker at the correct time
- if(i < this.columnBreaks.size()){
- if(x > marker){
- marker = this.columnBreaks.get(i);
- i++;
- }
- }
- for (int y = 0; y < this.getVertical(); y++) {
- if(x == marker){
- value = 0xFF0000;//hexadecimal code for the color red
- }else{
- //The following line offsets the pixels' values to fix the 'blue problem'
- value = this.byteImage2[y][x] << 16 | this.byteImage2[y][x] << 8 | this.byteImage2[y][x];
- }
- OutputImage.setRGB(x, y, value);
- }
- }
- //Output the image to a file of our choosing
- File outputFile = new File(Constants.customOutput,this.name);
- try {
- ImageIO.write(OutputImage, "jpg", outputFile);
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
-
- /**
- * Helper method that will compare matricies to ensure that they are the same.
- * Used for debugging purposes.
- * @param A matrix
- * @param B matrix
- * @return number of different pixels
- */
- public int compareMatrices(int[][] A, int[][] B){
- int difference = 0;
- for(int i = 0; i < A.length; i++){
- for(int j = 0; j < A[0].length; j++){
- if(A[i][j] != B[i][j]){
- difference++;
- }
- }
- }
- return difference;
- }
-
+ }
+
+ /**
+ * Debugging helper that counts the positions at which two matrices hold different values.
+ * The comparison covers A.length rows and A[0].length columns; B is read at the same indices.
+ * @param A first matrix; its dimensions bound the comparison
+ * @param B second matrix
+ * @return number of positions (i, j) where A[i][j] != B[i][j]
+ */
+ public int compareMatrices(int[][] A, int[][] B){
+ int difference = 0;
+ for(int i = 0; i < A.length; i++){
+ for(int j = 0; j < A[0].length; j++){
+ if(A[i][j] != B[i][j]){
+ difference++;
+ }
+ }
+ }
+ return difference;
+ }
+
private int snippetHeight(){
int sum = 0;
for(int i = 0; i < this.columnBreaks.size()-1; i++){
@@ -609,158 +629,158 @@ private int snippetHeight(){
int avgHeight = (int) ((14.0/9.0)*avgWidth);
return avgHeight;
}
-
- /**
- * Final step in segmentation algorithm. Once column breaks have been found this method
- * will use those breakpoints to dynamically create snippets of varying width and height (a 14/9 ratio).
- * Snippets are outputted to the directory noted by Constants.Snippets and are grouped together by the full page they came from.
- */
- public void convertPageToSnippets(boolean scaleDown){
- int height = snippetHeight();
- int nextBegin = 0;
- int nextEnd = height;
- int snippetRow = 0;
- int snippetColumn = 0;
- //Identify the parent image name.
- String snippetSubName = this.getName().substring(0, this.getName().lastIndexOf('.'));
- String issueName = snippetSubName.substring(0, snippetSubName.lastIndexOf('_'));
- String parentName = this.getName().substring(0, this.getName().indexOf('_'));
-
- //populate snippet matrix with pixels from full page.
- for(int i = 0; i < columnBreaks.size()-1; i++){
- int width = columnBreaks.get(i+1) - columnBreaks.get(i);
- int[][] snippet = new int[height][width];
- int c = 0;
- for(int j = columnBreaks.get(i); j < columnBreaks.get(i+1); j++){
- int r = 0;
- for(int k = nextBegin; k < nextEnd; k++){
- snippet[r][c] = this.byteImage2[k][j];
- r++;
- }
- c++;
- }
-
- //identify the location of snippet in the full page. Use as name of the snippet
- String snippetName = snippetSubName+"_"+snippetRow+"_"+snippetColumn+".jpg";
-
- //Create BufferedImage for file writing. If scale down is needed perform that
- //first. For the time being scale is hard coded at 4x4.
- BufferedImage OutputImage;
- if(scaleDown){
- int scale = 4;
- int[][] scaledSnippet = scaleDownSnippet(scale, snippet, height, width);
-
- OutputImage = new BufferedImage(width/scale, height/scale, BufferedImage.TYPE_INT_RGB);
- for (int y = 0; y < height/scale; y++) {
- for (int x = 0; x < width/scale; x++) {
- //The following line offsets the pixels' values to fix the 'blue problem'
- int value = scaledSnippet[y][x] << 16 | scaledSnippet[y][x] << 8 | scaledSnippet[y][x];
- OutputImage.setRGB(x, y, value);
- }
- }
- }else{
- OutputImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
- for (int y = 0; y < height; y++) {
- for (int x = 0; x < width; x++) {
- //The following line offsets the pixels' values to fix the 'blue problem'
- int value = snippet[y][x] << 16 | snippet[y][x] << 8 | snippet[y][x];
- OutputImage.setRGB(x, y, value);
- }
- }
- }
-
- //Output the snippet to a file of our choosing
- File outputFile = new File(Constants.Snippets+parentName+"/"+issueName+"/"+snippetSubName+"/",snippetName);
- outputFile.mkdirs();
- try {
- ImageIO.write(OutputImage, "jpg", outputFile);
- } catch (IOException e) {
- e.printStackTrace();
- }
- snippetColumn++;
-
- //determine if row is complete, if yes move to next row, and if not move to next column
- if(i == columnBreaks.size() - 2){
- nextBegin = nextEnd - (height/2);
- nextEnd = nextBegin + height;
- if(nextEnd <= this.getVertical()){
- i = -1;
- snippetRow++;
- snippetColumn = 0;
- }
- }
- }
- }
-
- /**
- * When given an integer to represent the scale (3 for 3x3, 4 for 4x4, 5 for 5x5, etc)
- * this function will average the pixels of the snippet based on the given scale.
- * @param scale
- * @param snippet
- * @param height
- * @param width
- * @return
- */
- private int[][] scaleDownSnippet(int scale, int[][] snippet, int height, int width){
- int r = 0,s = 0;
- int[][] scaledImage = new int[height/scale][width/scale];
- for(int i = scale/2; i < height - (scale/2); i = i+scale){
- s = 0;
- for(int j = scale/2; j < width - (scale/2); j = j+scale){
- scaledImage[r][s] = average(scale, snippet, i, j);
- s++;
- }
- r++;
- }
- return scaledImage;
- }
-
- /**
- * returns the average pixel value of a pixels scale x scale area.
- * This function can use both odd and even numbers.
- * @param scale
- * @param snippet
- * @param i
- * @param j
- * @return
- */
- private int average(int scale, int[][] snippet, int i, int j){
- int sum = 0;
- if(scale%2 == 0){
- for(int a = i-(scale/2); a < i+(scale/2); a++){
- for(int b = j-(scale/2); b < j+(scale/2); b++){
- sum+=snippet[i][j];
- }
- }
- }else{
- for(int a = i-(scale/2); a <= i+(scale/2); a++){
- for(int b = j-(scale/2); b <= j+(scale/2); b++){
- sum+=snippet[i][j];
- }
- }
- }
- return sum/(scale*scale);
- }
-
- public void printImage(String filePath){
- int w = this.getHorizontal(),h = this.getVertical();
- BufferedImage OutputImage = new BufferedImage(w,h,BufferedImage.TYPE_INT_RGB);
- int[][] pixels3 = this.getByteImage();
- for (int y = 0; y < this.getVertical(); y++) {
- for (int x = 0; x < this.getHorizontal(); x++) {
- //The following line offsets the pixels' values to fix the 'blue problem'
- int value = pixels3[y][x] << 16 | pixels3[y][x] << 8 | pixels3[y][x];
- OutputImage.setRGB(x, y, value);
- }
- }
-
- File outputFile = new File(filePath,this.name);
- try {
- ImageIO.write(OutputImage, "jpg", outputFile);
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
-
+
+ /**
+ * Final step in the segmentation algorithm: cuts the page into snippets between adjacent column breaks,
+ * each snippetHeight() tall (a 14/9 ratio), and writes them as JPEGs under Constants.Snippets, grouped by source page.
+ * @param scaleDown when true, every snippet is shrunk with the hard-coded 4x4 scale before being written
+ */
+ public void convertPageToSnippets(boolean scaleDown){
+ int height = snippetHeight();
+ int nextBegin = 0;
+ int nextEnd = height;
+ int snippetRow = 0;
+ int snippetColumn = 0;
+ //Derive the folder names from the image name: text before the first '_', before the last '_', and before the extension.
+ String snippetSubName = this.getName().substring(0, this.getName().lastIndexOf('.'));
+ String issueName = snippetSubName.substring(0, snippetSubName.lastIndexOf('_'));
+ String parentName = this.getName().substring(0, this.getName().indexOf('_'));
+
+ //populate snippet matrix with pixels from full page (rows nextBegin..nextEnd-1 of the current column).
+ for(int i = 0; i < columnBreaks.size()-1; i++){
+ int width = columnBreaks.get(i+1) - columnBreaks.get(i);
+ int[][] snippet = new int[height][width];
+ int c = 0;
+ for(int j = columnBreaks.get(i); j < columnBreaks.get(i+1); j++){
+ int r = 0;
+ for(int k = nextBegin; k < nextEnd; k++){
+ snippet[r][c] = this.byteImage2[k][j];
+ r++;
+ }
+ c++;
+ }
+
+ //identify the location of snippet in the full page. Use as name of the snippet
+ String snippetName = snippetSubName+"_"+snippetRow+"_"+snippetColumn+".jpg";
+
+ //Create BufferedImage for file writing. If scale down is needed perform that
+ //first. For the time being scale is hard coded at 4x4.
+ BufferedImage OutputImage;
+ if(scaleDown){
+ int scale = 4;
+ int[][] scaledSnippet = scaleDownSnippet(scale, snippet, height, width);
+
+ OutputImage = new BufferedImage(width/scale, height/scale, BufferedImage.TYPE_INT_RGB);
+ for (int y = 0; y < height/scale; y++) {
+ for (int x = 0; x < width/scale; x++) {
+ //Pack the same pixel value into the red, green and blue bytes; this fixes the 'blue problem'
+ int value = scaledSnippet[y][x] << 16 | scaledSnippet[y][x] << 8 | scaledSnippet[y][x];
+ OutputImage.setRGB(x, y, value);
+ }
+ }
+ }else{
+ OutputImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ //Pack the same pixel value into the red, green and blue bytes; this fixes the 'blue problem'
+ int value = snippet[y][x] << 16 | snippet[y][x] << 8 | snippet[y][x];
+ OutputImage.setRGB(x, y, value);
+ }
+ }
+ }
+
+ //Output the snippet to a file of our choosing
+ File outputFile = new File(Constants.Snippets+parentName+"/"+issueName+"/"+snippetSubName+"/",snippetName);
+ outputFile.getParentFile().mkdirs(); //create only the enclosing folders; mkdirs() on outputFile also made a directory at the .jpg path
+ try {
+ ImageIO.write(OutputImage, "jpg", outputFile);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ snippetColumn++;
+
+ //determine if row is complete, if yes move to next row, and if not move to next column
+ if(i == columnBreaks.size() - 2){
+ nextBegin = nextEnd - (height/2); //the next row starts half a snippet height before this one ends, so rows overlap
+ nextEnd = nextBegin + height;
+ if(nextEnd <= this.getVertical()){
+ i = -1; //the loop increment turns this into 0, i.e. the first column of the new row
+ snippetRow++;
+ snippetColumn = 0;
+ }
+ }
+ }
+ }
+
+ /**
+ * Shrinks a snippet by replacing every scale x scale tile (3 for 3x3, 4 for 4x4, 5 for 5x5, etc)
+ * with the single value that average() returns for the centre of that tile.
+ * @param scale side length of one tile, in pixels
+ * @param snippet source pixels, indexed as snippet[row][column]
+ * @param height number of rows of snippet to scale
+ * @param width number of columns of snippet to scale
+ * @return matrix with height/scale rows and width/scale columns (integer division)
+ */
+ private int[][] scaleDownSnippet(int scale, int[][] snippet, int height, int width){
+ int[][] scaledImage = new int[height/scale][width/scale];
+ //Loop over output cells rather than source centres: the old bound (i < height - scale/2) stopped one tile
+ //early when scale was even and divided the size exactly, leaving a zero-filled last row/column.
+ for(int r = 0; r < height/scale; r++){
+ int i = (r*scale) + (scale/2);
+ for(int s = 0; s < width/scale; s++){
+ int j = (s*scale) + (scale/2);
+ scaledImage[r][s] = average(scale, snippet, i, j);
+ }
+ }
+ return scaledImage;
+ }
+
+ /**
+ * Returns the mean pixel value of the scale x scale window centred on (i, j).
+ * Even scales cover [c - scale/2, c + scale/2) on each axis; odd scales cover [c - scale/2, c + scale/2].
+ * @param scale side length of the averaging window, in pixels
+ * @param snippet pixel matrix, indexed as snippet[row][column]
+ * @param i row index of the window centre
+ * @param j column index of the window centre
+ * @return sum of the window's pixels divided by scale*scale (integer division)
+ */
+ private int average(int scale, int[][] snippet, int i, int j){
+ int sum = 0;
+ if(scale%2 == 0){
+ for(int a = i-(scale/2); a < i+(scale/2); a++){
+ for(int b = j-(scale/2); b < j+(scale/2); b++){
+ sum+=snippet[a][b]; //index with the loop variables; snippet[i][j] only re-read the centre pixel
+ }
+ }
+ }else{
+ for(int a = i-(scale/2); a <= i+(scale/2); a++){
+ for(int b = j-(scale/2); b <= j+(scale/2); b++){
+ sum+=snippet[a][b]; //index with the loop variables; snippet[i][j] only re-read the centre pixel
+ }
+ }
+ }
+ return sum/(scale*scale);
+ }
+
+ public void printImage(String filePath){ //writes byteImage as a JPEG called this.name inside the directory filePath
+ int w = this.getHorizontal(),h = this.getVertical();
+ BufferedImage OutputImage = new BufferedImage(w,h,BufferedImage.TYPE_INT_RGB);
+ int[][] pixels3 = this.getByteImage();
+ for (int y = 0; y < this.getVertical(); y++) {
+ for (int x = 0; x < this.getHorizontal(); x++) {
+ //Pack the same pixel value into the red, green and blue bytes; this fixes the 'blue problem'
+ int value = pixels3[y][x] << 16 | pixels3[y][x] << 8 | pixels3[y][x];
+ OutputImage.setRGB(x, y, value);
+ }
+ }
+
+ File outputFile = new File(filePath,this.name);
+ try {
+ ImageIO.write(OutputImage, "jpg", outputFile);
+ } catch (IOException e) {
+ //A failed write is only reported through printStackTrace(); no exception reaches the caller
+ e.printStackTrace();
+ }
+ }
+
}
diff --git a/src/models/ReadIni.java b/src/models/ReadIni.java
new file mode 100644
index 0000000..97eec01
--- /dev/null
+++ b/src/models/ReadIni.java
@@ -0,0 +1,51 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package models;
+
+import java.util.*;
+import java.io.*;
+/**
+ * Loads the run-time options of the program from the properties file "../user.ini".
+ * @author Mike
+ */
+public class ReadIni {
+ private EnumCollection myEnums = new EnumCollection();
+ // Members: int codes produced by EnumCollection; fields not yet assigned keep the default 0 if loading fails
+ private int ImageType;
+ private int NeedBinarizing;
+ private int NeedBlurring;
+ private int NeedConsolidating;
+
+ // Getters: each returns the code stored for the user.ini key of the same name
+ public int GetImageType(){
+ return this.ImageType;
+ }
+ public int GetNeedBinarizing(){
+ return this.NeedBinarizing;
+ }
+ public int GetNeedBlurring(){
+ return this.NeedBlurring;
+ }
+ public int GetNeedConsolidating(){
+ return this.NeedConsolidating;
+ }
+
+ // Reads user.ini once; any failure (missing file, missing key) is printed and not rethrown
+ public ReadIni(){
+ Properties p = new Properties();
+ try (FileInputStream in = new FileInputStream("../user.ini")){
+ p.load(in); // try-with-resources closes the stream, which the old inline stream never did
+ // Upper-case with Locale.ROOT so values such as "tif" map the same way under every default locale
+ this.ImageType = myEnums.GetIntOfImageType(p.getProperty("ImageType").toUpperCase(Locale.ROOT));
+ this.NeedBinarizing = myEnums.GetIntOfTrueFalse(p.getProperty("NeedBinarizing").toUpperCase(Locale.ROOT));
+ this.NeedBlurring = myEnums.GetIntOfTrueFalse(p.getProperty("NeedBlurring").toUpperCase(Locale.ROOT));
+ this.NeedConsolidating = myEnums.GetIntOfTrueFalse(p.getProperty("NeedConsolidating").toUpperCase(Locale.ROOT));
+ }
+ catch (Exception e){
+ System.out.println(e);
+ }
+ }
+}
diff --git a/tif_jar/jai_imageio.jar b/tif_jar/jai_imageio.jar
new file mode 100644
index 0000000..359551e
Binary files /dev/null and b/tif_jar/jai_imageio.jar differ
diff --git a/user.ini b/user.ini
new file mode 100644
index 0000000..13b92ee
--- /dev/null
+++ b/user.ini
@@ -0,0 +1,4 @@
+ImageType=TIF
+NeedBinarizing=TRUE
+NeedBlurring=TRUE
+NeedConsolidating=TRUE
\ No newline at end of file