Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * comparepages.c 00018 * 00019 * This compares text pages using the location of word bounding boxes. 00020 * The goal is to get a fast and robust determination for whether 00021 * two pages are the same. 00022 */ 00023 00024 #include <stdio.h> 00025 #include <stdlib.h> 00026 #include "allheaders.h" 00027 00028 main(int argc, 00029 char **argv) 00030 { 00031 l_int32 w, h, n, same; 00032 BOXA *boxa1, *boxa2; 00033 NUMA *nai1, *nai2; 00034 NUMAA *naa1, *naa1r, *naa2; 00035 PIX *pixs, *pixt, *pixb1, *pixb2; 00036 static char mainName[] = "comparepages"; 00037 00038 pixs = pixRead("lucasta-47.jpg"); 00039 pixb1 = pixConvertTo1(pixs, 128); 00040 pixGetWordBoxesInTextlines(pixb1, 1, 10, 10, 500, 50, &boxa1, &nai1); 00041 pixt = pixDrawBoxaRandom(pixs, boxa1, 2); 00042 pixDisplay(pixt, 100, 100); 00043 pixWrite("junkpixt", pixt, IFF_PNG); 00044 naa1 = boxaExtractSortedPattern(boxa1, nai1); 00045 numaaWrite("junknaa1", naa1); 00046 naa1r = numaaRead("junknaa1"); 00047 numaaWrite("junknaa1r", naa1r); 00048 n = numaaGetCount(naa1); 00049 fprintf(stderr, "Number of textlines = %d\n", n); 00050 pixDisplay(pixb1, 300, 0); 00051 00052 /* Translate */ 00053 pixb2 = pixCreateTemplate(pixb1); 00054 pixGetDimensions(pixb1, &w, &h, NULL); 00055 pixRasterop(pixb2, 148, 133, w, h, PIX_SRC, pixb1, 0, 0); 00056 pixDisplay(pixb2, 600, 0); 00057 pixGetWordBoxesInTextlines(pixb2, 1, 10, 10, 500, 50, &boxa2, &nai2); 00058 naa2 = boxaExtractSortedPattern(boxa2, nai2); 00059 numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1); 00060 fprintf(stderr, "Translation. same?: %d\n\n", same); 00061 boxaDestroy(&boxa2); 00062 numaDestroy(&nai2); 00063 pixDestroy(&pixb2); 00064 numaaDestroy(&naa2); 00065 00066 /* Aligned part is below h/3 */ 00067 pixb2 = pixCreateTemplate(pixb1); 00068 pixGetDimensions(pixb1, &w, &h, NULL); 00069 pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3); 00070 pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, h / 3); 00071 pixDisplay(pixb2, 900, 0); 00072 pixGetWordBoxesInTextlines(pixb2, 1, 10, 10, 500, 50, &boxa2, &nai2); 00073 naa2 = boxaExtractSortedPattern(boxa2, nai2); 00074 numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1); 00075 fprintf(stderr, "Aligned part below h/3. same?: %d\n\n", same); 00076 boxaDestroy(&boxa2); 00077 numaDestroy(&nai2); 00078 pixDestroy(&pixb2); 00079 numaaDestroy(&naa2); 00080 00081 /* Top and bottom switched; no aligned parts */ 00082 pixb2 = pixCreateTemplate(pixb1); 00083 pixGetDimensions(pixb1, &w, &h, NULL); 00084 pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3); 00085 pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, 0); 00086 pixDisplay(pixb2, 1200, 0); 00087 pixGetWordBoxesInTextlines(pixb2, 1, 10, 10, 500, 50, &boxa2, &nai2); 00088 naa2 = boxaExtractSortedPattern(boxa2, nai2); 00089 numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1); 00090 fprintf(stderr, "Top/Bot switched; no alignment. Same?: %d\n", same); 00091 boxaDestroy(&boxa2); 00092 numaDestroy(&nai2); 00093 pixDestroy(&pixb2); 00094 numaaDestroy(&naa2); 00095 00096 boxaDestroy(&boxa1); 00097 numaDestroy(&nai1); 00098 pixDestroy(&pixs); 00099 pixDestroy(&pixb1); 00100 pixDestroy(&pixt); 00101 numaaDestroy(&naa1); 00102 numaaDestroy(&naa1r); 00103 return 0; 00104 }