Leptonica 1.68
C Image Processing Library

comparepages.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * comparepages.c
00018  *
00019  *    This compares text pages using the location of word bounding boxes.
00020  *    The goal is to get a fast and robust determination for whether
00021  *    two pages are the same.
00022  */
00023 
00024 #include <stdio.h>
00025 #include <stdlib.h>
00026 #include "allheaders.h"
00027 
00028 main(int    argc,
00029      char **argv)
00030 {
00031 l_int32      w, h, n, same;
00032 BOXA        *boxa1, *boxa2;
00033 NUMA        *nai1, *nai2;
00034 NUMAA       *naa1, *naa1r, *naa2;
00035 PIX         *pixs, *pixt, *pixb1, *pixb2;
00036 static char  mainName[] = "comparepages";
00037 
00038     pixs = pixRead("lucasta-47.jpg");
00039     pixb1 = pixConvertTo1(pixs, 128);
00040     pixGetWordBoxesInTextlines(pixb1, 1, 10, 10, 500, 50, &boxa1, &nai1);
00041     pixt = pixDrawBoxaRandom(pixs, boxa1, 2);
00042     pixDisplay(pixt, 100, 100);
00043     pixWrite("junkpixt", pixt, IFF_PNG);
00044     naa1 = boxaExtractSortedPattern(boxa1, nai1);
00045     numaaWrite("junknaa1", naa1);
00046     naa1r = numaaRead("junknaa1");
00047     numaaWrite("junknaa1r", naa1r);
00048     n = numaaGetCount(naa1);
00049     fprintf(stderr, "Number of textlines = %d\n", n);
00050     pixDisplay(pixb1, 300, 0);
00051 
00052         /* Translate */
00053     pixb2 = pixCreateTemplate(pixb1);
00054     pixGetDimensions(pixb1, &w, &h, NULL);
00055     pixRasterop(pixb2, 148, 133, w, h, PIX_SRC, pixb1, 0, 0);
00056     pixDisplay(pixb2, 600, 0);
00057     pixGetWordBoxesInTextlines(pixb2, 1, 10, 10, 500, 50, &boxa2, &nai2);
00058     naa2 = boxaExtractSortedPattern(boxa2, nai2);
00059     numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);
00060     fprintf(stderr, "Translation.  same?: %d\n\n", same);
00061     boxaDestroy(&boxa2);
00062     numaDestroy(&nai2);
00063     pixDestroy(&pixb2);
00064     numaaDestroy(&naa2);
00065 
00066         /* Aligned part is below h/3 */
00067     pixb2 = pixCreateTemplate(pixb1);
00068     pixGetDimensions(pixb1, &w, &h, NULL);
00069     pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3);
00070     pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, h / 3);
00071     pixDisplay(pixb2, 900, 0);
00072     pixGetWordBoxesInTextlines(pixb2, 1, 10, 10, 500, 50, &boxa2, &nai2);
00073     naa2 = boxaExtractSortedPattern(boxa2, nai2);
00074     numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);
00075     fprintf(stderr, "Aligned part below h/3.  same?: %d\n\n", same);
00076     boxaDestroy(&boxa2);
00077     numaDestroy(&nai2);
00078     pixDestroy(&pixb2);
00079     numaaDestroy(&naa2);
00080 
00081         /* Top and bottom switched; no aligned parts */
00082     pixb2 = pixCreateTemplate(pixb1);
00083     pixGetDimensions(pixb1, &w, &h, NULL);
00084     pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3);
00085     pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, 0);
00086     pixDisplay(pixb2, 1200, 0);
00087     pixGetWordBoxesInTextlines(pixb2, 1, 10, 10, 500, 50, &boxa2, &nai2);
00088     naa2 = boxaExtractSortedPattern(boxa2, nai2);
00089     numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);
00090     fprintf(stderr, "Top/Bot switched; no alignment.  Same?: %d\n", same);
00091     boxaDestroy(&boxa2);
00092     numaDestroy(&nai2);
00093     pixDestroy(&pixb2);
00094     numaaDestroy(&naa2);
00095 
00096     boxaDestroy(&boxa1);
00097     numaDestroy(&nai1);
00098     pixDestroy(&pixs);
00099     pixDestroy(&pixb1);
00100     pixDestroy(&pixt);
00101     numaaDestroy(&naa1);
00102     numaaDestroy(&naa1r);
00103     return 0;
00104 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines