53 #include "allheaders.h" 60 static l_int32 testLineAlignmentX(
NUMA *na1,
NUMA *na2, l_int32 shiftx,
61 l_int32 delx, l_int32 nperline);
62 static l_int32 countAlignedMatches(
NUMA *nai1,
NUMA *nai2,
NUMA *nasx,
63 NUMA *nasy, l_int32 n1, l_int32 n2,
64 l_int32 delx, l_int32 dely,
65 l_int32 nreq, l_int32 *psame,
67 static void printRowIndices(l_int32 *index1, l_int32 n1,
68 l_int32 *index2, l_int32 n2);
100 const char *rootname,
106 l_int32 nfiles, i, numpages;
113 PROCNAME(
"jbCorrelation");
116 return ERROR_INT(
"dirin not defined", procName, 1);
118 return ERROR_INT(
"rootname not defined", procName, 1);
119 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
120 components != JB_WORDS)
121 return ERROR_INT(
"components invalid", procName, 1);
127 classer = jbCorrelationInit(components, 0, 0, thresh, weight);
128 jbAddPages(classer, safiles);
131 data = jbDataSave(classer);
132 jbDataWrite(rootname, data);
136 pixa = jbDataRender(data, FALSE);
138 if (numpages != nfiles)
139 fprintf(stderr,
"numpages = %d, nfiles = %d, not equal!\n",
141 for (i = 0; i < numpages; i++) {
143 snprintf(filename,
L_BUF_SIZE,
"%s.%04d", rootname, i);
144 fprintf(stderr,
"filename: %s\n", filename);
145 pixWrite(filename,
pix, IFF_PNG);
152 jbClasserDestroy(&classer);
153 jbDataDestroy(&data);
182 const char *rootname,
188 l_int32 nfiles, i, numpages;
195 PROCNAME(
"jbRankHaus");
198 return ERROR_INT(
"dirin not defined", procName, 1);
200 return ERROR_INT(
"rootname not defined", procName, 1);
201 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
202 components != JB_WORDS)
203 return ERROR_INT(
"components invalid", procName, 1);
209 classer = jbRankHausInit(components, 0, 0, size, rank);
210 jbAddPages(classer, safiles);
213 data = jbDataSave(classer);
214 jbDataWrite(rootname, data);
218 pixa = jbDataRender(data, FALSE);
220 if (numpages != nfiles)
221 fprintf(stderr,
"numpages = %d, nfiles = %d, not equal!\n",
223 for (i = 0; i < numpages; i++) {
225 snprintf(filename,
L_BUF_SIZE,
"%s.%04d", rootname, i);
226 fprintf(stderr,
"filename: %s\n", filename);
227 pixWrite(filename,
pix, IFF_PNG);
234 jbClasserDestroy(&classer);
235 jbDataDestroy(&data);
277 l_int32 nfiles, i,
w,
h;
285 PROCNAME(
"jbWordsInTextlines");
288 return (
JBCLASSER *)ERROR_PTR(
"&natl not defined", procName, NULL);
291 return (
JBCLASSER *)ERROR_PTR(
"dirin not defined", procName, NULL);
292 if (reduction != 1 && reduction != 2)
293 return (
JBCLASSER *)ERROR_PTR(
"reduction not in {1,2}", procName, NULL);
299 classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight);
303 for (i = 0; i < nfiles; i++) {
305 if ((pix1 =
pixRead(fname)) == NULL) {
306 L_WARNING(
"image file %d not read\n", procName, i);
319 jbAddPageComponents(classer, pix2, boxa, pixa);
396 PROCNAME(
"pixGetWordsInTextlines");
398 if (!pboxad || !ppixad || !pnai)
399 return ERROR_INT(
"&boxad, &pixad, &nai not all defined", procName, 1);
404 return ERROR_INT(
"pixs not defined", procName, 1);
407 pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
469 PROCNAME(
"pixGetWordBoxesInTextlines");
471 if (pnai) *pnai = NULL;
473 return ERROR_INT(
"&boxad and &nai not both defined", procName, 1);
476 return ERROR_INT(
"pixs not defined", procName, 1);
479 pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
528 const char *debugdir)
530 char *debugfile, *subdir;
531 l_int32 i, xs, ys, xb, yb, nb;
534 BOXA *boxa1, *boxa1a, *boxa2, *boxa3, *boxa4, *boxa5, *boxaw;
536 PIX *pix1, *pix2, *pix3, *pix3a, *pix4, *pix5;
538 PROCNAME(
"pixFindWordAndCharacterBoxes");
540 if (pboxaw) *pboxaw = NULL;
541 if (pboxaac) *pboxaac = NULL;
542 if (!pboxaw || !pboxaac)
543 return ERROR_INT(
"&boxaw and &boxaac not defined", procName, 1);
544 if (!pixs || pixGetDepth(pixs) == 1)
545 return ERROR_INT(
"pixs not defined or 1 bpp", procName, 1);
547 L_WARNING(
"threshold is %d; may be too high\n", procName, thresh);
551 return ERROR_INT(
"pix1 not made", procName, 1);
571 boxa1 =
boxaTransform(boxa1a, 0, 0, 1.0 / scalefact, 1.0 / scalefact);
578 debugfile =
stringJoin(debugdir,
"/words.png");
579 pixWrite(debugfile, pix4, IFF_PNG);
581 LEPT_FREE(debugfile);
590 for (i = 0; i < nb; i++) {
634 debugfile =
stringJoin(debugdir,
"/chars.png");
635 pixWrite(debugfile, pix4, IFF_PNG);
639 LEPT_FREE(debugfile);
669 l_int32 index, nbox, row, prevrow, x, y,
w,
h;
674 PROCNAME(
"boxaExtractSortedPattern");
677 return (
NUMAA *)ERROR_PTR(
"boxa not defined", procName, NULL);
679 return (
NUMAA *)ERROR_PTR(
"na not defined", procName, NULL);
687 for (index = 0; index < nbox; index++) {
765 l_int32 n1, n2, i, j, nbox, y1, y2, xl1, xl2;
766 l_int32 shiftx, shifty, match;
767 l_int32 *line1, *line2;
768 l_int32 *yloc1, *yloc2;
769 l_int32 *xleft1, *xleft2;
770 NUMA *na1, *na2, *nai1, *nai2, *nasx, *nasy;
772 PROCNAME(
"numaaCompareImagesByBoxes");
775 return ERROR_INT(
"&same not defined", procName, 1);
778 return ERROR_INT(
"naa1 not defined", procName, 1);
780 return ERROR_INT(
"naa2 not defined", procName, 1);
782 return ERROR_INT(
"nperline < 1", procName, 1);
784 return ERROR_INT(
"nreq < 1", procName, 1);
788 if (n1 < nreq || n2 < nreq)
794 line1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
795 line2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
796 yloc1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
797 yloc2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
798 xleft1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
799 xleft2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
800 if (!line1 || !line2 || !yloc1 || !yloc2 || !xleft1 || !xleft2)
801 return ERROR_INT(
"callof failure for an array", procName, 1);
802 for (i = 0; i < n1; i++) {
807 if (nbox >= nperline)
811 for (i = 0; i < n2; i++) {
816 if (nbox >= nperline)
832 for (i = 0; i < n1; i++) {
833 if (line1[i] == 0)
continue;
837 for (j = 0; j < n2; j++) {
838 if (line2[j] == 0)
continue;
840 if (L_ABS(y1 - y2) > maxshifty)
continue;
842 if (L_ABS(xl1 - xl2) > maxshiftx)
continue;
848 match = testLineAlignmentX(na1, na2, shiftx, delx, nperline);
864 countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely,
865 nreq, psame, debugflag);
882 testLineAlignmentX(
NUMA *na1,
888 l_int32 i, xl1, xr1, xl2, xr2, diffl, diffr;
890 PROCNAME(
"testLineAlignmentX");
893 return ERROR_INT(
"na1 not defined", procName, 1);
895 return ERROR_INT(
"na2 not defined", procName, 1);
897 for (i = 0; i < nperline; i++) {
902 diffl = L_ABS(xl1 - xl2 - shiftx);
903 diffr = L_ABS(xr1 - xr2 - shiftx);
904 if (diffl > delx || diffr > delx)
934 countAlignedMatches(
NUMA *nai1,
946 l_int32 i, j, nm, shiftx, shifty, nmatch, diffx, diffy;
947 l_int32 *ia1, *ia2, *iasx, *iasy, *index1, *index2;
949 PROCNAME(
"countAlignedMatches");
951 if (!nai1 || !nai2 || !nasx || !nasy)
952 return ERROR_INT(
"4 input numas not defined", procName, 1);
954 return ERROR_INT(
"&same not defined", procName, 1);
971 index1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
972 index2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
973 if (!index1 || !index2)
974 return ERROR_INT(
"calloc fail for array", procName, 1);
975 for (i = 0; i < nm; i++) {
980 memset(index1, 0, 4 * n1);
981 memset(index2, 0, 4 * n2);
983 index1[ia1[i]] = nmatch;
984 index2[ia2[i]] = nmatch;
991 for (j = 0; j < nm; j++) {
992 if (j == i)
continue;
994 if (index1[ia1[j]] > 0 || index2[ia2[j]] > 0)
continue;
996 diffx = L_ABS(shiftx - iasx[j]);
997 diffy = L_ABS(shifty - iasy[j]);
998 if (diffx > delx || diffy > dely)
continue;
1001 index1[ia1[j]] = nmatch;
1002 index2[ia2[j]] = nmatch;
1003 if (nmatch >= nreq) {
1006 printRowIndices(index1, n1, index2, n2);
1023 printRowIndices(l_int32 *index1,
1030 fprintf(stderr,
"Index1: ");
1031 for (i = 0; i < n1; i++) {
1032 if (i && (i % 20 == 0))
1033 fprintf(stderr,
"\n ");
1034 fprintf(stderr,
"%3d", index1[i]);
1036 fprintf(stderr,
"\n");
1038 fprintf(stderr,
"Index2: ");
1039 for (i = 0; i < n2; i++) {
1040 if (i && (i % 20 == 0))
1041 fprintf(stderr,
"\n ");
1042 fprintf(stderr,
"%3d", index2[i]);
1044 fprintf(stderr,
"\n");
void pixaaDestroy(PIXAA **ppaa)
pixaaDestroy()
PIX * pixConvertTo1(PIX *pixs, l_int32 threshold)
pixConvertTo1()
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
NUMAA * boxaExtractSortedPattern(BOXA *boxa, NUMA *na)
boxaExtractSortedPattern()
PIXAA * pixaSort2dByIndex(PIXA *pixas, NUMAA *naa, l_int32 copyflag)
pixaSort2dByIndex()
BOXA * boxaSort(BOXA *boxas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex)
boxaSort()
PIX * pixConvertTo32(PIX *pixs)
pixConvertTo32()
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
BOXA * boxaSelectBySize(BOXA *boxas, l_int32 width, l_int32 height, l_int32 type, l_int32 relation, l_int32 *pchanged)
boxaSelectBySize()
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
PIX * pixScaleToResolution(PIX *pixs, l_float32 target, l_float32 assumed, l_float32 *pscalefact)
pixScaleToResolution()
PIXA * pixaaFlattenToPixa(PIXAA *paa, NUMA **pnaindex, l_int32 copyflag)
pixaaFlattenToPixa()
l_ok numaaCompareImagesByBoxes(NUMAA *naa1, NUMAA *naa2, l_int32 nperline, l_int32 nreq, l_int32 maxshiftx, l_int32 maxshifty, l_int32 delx, l_int32 dely, l_int32 *psame, l_int32 debugflag)
numaaCompareImagesByBoxes()
l_ok jbCorrelation(const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag)
jbCorrelation()
NUMA * numaCreate(l_int32 n)
numaCreate()
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
PIXA * pixaCreateFromBoxa(PIX *pixs, BOXA *boxa, l_int32 start, l_int32 num, l_int32 *pcropwarn)
pixaCreateFromBoxa()
BOX * boxTransform(BOX *box, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley)
boxTransform()
static const l_int32 JB_WORDS_MIN_WIDTH
NUMAA * numaaCreate(l_int32 n)
numaaCreate()
PIX * pixClipRectangle(PIX *pixs, BOX *box, BOX **pboxc)
pixClipRectangle()
l_ok pixFindWordAndCharacterBoxes(PIX *pixs, BOX *boxs, l_int32 thresh, BOXA **pboxaw, BOXAA **pboxaac, const char *debugdir)
pixFindWordAndCharacterBoxes()
void numaaDestroy(NUMAA **pnaa)
numaaDestroy()
static const l_int32 JB_WORDS_MIN_HEIGHT
l_int32 * numaGetIArray(NUMA *na)
numaGetIArray()
l_ok boxaaAddBoxa(BOXAA *baa, BOXA *ba, l_int32 copyflag)
boxaaAddBoxa()
NUMA * numaaGetNuma(NUMAA *naa, l_int32 index, l_int32 accessflag)
numaaGetNuma()
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
void boxaaDestroy(BOXAA **pbaa)
boxaaDestroy()
BOXA * boxaTransform(BOXA *boxas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley)
boxaTransform()
l_int32 numaGetCount(NUMA *na)
numaGetCount()
BOXA * pixConnCompBB(PIX *pixs, l_int32 connectivity)
pixConnCompBB()
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
l_ok pixGetWordsInTextlines(PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai)
pixGetWordsInTextlines()
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
l_ok jbRankHaus(const char *dirin, l_int32 size, l_float32 rank, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag)
jbRankHaus()
PIX * pixClone(PIX *pixs)
pixClone()
void pixDestroy(PIX **ppix)
pixDestroy()
l_ok pixGetWordBoxesInTextlines(PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, NUMA **pnai)
pixGetWordBoxesInTextlines()
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
JBCLASSER * jbWordsInTextlines(const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages)
jbWordsInTextlines()
void numaDestroy(NUMA **pna)
numaDestroy()
l_ok pixGetDimensions(const PIX *pix, l_int32 *pw, l_int32 *ph, l_int32 *pd)
pixGetDimensions()
BOXAA * boxaaCreate(l_int32 n)
boxaaCreate()
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
BOXA * boxaAdjustSides(BOXA *boxas, l_int32 delleft, l_int32 delright, l_int32 deltop, l_int32 delbot)
boxaAdjustSides()
PIX * pixRead(const char *filename)
pixRead()
l_ok numaJoin(NUMA *nad, NUMA *nas, l_int32 istart, l_int32 iend)
numaJoin()
l_int32 numaaGetCount(NUMAA *naa)
numaaGetCount()
l_ok pixRenderBoxaArb(PIX *pix, BOXA *boxa, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval)
pixRenderBoxaArb()
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
char * stringJoin(const char *src1, const char *src2)
stringJoin()
BOXA * boxaCreate(l_int32 n)
boxaCreate()
void boxDestroy(BOX **pbox)
boxDestroy()
BOXAA * boxaSort2d(BOXA *boxas, NUMAA **pnaad, l_int32 delta1, l_int32 delta2, l_int32 minh1)
boxaSort2d()
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
BOXA * boxaaFlattenToBoxa(BOXAA *baa, NUMA **pnaindex, l_int32 copyflag)
boxaaFlattenToBoxa()
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
PIX * pixReduceRankBinaryCascade(PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4)
pixReduceRankBinaryCascade()
void pixaDestroy(PIXA **ppixa)
pixaDestroy()
char * stringReplaceSubstr(const char *src, const char *sub1, const char *sub2, l_int32 *pfound, l_int32 *ploc)
stringReplaceSubstr()
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
l_ok numaaAddNuma(NUMAA *naa, NUMA *na, l_int32 copyflag)
numaaAddNuma()
static const l_int32 L_BUF_SIZE
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
BOXA * pixaGetBoxa(PIXA *pixa, l_int32 accesstype)
pixaGetBoxa()