Leptonica  1.77.0
Image processing and image analysis suite
partition.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
45 #include "allheaders.h"
46 
49  l_float32 size; /* sorting key */
50  BOX *box; /* region of the element */
51  BOXA *boxa; /* set of intersecting boxes */
52 };
53 typedef struct PartitionElement PARTEL;
54 
55 static PARTEL * partelCreate(BOX *box);
56 static void partelDestroy(PARTEL **ppartel);
57 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag);
58 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim,
59  l_float32 fract);
60 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim,
61  l_float32 fract);
62 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa,
63  l_float32 maxoverlap);
64 
65 static const l_int32 DEFAULT_MAX_POPS = 20000;
66 
67 
68 #ifndef NO_CONSOLE_IO
69 #define OUTPUT_HEAP_STATS 0
70 #endif /* ~NO_CONSOLE_IO */
71 
72 
73 /*------------------------------------------------------------------*
74  * Whitespace block extraction *
75  *------------------------------------------------------------------*/
188 BOXA *
190  BOX *box,
191  l_int32 sortflag,
192  l_int32 maxboxes,
193  l_float32 maxoverlap,
194  l_int32 maxperim,
195  l_float32 fract,
196  l_int32 maxpops)
197 {
198 l_int32 i, w, h, n, nsub, npush, npop;
199 BOX *boxsub;
200 BOXA *boxa, *boxa4, *boxasub, *boxad;
201 PARTEL *partel;
202 L_HEAP *lh;
203 
204  PROCNAME("boxaGetWhiteblocks");
205 
206  if (!boxas)
207  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
208  if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT &&
209  sortflag != L_SORT_BY_MIN_DIMENSION &&
210  sortflag != L_SORT_BY_MAX_DIMENSION &&
211  sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA)
212  return (BOXA *)ERROR_PTR("invalid sort flag", procName, NULL);
213  if (maxboxes < 1) {
214  maxboxes = 1;
215  L_WARNING("setting maxboxes = 1\n", procName);
216  }
217  if (maxoverlap < 0.0 || maxoverlap > 1.0)
218  return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
219  if (maxpops == 0)
220  maxpops = DEFAULT_MAX_POPS;
221 
222  if (!box) {
223  boxaGetExtent(boxas, &w, &h, NULL);
224  box = boxCreate(0, 0, w, h);
225  }
226 
227  /* Prime the heap */
228  lh = lheapCreate(20, L_SORT_DECREASING);
229  partel = partelCreate(box);
230  partel->boxa = boxaCopy(boxas, L_CLONE);
231  partelSetSize(partel, sortflag);
232  lheapAdd(lh, partel);
233 
234  npush = 1;
235  npop = 0;
236  boxad = boxaCreate(0);
237  while (1) {
238  if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */
239  break;
240 
241  npop++; /* How many boxes have we retrieved from the queue? */
242  if (npop > maxpops) {
243  partelDestroy(&partel);
244  break;
245  }
246 
247  /* Extract the contents */
248  boxa = boxaCopy(partel->boxa, L_CLONE);
249  box = boxClone(partel->box);
250  partelDestroy(&partel);
251 
252  /* Can we output this one? */
253  n = boxaGetCount(boxa);
254  if (n == 0) {
255  if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0)
256  boxaAddBox(boxad, box, L_INSERT);
257  else
258  boxDestroy(&box);
259  boxaDestroy(&boxa);
260  if (boxaGetCount(boxad) >= maxboxes) /* we're done */
261  break;
262  continue;
263  }
264 
265 
266  /* Generate up to 4 subboxes and put them on the heap */
267  boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract);
268  boxDestroy(&box);
269  nsub = boxaGetCount(boxa4);
270  for (i = 0; i < nsub; i++) {
271  boxsub = boxaGetBox(boxa4, i, L_CLONE);
272  boxasub = boxaIntersectsBox(boxa, boxsub);
273  partel = partelCreate(boxsub);
274  partel->boxa = boxasub;
275  partelSetSize(partel, sortflag);
276  lheapAdd(lh, partel);
277  boxDestroy(&boxsub);
278  }
279  npush += nsub; /* How many boxes have we put on the queue? */
280 
281 /* boxaWriteStream(stderr, boxa4); */
282 
283  boxaDestroy(&boxa4);
284  boxaDestroy(&boxa);
285  }
286 
287 #if OUTPUT_HEAP_STATS
288  fprintf(stderr, "Heap statistics:\n");
289  fprintf(stderr, " Number of boxes pushed: %d\n", npush);
290  fprintf(stderr, " Number of boxes popped: %d\n", npop);
291  fprintf(stderr, " Number of boxes on heap: %d\n", lheapGetCount(lh));
292 #endif /* OUTPUT_HEAP_STATS */
293 
294  /* Clean up the heap */
295  while ((partel = (PARTEL *)lheapRemove(lh)) != NULL)
296  partelDestroy(&partel);
297  lheapDestroy(&lh, FALSE);
298 
299  return boxad;
300 }
301 
302 
303 /*------------------------------------------------------------------*
304  * Helpers *
305  *------------------------------------------------------------------*/
312 static PARTEL *
314 {
315 PARTEL *partel;
316 
317  PROCNAME("partelCreate");
318 
319  if ((partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL))) == NULL)
320  return (PARTEL *)ERROR_PTR("partel not made", procName, NULL);
321 
322  partel->box = boxCopy(box);
323  return partel;
324 }
325 
326 
333 static void
335 {
336 PARTEL *partel;
337 
338  PROCNAME("partelDestroy");
339 
340  if (ppartel == NULL) {
341  L_WARNING("ptr address is null!\n", procName);
342  return;
343  }
344 
345  if ((partel = *ppartel) == NULL)
346  return;
347 
348  boxDestroy(&partel->box);
349  boxaDestroy(&partel->boxa);
350  LEPT_FREE(partel);
351  *ppartel = NULL;
352  return;
353 }
354 
355 
365 static l_int32
367  l_int32 sortflag)
368 {
369 l_int32 w, h;
370 
371  PROCNAME("partelSetSize");
372 
373  if (!partel)
374  return ERROR_INT("partel not defined", procName, 1);
375 
376  boxGetGeometry(partel->box, NULL, NULL, &w, &h);
377  if (sortflag == L_SORT_BY_WIDTH)
378  partel->size = (l_float32)w;
379  else if (sortflag == L_SORT_BY_HEIGHT)
380  partel->size = (l_float32)h;
381  else if (sortflag == L_SORT_BY_MIN_DIMENSION)
382  partel->size = (l_float32)L_MIN(w, h);
383  else if (sortflag == L_SORT_BY_MAX_DIMENSION)
384  partel->size = (l_float32)L_MAX(w, h);
385  else if (sortflag == L_SORT_BY_PERIMETER)
386  partel->size = (l_float32)(w + h);
387  else if (sortflag == L_SORT_BY_AREA)
388  partel->size = (l_float32)(w * h);
389  else
390  return ERROR_INT("invalid sortflag", procName, 1);
391  return 0;
392 }
393 
394 
407 static BOXA *
409  BOXA *boxa,
410  l_int32 maxperim,
411  l_float32 fract)
412 {
413 l_int32 x, y, w, h, xp, yp, wp, hp;
414 BOX *boxp; /* pivot box */
415 BOX *boxsub;
416 BOXA *boxa4;
417 
418  PROCNAME("boxaGenerateSubboxes");
419 
420  if (!box)
421  return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
422  if (!boxa)
423  return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL);
424 
425  boxa4 = boxaCreate(4);
426  boxp = boxaSelectPivotBox(box, boxa, maxperim, fract);
427  boxGetGeometry(box, &x, &y, &w, &h);
428  boxGetGeometry(boxp, &xp, &yp, &wp, &hp);
429  boxDestroy(&boxp);
430  if (xp > x) { /* left sub-box */
431  boxsub = boxCreate(x, y, xp - x, h);
432  boxaAddBox(boxa4, boxsub, L_INSERT);
433  }
434  if (yp > y) { /* top sub-box */
435  boxsub = boxCreate(x, y, w, yp - y);
436  boxaAddBox(boxa4, boxsub, L_INSERT);
437  }
438  if (xp + wp < x + w) { /* right sub-box */
439  boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h);
440  boxaAddBox(boxa4, boxsub, L_INSERT);
441  }
442  if (yp + hp < y + h) { /* bottom sub-box */
443  boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp);
444  boxaAddBox(boxa4, boxsub, L_INSERT);
445  }
446 
447  return boxa4;
448 }
449 
450 
485 static BOX *
487  BOXA *boxa,
488  l_int32 maxperim,
489  l_float32 fract)
490 {
491 l_int32 i, n, bw, bh, w, h;
492 l_int32 smallfound, minindex, perim, minsize;
493 l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy;
494 BOX *boxt;
495 
496  PROCNAME("boxaSelectPivotBox");
497 
498  if (!box)
499  return (BOX *)ERROR_PTR("box not defined", procName, NULL);
500  if (!boxa)
501  return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
502  n = boxaGetCount(boxa);
503  if (n == 0)
504  return (BOX *)ERROR_PTR("no boxes in boxa", procName, NULL);
505  if (fract < 0.0 || fract > 1.0) {
506  L_WARNING("fract out of bounds; using 0.0\n", procName);
507  fract = 0.0;
508  }
509 
510  boxGetGeometry(box, NULL, NULL, &w, &h);
511  boxGetCenter(box, &x, &y);
512  threshdist = fract * (w * w + h * h);
513  mindist = 1000000000.;
514  minindex = 0;
515  smallfound = FALSE;
516  for (i = 0; i < n; i++) {
517  boxt = boxaGetBox(boxa, i, L_CLONE);
518  boxGetGeometry(boxt, NULL, NULL, &bw, &bh);
519  boxGetCenter(boxt, &cx, &cy);
520  boxDestroy(&boxt);
521  if (bw + bh > maxperim)
522  continue;
523  smallfound = TRUE;
524  delx = cx - x;
525  dely = cy - y;
526  dist = delx * delx + dely * dely;
527  if (dist <= threshdist)
528  return boxaGetBox(boxa, i, L_COPY);
529  if (dist < mindist) {
530  minindex = i;
531  mindist = dist;
532  }
533  }
534 
535  /* If there are small boxes but none are within 'fract' of the
536  * centroid, return the nearest one. */
537  if (smallfound == TRUE)
538  return boxaGetBox(boxa, minindex, L_COPY);
539 
540  /* No small boxes; return the smallest of the large boxes */
541  minsize = 1000000000;
542  minindex = 0;
543  for (i = 0; i < n; i++) {
544  boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
545  perim = bw + bh;
546  if (perim < minsize) {
547  minsize = perim;
548  minindex = i;
549  }
550  }
551  return boxaGetBox(boxa, minindex, L_COPY);
552 }
553 
554 
565 static l_int32
567  BOXA *boxa,
568  l_float32 maxoverlap)
569 {
570 l_int32 i, n, bigoverlap;
571 l_float32 fract;
572 BOX *boxt;
573 
574  PROCNAME("boxCheckIfOverlapIsBig");
575 
576  if (!box)
577  return ERROR_INT("box not defined", procName, 1);
578  if (!boxa)
579  return ERROR_INT("boxa not defined", procName, 1);
580  if (maxoverlap < 0.0 || maxoverlap > 1.0)
581  return ERROR_INT("invalid maxoverlap", procName, 1);
582 
583  n = boxaGetCount(boxa);
584  if (n == 0 || maxoverlap == 1.0)
585  return 0;
586 
587  bigoverlap = 0;
588  for (i = 0; i < n; i++) {
589  boxt = boxaGetBox(boxa, i, L_CLONE);
590  boxOverlapFraction(boxt, box, &fract);
591  boxDestroy(&boxt);
592  if (fract > maxoverlap) {
593  bigoverlap = 1;
594  break;
595  }
596  }
597 
598  return bigoverlap;
599 }
600 
601 
620 BOXA *
622  l_float32 maxoverlap)
623 {
624 l_int32 i, j, n, remove;
625 l_float32 fract;
626 BOX *box1, *box2;
627 BOXA *boxad;
628 
629  PROCNAME("boxaPruneSortedOnOverlap");
630 
631  if (!boxas)
632  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
633  if (maxoverlap < 0.0 || maxoverlap > 1.0)
634  return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
635 
636  n = boxaGetCount(boxas);
637  if (n == 0 || maxoverlap == 1.0)
638  return boxaCopy(boxas, L_COPY);
639 
640  boxad = boxaCreate(0);
641  box2 = boxaGetBox(boxas, 0, L_COPY);
642  boxaAddBox(boxad, box2, L_INSERT);
643  for (j = 1; j < n; j++) { /* prune on j */
644  box2 = boxaGetBox(boxas, j, L_COPY);
645  remove = FALSE;
646  for (i = 0; i < j; i++) { /* test on i */
647  box1 = boxaGetBox(boxas, i, L_CLONE);
648  boxOverlapFraction(box1, box2, &fract);
649  boxDestroy(&box1);
650  if (fract > maxoverlap) {
651  remove = TRUE;
652  break;
653  }
654  }
655  if (remove == TRUE)
656  boxDestroy(&box2);
657  else
658  boxaAddBox(boxad, box2, L_INSERT);
659  }
660 
661  return boxad;
662 }
BOXA * boxaIntersectsBox(BOXA *boxas, BOX *box)
boxaIntersectsBox()
Definition: boxfunc1.c:303
Definition: pix.h:717
static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag)
partelSetSize()
Definition: partition.c:366
l_ok lheapAdd(L_HEAP *lh, void *item)
lheapAdd()
Definition: heap.c:186
static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaGenerateSubboxes()
Definition: partition.c:408
l_ok boxGetCenter(BOX *box, l_float32 *pcx, l_float32 *pcy)
boxGetCenter()
Definition: boxfunc1.c:1444
BOXA * boxaCopy(BOXA *boxa, l_int32 copyflag)
boxaCopy()
Definition: boxbasic.c:534
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:580
void lheapDestroy(L_HEAP **plh, l_int32 freeflag)
lheapDestroy()
Definition: heap.c:145
BOXA * boxaGetWhiteblocks(BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops)
boxaGetWhiteblocks()
Definition: partition.c:189
Definition: pix.h:492
l_int32 lheapGetCount(L_HEAP *lh)
lheapGetCount()
Definition: heap.c:271
BOX * boxClone(BOX *box)
boxClone()
Definition: boxbasic.c:252
static void partelDestroy(PARTEL **ppartel)
partelDestroy()
Definition: partition.c:334
static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaSelectPivotBox()
Definition: partition.c:486
l_ok boxaGetBoxGeometry(BOXA *boxa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxaGetBoxGeometry()
Definition: boxbasic.c:863
static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, l_float32 maxoverlap)
boxCheckIfOverlapIsBig()
Definition: partition.c:566
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
Definition: boxbasic.c:618
L_HEAP * lheapCreate(l_int32 nalloc, l_int32 direction)
lheapCreate()
Definition: heap.c:102
void * lheapRemove(L_HEAP *lh)
lheapRemove()
Definition: heap.c:242
l_ok boxOverlapFraction(BOX *box1, BOX *box2, l_float32 *pfract)
boxOverlapFraction()
Definition: boxfunc1.c:755
static PARTEL * partelCreate(BOX *box)
partelCreate()
Definition: partition.c:313
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:763
Definition: heap.h:77
l_ok boxaGetExtent(BOXA *boxa, l_int32 *pw, l_int32 *ph, BOX **pbox)
boxaGetExtent()
Definition: boxfunc4.c:943
Definition: pix.h:718
BOX * boxCopy(BOX *box)
boxCopy()
Definition: boxbasic.c:230
Definition: pix.h:719
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:499
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:278
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:718
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:310
Definition: pix.h:480
BOX * boxCreate(l_int32 x, l_int32 y, l_int32 w, l_int32 h)
boxCreate()
Definition: boxbasic.c:165
BOXA * boxaPruneSortedOnOverlap(BOXA *boxas, l_float32 maxoverlap)
boxaPruneSortedOnOverlap()
Definition: partition.c:621