Leptonica  1.77.0
Image processing and image analysis suite
boxfunc5.c File Reference
#include <math.h>
#include "allheaders.h"

Go to the source code of this file.

Functions

static l_int32 boxaTestEvenOddHeight (BOXA *boxa1, BOXA *boxa2, l_int32 start, l_float32 *pdel1, l_float32 *pdel2)
 
static l_int32 boxaFillAll (BOXA *boxa)
 
BOXA * boxaSmoothSequenceLS (BOXA *boxas, l_float32 factor, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels, l_int32 debug)
 
BOXA * boxaSmoothSequenceMedian (BOXA *boxas, l_int32 halfwin, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels, l_int32 debug)
 
BOXA * boxaLinearFit (BOXA *boxas, l_float32 factor, l_int32 debug)
 
BOXA * boxaWindowedMedian (BOXA *boxas, l_int32 halfwin, l_int32 debug)
 
BOXA * boxaModifyWithBoxa (BOXA *boxas, BOXA *boxam, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels)
 
BOXA * boxaConstrainSize (BOXA *boxas, l_int32 width, l_int32 widthflag, l_int32 height, l_int32 heightflag)
 
BOXA * boxaReconcileEvenOddHeight (BOXA *boxas, l_int32 sides, l_int32 delh, l_int32 op, l_float32 factor, l_int32 start)
 
BOXA * boxaReconcilePairWidth (BOXA *boxas, l_int32 delw, l_int32 op, l_float32 factor, NUMA *na)
 
l_ok boxaSizeConsistency1 (BOXA *boxas, l_int32 type, l_float32 threshp, l_float32 threshm, l_float32 *pfvarp, l_float32 *pfvarm, l_int32 *psame)
 
l_ok boxaSizeConsistency2 (BOXA *boxas, l_float32 *pfdevw, l_float32 *pfdevh, l_int32 debug)
 
BOXA * boxaReconcileSizeByMedian (BOXA *boxas, l_int32 type, l_float32 fract, l_float32 factor, NUMA **pnadelw, NUMA **pnadelh, l_float32 *pratiowh)
 
l_ok boxaPlotSides (BOXA *boxa, const char *plotname, NUMA **pnal, NUMA **pnat, NUMA **pnar, NUMA **pnab, PIX **ppixd)
 
l_ok boxaPlotSizes (BOXA *boxa, const char *plotname, NUMA **pnaw, NUMA **pnah, PIX **ppixd)
 
BOXA * boxaFillSequence (BOXA *boxas, l_int32 useflag, l_int32 debug)
 
l_ok boxaSizeVariation (BOXA *boxa, l_int32 type, l_float32 *pdel_evenodd, l_float32 *prms_even, l_float32 *prms_odd, l_float32 *prms_all)
 
l_ok boxaMedianDimensions (BOXA *boxas, l_int32 *pmedw, l_int32 *pmedh, l_int32 *pmedwe, l_int32 *pmedwo, l_int32 *pmedhe, l_int32 *pmedho, NUMA **pnadelw, NUMA **pnadelh)
 

Detailed Description

 Boxa sequence fitting
      BOXA     *boxaSmoothSequenceLS()
      BOXA     *boxaSmoothSequenceMedian()
      BOXA     *boxaLinearFit()
      BOXA     *boxaWindowedMedian()
      BOXA     *boxaModifyWithBoxa()
      BOXA     *boxaConstrainSize()
      BOXA     *boxaReconcileEvenOddHeight()
static l_int32  boxaTestEvenOddHeight()
      BOXA     *boxaReconcilePairWidth()
      l_int32   boxaSizeConsistency1()
      l_int32   boxaSizeConsistency2()
      BOXA     *boxaReconcileSizeByMedian()
      l_int32   boxaPlotSides()   [for debugging]
      l_int32   boxaPlotSizes()   [for debugging]
      BOXA     *boxaFillSequence()
static l_int32  boxaFillAll()
      l_int32   boxaSizeVariation()
      l_int32   boxaMedianDimensions()

Definition in file boxfunc5.c.

Function Documentation

◆ boxaConstrainSize()

BOXA* boxaConstrainSize ( BOXA *  boxas,
l_int32  width,
l_int32  widthflag,
l_int32  height,
l_int32  heightflag 
)

boxaConstrainSize()

Parameters
[in]  boxas
[in]  width  force width of all boxes to this size; input 0 to use the median width
[in]  widthflag  L_ADJUST_SKIP, L_ADJUST_LEFT, L_ADJUST_RIGHT, or L_ADJUST_LEFT_AND_RIGHT
[in]  height  force height of all boxes to this size; input 0 to use the median height
[in]  heightflag  L_ADJUST_SKIP, L_ADJUST_TOP, L_ADJUST_BOT, or L_ADJUST_TOP_AND_BOT
Returns
boxad adjusted so all boxes are the same size
Notes:
     (1) Forces either width or height (or both) of every box in
         the boxa to a specified size, by moving the indicated sides.
     (2) Not all input boxes need to be valid.  Median values will be
         used with invalid boxes.
     (3) Typical input might be the output of boxaLinearFit(),
         where each side has been fit.
     (4) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(),
         this is not dependent on a difference threshold to change the size.
     (5) On error, a message is issued and a copy of the input boxa
         is returned.

Definition at line 716 of file boxfunc5.c.

References boxaAddBox(), boxaCopy(), boxaCreate(), boxAdjustSides(), boxaGetCount(), boxaGetMedianVals(), boxaGetValidBox(), boxaGetValidCount(), boxCopy(), boxCreate(), boxDestroy(), boxGetGeometry(), L_ADJUST_BOT, L_ADJUST_LEFT, L_ADJUST_RIGHT, L_ADJUST_TOP, L_COPY, and L_INSERT.

◆ boxaFillAll()

static l_int32 boxaFillAll ( BOXA *  boxa)
static

boxaFillAll()

Parameters
[in]  boxa
Returns
0 if OK, 1 on error
Notes:
     (1) This static function replaces every invalid box with the
         nearest valid box.  If there are no valid boxes, it
         issues a warning.

Definition at line 1888 of file boxfunc5.c.

References boxaGetBox(), boxaGetCount(), boxaGetValidBox(), boxaGetValidCount(), boxaReplaceBox(), boxDestroy(), L_CLONE, and L_COPY.

Referenced by boxaFillSequence().

◆ boxaFillSequence()

BOXA* boxaFillSequence ( BOXA *  boxas,
l_int32  useflag,
l_int32  debug 
)

boxaFillSequence()

Parameters
[in]  boxas  with at least 3 boxes
[in]  useflag  L_USE_ALL_BOXES, L_USE_SAME_PARITY_BOXES
[in]  debug  1 for debug output
Returns
boxad filled boxa, or NULL on error
Notes:
     (1) This simple function replaces invalid boxes with a copy of
         the nearest valid box, selected from either the entire
         sequence (L_USE_ALL_BOXES) or from the boxes with the
         same parity (L_USE_SAME_PARITY_BOXES).  It returns a new boxa.
     (2) This is useful if you expect boxes in the sequence to
         vary slowly with index.

Definition at line 1829 of file boxfunc5.c.

References boxaCopy(), boxaDestroy(), boxaFillAll(), boxaGetCount(), boxaGetValidCount(), boxaMergeEvenOdd(), boxaSplitEvenOdd(), L_COPY, L_USE_ALL_BOXES, and L_USE_SAME_PARITY_BOXES.

Referenced by boxaPlotSides(), boxaPlotSizes(), and boxaWindowedMedian().

◆ boxaLinearFit()

BOXA* boxaLinearFit ( BOXA *  boxas,
l_float32  factor,
l_int32  debug 
)

boxaLinearFit()

Parameters
[in]  boxas  source boxa
[in]  factor  reject outliers with widths and heights deviating from the median by more than factor times the median deviation from the median; typically ~3
[in]  debug  1 for debug output
Returns
boxad fitted boxa, or NULL on error
Notes:
     (1) This finds a set of boxes (boxad) where each edge of each box is
         a linear least square fit (LSF) to the edges of the
         input set of boxes (boxas).  Before fitting, outliers in
         the boxes in boxas are removed (see below).
     (2) This is useful when each of the box edges in boxas is expected
         to vary linearly with box index in the set.  These could
         be, for example, noisy measurements of similar regions
         on successive scanned pages.
     (3) Method: there are 2 steps:
         (a) Find and remove outliers, separately based on the deviation
             from the median of the width and height of the box.
             Use factor to specify tolerance to outliers; use a very
             large value of factor to avoid rejecting any box sides
             in the linear LSF.
         (b) On the remaining boxes, do a linear LSF independently
             for each of the four sides.
     (4) Invalid input boxes are not used in computation of the LSF.
     (5) The returned boxad can then be used in boxaModifyWithBoxa()
         to selectively change the boxes in boxas.

Definition at line 310 of file boxfunc5.c.

References boxaAddBox(), boxaCreate(), boxaDestroy(), boxaExtractAsNuma(), boxaExtractAsPta(), boxaGetCount(), boxaGetValidBox(), boxaPlotSides(), boxaPlotSizes(), boxaWriteDebug(), boxCreate(), boxDestroy(), boxGetGeometry(), L_CLONE, L_COPY, L_INSERT, lept_mkdir(), numaDestroy(), numaGetMedianDevFromMedian(), ptaDestroy(), ptaGetLinearLSF(), and ptaWriteDebug().

Referenced by boxaSmoothSequenceLS().

◆ boxaMedianDimensions()

l_ok boxaMedianDimensions ( BOXA *  boxas,
l_int32 *  pmedw,
l_int32 *  pmedh,
l_int32 *  pmedwe,
l_int32 *  pmedwo,
l_int32 *  pmedhe,
l_int32 *  pmedho,
NUMA **  pnadelw,
NUMA **  pnadelh 
)

boxaMedianDimensions()

Parameters
[in]  boxas  containing at least 3 valid boxes in even and odd
[out]  pmedw  [optional] median width of all boxes
[out]  pmedh  [optional] median height of all boxes
[out]  pmedwe  [optional] median width of even boxes
[out]  pmedwo  [optional] median width of odd boxes
[out]  pmedhe  [optional] median height of even boxes
[out]  pmedho  [optional] median height of odd boxes
[out]  pnadelw  [optional] width diff of each box from median
[out]  pnadelh  [optional] height diff of each box from median
Returns
0 if OK, 1 on error
Notes:
     (1) This provides information that (1) allows identification of
         boxes that have unusual (outlier) width or height, and (2) can
         be used to regularize the sizes of the outlier boxes, assuming
         that the boxes satisfy a fairly regular sequence and should
         mostly have the same width and height.
     (2) This finds the median width and height, as well as separate
         median widths and heights of even and odd boxes.  It also
         generates arrays that give the difference in width and height
         of each box from the median, which can be used to correct
         individual boxes.
     (3) All return values are optional.

Definition at line 2075 of file boxfunc5.c.

References boxaDestroy(), boxaGetBoxGeometry(), boxaGetCount(), boxaGetMedianVals(), boxaGetValidCount(), boxaSplitEvenOdd(), numaAddNumber(), numaCreate(), and numaDestroy().

Referenced by boxaReconcileSizeByMedian().

◆ boxaModifyWithBoxa()

BOXA* boxaModifyWithBoxa ( BOXA *  boxas,
BOXA *  boxam,
l_int32  subflag,
l_int32  maxdiff,
l_int32  extrapixels 
)

boxaModifyWithBoxa()

Parameters
[in]  boxas
[in]  boxam  boxa with boxes used to modify those in boxas
[in]  subflag  L_USE_MINSIZE, L_USE_MAXSIZE, L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
[in]  maxdiff  parameter used with L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
[in]  extrapixels  pixels added on all sides (or subtracted if extrapixels < 0) when using L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
Returns
boxad result after adjusting boxes in boxas, or NULL on error.
Notes:
     (1) This takes two input boxa (boxas, boxam) and constructs boxad,
         where each box in boxad is generated from the corresponding
         boxes in boxas and boxam.  The rule for constructing each
         output box depends on subflag and maxdiff.  Let boxs be
         a box from boxas and boxm be a box from boxam.
         * If subflag == L_USE_MINSIZE: the output box is the intersection
           of the two input boxes.
         * If subflag == L_USE_MAXSIZE: the output box is the union of the
           two input boxes; i.e., the minimum bounding rectangle for the
           two input boxes.
         * If subflag == L_SUB_ON_LOC_DIFF: each side of the output box
           is found separately from the corresponding side of boxs and boxm.
           Use the boxm side, expanded by extrapixels, if greater than
           maxdiff pixels from the boxs side.
         * If subflag == L_SUB_ON_SIZE_DIFF: the sides of the output box
           are determined in pairs from the width and height of boxs
           and boxm.  If the boxm width differs by more than maxdiff
           pixels from boxs, use the boxm left and right sides,
           expanded by extrapixels.  Ditto for the height difference.
         For the last two flags, each side of the output box is found
         separately from the corresponding side of boxs and boxm,
         according to these rules, where "smaller"("bigger") mean in a
         direction that decreases(increases) the size of the output box:
         * If subflag == L_USE_CAPPED_MIN: use the Min of boxm
           with the Max of (boxs, boxm +- maxdiff), where the sign
           is adjusted to make the box smaller (e.g., use "+" on left side).
         * If subflag == L_USE_CAPPED_MAX: use the Max of boxm
           with the Min of (boxs, boxm +- maxdiff), where the sign
           is adjusted to make the box bigger (e.g., use "-" on left side).
         Use of the last 2 flags is further explained in (3) and (4).
     (2) boxas and boxam must be the same size.  If boxam == NULL,
         this returns a copy of boxas with a warning.
     (3) If subflag == L_SUB_ON_LOC_DIFF, use boxm for each side
         where the corresponding sides differ by more than maxdiff.
         Two extreme cases:
         (a) set maxdiff == 0 to use only values from boxam in boxad.
         (b) set maxdiff == 10000 to ignore all values from boxam;
             then boxad will be the same as boxas.
     (4) If subflag == L_USE_CAPPED_MAX: use boxm if boxs is smaller;
         use boxs if boxs is bigger than boxm by an amount up to maxdiff;
         and use boxm +- maxdiff (the 'capped' value) if boxs is
         bigger than boxm by an amount larger than maxdiff.
         Similarly, with interchange of Min/Max and sign of maxdiff,
         for subflag == L_USE_CAPPED_MIN.
     (5) If either of corresponding boxes in boxas and boxam is invalid,
         an invalid box is copied to the result.
     (6) Typical input for boxam may be the output of boxaLinearFit(),
         where outliers have been removed and each side is LS fit to a line.
     (7) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(),
         this uses two boxes and does not specify target dimensions.
         Additional constraints on the size of each box can be enforced
         by following this operation with boxaConstrainSize(), taking
         boxad as input.

Definition at line 600 of file boxfunc5.c.

References boxaAddBox(), boxaCopy(), boxaCreate(), boxaGetCount(), boxaGetValidBox(), boxCreate(), boxDestroy(), boxGetGeometry(), L_CLONE, L_COPY, L_INSERT, L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MAX, L_USE_CAPPED_MIN, L_USE_MAXSIZE, and L_USE_MINSIZE.

Referenced by boxaSmoothSequenceLS(), and boxaSmoothSequenceMedian().

◆ boxaPlotSides()

l_ok boxaPlotSides ( BOXA *  boxa,
const char *  plotname,
NUMA **  pnal,
NUMA **  pnat,
NUMA **  pnar,
NUMA **  pnab,
PIX **  ppixd 
)

boxaPlotSides()

Parameters
[in]  boxa  source boxa
[in]  plotname  [optional], can be NULL
[out]  pnal  [optional] na of left sides
[out]  pnat  [optional] na of top sides
[out]  pnar  [optional] na of right sides
[out]  pnab  [optional] na of bottom sides
[out]  ppixd  [optional] pix of the output plot
Returns
0 if OK, 1 on error
Notes:
     (1) This debugging function shows the progression of the four
         sides in the boxa.  There must be at least 2 boxes.
     (2) If there are invalid boxes (e.g., if only even or odd
         indices have valid boxes), this will fill them with the
         nearest valid box before plotting.
     (3) The plotfiles are put in /tmp/lept/plots/, and are named
         either with plotname or, if NULL, a default name.  If
         plotname is used, make sure it has no whitespace characters.

Definition at line 1629 of file boxfunc5.c.

References boxaDestroy(), boxaFillSequence(), boxaGetBoxGeometry(), boxaGetCount(), gplotCreate(), L_USE_ALL_BOXES, lept_mkdir(), numaAddNumber(), and numaCreate().

Referenced by boxaLinearFit(), boxaSmoothSequenceMedian(), and boxaWindowedMedian().

◆ boxaPlotSizes()

l_ok boxaPlotSizes ( BOXA *  boxa,
const char *  plotname,
NUMA **  pnaw,
NUMA **  pnah,
PIX **  ppixd 
)

boxaPlotSizes()

Parameters
[in]  boxa  source boxa
[in]  plotname  [optional], can be NULL
[out]  pnaw  [optional] na of widths
[out]  pnah  [optional] na of heights
[out]  ppixd  [optional] pix of the output plot
Returns
0 if OK, 1 on error
Notes:
     (1) This debugging function shows the progression of box width
         and height in the boxa.  There must be at least 2 boxes.
     (2) If there are invalid boxes (e.g., if only even or odd
         indices have valid boxes), this will fill them with the
         nearest valid box before plotting.
     (3) The plotfiles are put in /tmp/lept/plots/, and are named
         either with plotname or, if NULL, a default name.  If
         plotname is used, make sure it has no whitespace characters.

Definition at line 1741 of file boxfunc5.c.

References boxaDestroy(), boxaFillSequence(), boxaGetBoxGeometry(), boxaGetCount(), gplotCreate(), L_USE_ALL_BOXES, lept_mkdir(), numaAddNumber(), and numaCreate().

Referenced by boxaLinearFit(), boxaSizeConsistency2(), boxaSmoothSequenceMedian(), and boxaWindowedMedian().

◆ boxaReconcileEvenOddHeight()

BOXA* boxaReconcileEvenOddHeight ( BOXA *  boxas,
l_int32  sides,
l_int32  delh,
l_int32  op,
l_float32  factor,
l_int32  start 
)

boxaReconcileEvenOddHeight()

Parameters
[in]  boxas  containing at least 3 valid boxes in even and odd
[in]  sides  L_ADJUST_TOP, L_ADJUST_BOT, L_ADJUST_TOP_AND_BOT
[in]  delh  threshold on median height difference
[in]  op  L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX
[in]  factor  > 0.0, typically near 1.0
[in]  start  0 if pairing (0,1), etc; 1 if pairing (1,2), etc
Returns
boxad adjusted, or a copy of boxas on error
Notes:
     (1) The basic idea is to reconcile differences in box height
         in the even and odd boxes, by moving the top and/or bottom
         edges in the even and odd boxes.  Choose the edge or edges
         to be moved, whether to adjust the boxes with the min
         or the max of the medians, and the threshold on the median
         difference between even and odd box heights for the operations
         to take place.  The same threshold is also used to
         determine if each individual box edge is to be adjusted.
     (2) Boxes are conditionally reset with either the same top (y)
         value or the same bottom value, or both.  The value is
         determined by the greater or lesser of the medians of the
         even and odd boxes, with the choice depending on the value
         of op, which selects for either min or max median height.
         If the median difference between even and odd boxes is
         greater than delh, then any individual box edge that differs
         from the selected median by more than delh is set to
         the selected median times a factor typically near 1.0.
     (3) Note that if selecting for minimum height, you will choose
         the largest y-value for the top and the smallest y-value for
         the bottom of the box.
     (4) Typical input might be the output of boxaSmoothSequenceLS()
         or boxaSmoothSequenceMedian(), where even and odd boxa have
         been independently regulated.
     (5) Require at least 3 valid even boxes and 3 valid odd boxes.
         Median values will be used for invalid boxes.
     (6) If the median height is not representative of the boxes
         in boxas, this can make things much worse.  In that case,
         ignore the value of op, and force pairwise equality of the
         heights, with pairwise maximal vertical extension.

Definition at line 825 of file boxfunc5.c.

References boxaAdjustHeightToTarget(), boxaCopy(), boxaDestroy(), boxaGetMedianVals(), boxaGetValidCount(), boxaMergeEvenOdd(), boxaSplitEvenOdd(), boxaTestEvenOddHeight(), L_ADJUST_BOT, L_ADJUST_CHOOSE_MIN, L_ADJUST_TOP, L_ADJUST_TOP_AND_BOT, L_CLONE, and L_COPY.

◆ boxaReconcilePairWidth()

BOXA* boxaReconcilePairWidth ( BOXA *  boxas,
l_int32  delw,
l_int32  op,
l_float32  factor,
NUMA *  na 
)

boxaReconcilePairWidth()

Parameters
[in]  boxas
[in]  delw  threshold on adjacent width difference
[in]  op  L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX
[in]  factor  > 0.0, typically near 1.0
[in]  na  [optional] indicator array allowing change
Returns
boxad adjusted, or a copy of boxas on error
Notes:
     (1) This reconciles differences in the width of adjacent boxes,
         by moving one side of one of the boxes in each pair.
         If the widths in the pair differ by more than some
         threshold, move either the left side for even boxes or
         the right side for odd boxes, depending on if we're choosing
         the min or max.  If choosing min, the width of the max is
         set to factor * (width of min).  If choosing max, the width
         of the min is set to factor * (width of max).
     (2) If na exists, it is an indicator array corresponding to the
         boxes in boxas.  If na != NULL, only boxes with an
         indicator value of 1 are allowed to adjust; otherwise,
         all boxes can adjust.
     (3) Typical input might be the output of boxaSmoothSequenceLS()
         or boxaSmoothSequenceMedian(), where even and odd boxa have
         been independently regulated.

Definition at line 1006 of file boxfunc5.c.

References boxaDestroy(), boxaGetBox(), boxaGetCount(), boxaMergeEvenOdd(), boxaSplitEvenOdd(), boxDestroy(), boxGetGeometry(), boxSetGeometry(), L_ADJUST_CHOOSE_MIN, L_CLONE, and numaGetIValue().

◆ boxaReconcileSizeByMedian()

BOXA* boxaReconcileSizeByMedian ( BOXA *  boxas,
l_int32  type,
l_float32  fract,
l_float32  factor,
NUMA **  pnadelw,
NUMA **  pnadelh,
l_float32 *  pratiowh 
)

boxaReconcileSizeByMedian()

Parameters
[in]  boxas  containing at least 6 valid boxes
[in]  type  L_CHECK_WIDTH, L_CHECK_HEIGHT, L_CHECK_BOTH
[in]  fract  threshold fraction of size variation from median; in range (0 ... 1); typ. about 0.05.
[in]  factor  expansion for fixed box beyond median width; should be near 1.0.
[out]  pnadelw  [optional] diff from median width for boxes above threshold
[out]  pnadelh  [optional] diff from median height for boxes above threshold
[out]  pratiowh  [optional] ratio of median width/height of boxas
Returns
boxad possibly adjusted from boxas; a copy of boxas on error
Notes:
     (1) The basic idea is to identify significant differences in box
         dimension (either width or height) and modify the outlier boxes.
     (2) type specifies if we are reconciling the width, height or both.
     (3) fract specifies the tolerance for different dimensions. Any
         box with a fractional difference from the median size that
         exceeds fract will be altered.
     (4) Median width and height are found for all valid boxes (i.e.,
         for all boxes with width and height > 0).
         Median side locations are found separately for even and odd boxes,
         using only boxes that are "inliers"; i.e., that are within
         tolerance for width or height.
     (5) If all box dimensions are within threshold of the median,
         just return a copy.  Otherwise, find the side farthest
         from the median side location of the "inliers".  Adjust
         that side so that the final dimension (width or height)
         is the median dimension, expanded by factor.
     (6) The arrays that are the initial deviation from median size
         (width and height) are optionally returned.  Also optionally
         returned is the median w/h aspect ratio of the input boxas.

Definition at line 1370 of file boxfunc5.c.

References boxaAddBox(), boxaCopy(), boxaCreate(), boxaDestroy(), boxaGetBox(), boxaGetCount(), boxaGetMedianVals(), boxaGetValidBox(), boxaGetValidCount(), boxaMedianDimensions(), boxDestroy(), boxGetGeometry(), boxGetSideLocations(), boxIsValid(), boxSetSide(), L_CHECK_BOTH, L_CHECK_HEIGHT, L_CHECK_WIDTH, L_COPY, L_INSERT, L_SET_BOT, L_SET_LEFT, L_SET_RIGHT, L_SET_TOP, numaAddNumber(), numaCreate(), numaDestroy(), and numaGetIValue().

◆ boxaSizeConsistency1()

l_ok boxaSizeConsistency1 ( BOXA *  boxas,
l_int32  type,
l_float32  threshp,
l_float32  threshm,
l_float32 *  pfvarp,
l_float32 *  pfvarm,
l_int32 *  psame 
)

boxaSizeConsistency1()

Parameters
[in]  boxas  of size >= 10
[in]  type  L_CHECK_WIDTH, L_CHECK_HEIGHT
[in]  threshp  threshold for pairwise fractional variation
[in]  threshm  threshold for fractional variation from median
[out]  pfvarp  [optional] average fractional pairwise variation
[out]  pfvarm  [optional] average fractional median variation
[out]  psame  decision for uniformity of page size (1, 0, -1)
Notes:
     (1) This evaluates a boxa for particular types of dimensional
         variation.  Select either width or height variation.  Then
         it returns two numbers: one is based on pairwise (even/odd)
         variation; the other is based on the average variation
         from the boxa median.
     (2) For the pairwise variation, get the fraction of the absolute
         difference in dimension of each pair of boxes, and take
         the average value.  The median variation is simply the
         average of the fractional deviation from the median
         of all the boxes.
     (3) Use 0 for default values of threshp and threshm.  They are
           threshp:  0.02
           threshm:  0.015
     (4) The intended application is that the boxes are a sequence of
         page regions in a book scan, and we calculate two numbers
         that can give an indication if the pages are approximately
         the same size.  The pairwise variation should be small if
         the boxes are correctly calculated.  If there are a
         significant number of random or systematic outliers, the
         pairwise variation will be large, and no decision will be made
         (i.e., return same == -1).  Here are the possible outcomes:
           Pairwise Var    Median Var    Decision
           ------------    ----------    --------
           small           small         same size  (1)
           small           large         different size  (0)
           large           small/large   unknown  (-1)

Definition at line 1129 of file boxfunc5.c.

References boxaGetBoxGeometry(), boxaGetCount(), boxaGetValidCount(), L_CHECK_HEIGHT, L_CHECK_WIDTH, numaAddNumber(), numaCreate(), numaDestroy(), numaGetMeanDevFromMedian(), and numaGetMedian().

◆ boxaSizeConsistency2()

l_ok boxaSizeConsistency2 ( BOXA *  boxas,
l_float32 *  pfdevw,
l_float32 *  pfdevh,
l_int32  debug 
)

boxaSizeConsistency2()

Parameters
[in]  boxas  of size >= 10
[out]  pfdevw  average fractional deviation from median width
[out]  pfdevh  average fractional deviation from median height
[in]  debug  1 for debug plot output of input and regularized width and height
Notes:
     (1) This evaluates a boxa for consistency of the box sizes.
         The intended application is that the boxes are a sequence of
         page regions in a book scan, and the output is a decision
         about whether the pages should be approximately the same size.
         The determination should be robust to outliers, both random
         and (for many cases) systematic.
     (2) This differs from boxaSizeConsistency1() in that it attempts
         to correct for box dimensional errors before doing the
         evaluation.  For this reason, it may be less robust.
     (3) Adjacent even and odd boxes are expected to be the same size.
         Take them pairwise, and assume the minimum height, hmin,
         is correct.  Then for (the usual case) wmin/hmin > 0.5, assume
         the minimum width is correct.  If wmin/hmin <= 0.5, assume
         the maximum width is correct.
     (4) After correcting each pair so that they are the same size,
         compute the average fractional deviation, from median width and
         height.  A deviation of width or height by more than about
         0.02 is evidence that the boxes may be from a non-homogeneous
         source, such as a book with significantly different page sizes.

Definition at line 1241 of file boxfunc5.c.

References boxaAddBox(), boxaCreate(), boxaDestroy(), boxaGetBoxGeometry(), boxaGetCount(), boxaPlotSizes(), boxCreate(), L_COPY, L_INSERT, lept_mkdir(), numaAddNumber(), numaCreate(), numaDestroy(), numaGetMeanDevFromMedian(), numaGetMedian(), pixaAddPix(), pixaCreate(), and pixaDisplayTiledInColumns().

◆ boxaSizeVariation()

l_ok boxaSizeVariation ( BOXA *  boxa,
l_int32  type,
l_float32 *  pdel_evenodd,
l_float32 *  prms_even,
l_float32 *  prms_odd,
l_float32 *  prms_all 
)

boxaSizeVariation()

Parameters
[in]  boxa  at least 4 boxes
[in]  type  L_SELECT_WIDTH, L_SELECT_HEIGHT
[out]  pdel_evenodd  [optional] average absolute value of (even - odd) size pairs
[out]  prms_even  [optional] rms deviation of even boxes
[out]  prms_odd  [optional] rms deviation of odd boxes
[out]  prms_all  [optional] rms deviation of all boxes
Returns
0 if OK, 1 on error
Notes:
     (1) This gives several measures of the smoothness of either the
         width or height of a sequence of boxes.
         See boxaMedianDimensions() for some other measures.
     (2) Statistics can be found separately for even and odd boxes.
         Additionally, the average pair-wise difference between
         adjacent even and odd boxes can be returned.
     (3) The use case is bounding boxes for scanned page images,
         where ideally the sizes should have little variance.

Definition at line 1972 of file boxfunc5.c.

References boxaDestroy(), boxaGetCount(), boxaGetSizes(), boxaSplitEvenOdd(), L_SELECT_HEIGHT, L_SELECT_WIDTH, numaDestroy(), numaGetIValue(), and numaSimpleStats().

◆ boxaSmoothSequenceLS()

BOXA* boxaSmoothSequenceLS ( BOXA *  boxas,
l_float32  factor,
l_int32  subflag,
l_int32  maxdiff,
l_int32  extrapixels,
l_int32  debug 
)

boxaSmoothSequenceLS()

Parameters
[in]  boxas  source boxa
[in]  factor  reject outliers with widths and heights deviating from the median by more than factor times the median variation from the median; typically ~3
[in]  subflag  L_USE_MINSIZE, L_USE_MAXSIZE, L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
[in]  maxdiff  parameter used with L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
[in]  extrapixels  pixels added on all sides (or subtracted if extrapixels < 0) when using L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
[in]  debug  1 for debug output
Returns
boxad fitted boxa, or NULL on error
Notes:
     (1) This returns a modified version of boxas by constructing
         for each input box a box that has been linear least square fit
         (LSF) to the entire set.  The linear fitting is done to each of
         the box sides independently, after outliers are rejected,
         and it is computed separately for sequences of even and
         odd boxes.  Once the linear LSF box is found, the output box
         (in boxad) is constructed from the input box and the LSF
         box, depending on subflag.  See boxaModifyWithBoxa() for
         details on the use of subflag and maxdiff.
     (2) This is useful if, in both the even and odd sets, the box
         edges vary roughly linearly with its index in the set.

Definition at line 98 of file boxfunc5.c.

References boxaCopy(), boxaDestroy(), boxaGetCount(), boxaLinearFit(), boxaMergeEvenOdd(), boxaModifyWithBoxa(), boxaSplitEvenOdd(), boxaWriteDebug(), L_COPY, L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MAX, L_USE_CAPPED_MIN, L_USE_MAXSIZE, L_USE_MINSIZE, and lept_mkdir().

◆ boxaSmoothSequenceMedian()

BOXA* boxaSmoothSequenceMedian ( BOXA *  boxas,
l_int32  halfwin,
l_int32  subflag,
l_int32  maxdiff,
l_int32  extrapixels,
l_int32  debug 
)

boxaSmoothSequenceMedian()

Parameters
[in]  boxas  source boxa
[in]  halfwin  half-width of sliding window; used to find median
[in]  subflag  L_USE_MINSIZE, L_USE_MAXSIZE, L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
[in]  maxdiff  parameter used with L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
[in]  extrapixels  pixels added on all sides (or subtracted if extrapixels < 0) when using L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
[in]  debug  1 for debug output
Returns
boxad fitted boxa, or NULL on error
Notes:
     (1) The target width of the sliding window is 2 * halfwin + 1.
         If necessary, this will be reduced by boxaWindowedMedian().
     (2) This returns a modified version of boxas by constructing
         for each input box a box that has been smoothed with windowed
         median filtering.  The filtering is done to each of the
         box sides independently, and it is computed separately for
         sequences of even and odd boxes.  The output boxad is
         constructed from the input boxa and the filtered boxa,
         depending on subflag.  See boxaModifyWithBoxa() for
         details on the use of subflag, maxdiff and extrapixels.
     (3) This is useful for removing noise separately in the even
         and odd sets, where the box edge locations can have
         discontinuities but otherwise vary roughly linearly within
         intervals of size halfwin or larger.
     (4) If you don't need to handle even and odd sets separately,
         just do this:
             boxam = boxaWindowedMedian(boxas, halfwin, debug);
             boxad = boxaModifyWithBoxa(boxas, boxam, subflag, maxdiff,
                                        extrapixels);
             boxaDestroy(&boxam);

Definition at line 204 of file boxfunc5.c.

References boxaCopy(), boxaDestroy(), boxaGetCount(), boxaMergeEvenOdd(), boxaModifyWithBoxa(), boxaPlotSides(), boxaPlotSizes(), boxaSplitEvenOdd(), boxaWindowedMedian(), boxaWriteDebug(), L_COPY, L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MAX, L_USE_CAPPED_MIN, L_USE_MAXSIZE, L_USE_MINSIZE, and lept_mkdir().

◆ boxaTestEvenOddHeight()

static l_int32 boxaTestEvenOddHeight ( BOXA *  boxa1,
BOXA *  boxa2,
l_int32  start,
l_float32 *  pdel1,
l_float32 *  pdel2 
)
static

boxaTestEvenOddHeight()

Parameters
[in]  boxa1, boxa2
[in]  start  0 if pairing (0,1), etc; 1 if pairing (1,2), etc
[out]  pdel1  root mean of (dely^2 + delh^2) for boxa1
[out]  pdel2  root mean of (dely^2 + delh^2) for boxa2
Returns
0 if OK, 1 on error
Notes:
     (1) This compares differences in the y location and height of
         adjacent boxes, in each of the input boxa.

Definition at line 934 of file boxfunc5.c.

References boxaGetBoxGeometry(), and boxaGetCount().

Referenced by boxaReconcileEvenOddHeight().

◆ boxaWindowedMedian()

BOXA* boxaWindowedMedian ( BOXA *  boxas,
l_int32  halfwin,
l_int32  debug 
)

boxaWindowedMedian()

Parameters
[in]  boxas  source boxa
[in]  halfwin  half width of window over which the median is found
[in]  debug  1 for debug output
Returns
boxad smoothed boxa, or NULL on error
Notes:
     (1) This finds a set of boxes (boxad) where each edge of each box is
         a windowed median smoothed value to the edges of the
         input set of boxes (boxas).
     (2) Invalid input boxes are filled from nearby ones.
     (3) The returned boxad can then be used in boxaModifyWithBoxa()
         to selectively change the boxes in the source boxa.

Definition at line 462 of file boxfunc5.c.

References boxaAddBox(), boxaCopy(), boxaCreate(), boxaDestroy(), boxaExtractAsNuma(), boxaFillSequence(), boxaGetCount(), boxaPlotSides(), boxaPlotSizes(), boxCreate(), L_COPY, L_INSERT, L_USE_ALL_BOXES, numaDestroy(), numaGetIValue(), and numaWindowedMedian().

Referenced by boxaSmoothSequenceMedian().