Leptonica  1.77.0
Image processing and image analysis suite
finditalic.c
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * finditalic.c
29  *
30  * l_ok pixItalicWords()
31  *
32  * Locate italic words. This is an example of the use of
33  * hit-miss binary morphology with binary reconstruction
34  * (filling from a seed into a mask).
35  *
36  * To see how this works, run with prog/italic.png.
37  */
38 
39 #include "allheaders.h"
40 
41  /* --------------------------------------------------------------- *
42  * These hit-miss sels match the slanted edge of italic characters *
43  * --------------------------------------------------------------- */
/* Hit-miss pattern laid out as 13 rows of exactly 6 characters, which is
 * the (h, w) = (13, 6) that pixItalicWords() passes to
 * selCreateFromString().  Every row must keep its full width: the string
 * is read as one contiguous h * w character grid.
 *
 * The "o x" pair steps one column to the left every 4 rows, tracing an
 * edge that leans to the right going up.  In Leptonica's sel string
 * notation 'x' is a hit, 'o' is a miss, ' ' is don't-care and 'C' marks
 * the origin on a don't-care cell.
 *
 * NOTE(review): the column positions were rebuilt from a listing in which
 * runs of spaces had been collapsed.  The "o x" columns follow from the
 * surviving leading/trailing spaces and the uniform slant; the origin is
 * placed in column 2 -- confirm that column against the upstream file. */
static const char *str_ital1 = "   o x"
                               "      "
                               "      "
                               "      "
                               "  o x "
                               "      "
                               "  C   "
                               "      "
                               " o x  "
                               "      "
                               "      "
                               "      "
                               "o x   ";
57 
/* Shorter variant of the slanted-edge pattern: 10 rows of exactly 6
 * characters, which is the (h, w) = (10, 6) that pixItalicWords() passes
 * to selCreateFromString().  Here the "o x" pair steps one column to the
 * left every 3 rows.  In Leptonica's sel string notation 'x' is a hit,
 * 'o' is a miss, ' ' is don't-care and 'C' marks the origin on a
 * don't-care cell.
 *
 * NOTE(review): the column positions were rebuilt from a listing in which
 * runs of spaces had been collapsed.  The "o x" columns follow from the
 * surviving leading/trailing spaces and the uniform slant; the origin is
 * placed in column 2 -- confirm that column against the upstream file. */
static const char *str_ital2 = "   o x"
                               "      "
                               "      "
                               "  o x "
                               "  C   "
                               "      "
                               " o x  "
                               "      "
                               "      "
                               "o x   ";
68 
69  /* ------------------------------------------------------------- *
70  * This sel removes noise that is not oriented as a slanted edge *
71  * ------------------------------------------------------------- */
/* Small structuring element, 4 rows of 2 characters, matching the
 * (h, w) = (4, 2) that pixItalicWords() passes to selCreateFromString().
 * It is used there for the close/open that cleans up the italic seed. */
static const char *str_ital3 =
    " x"    /* row 0 */
    "Cx"    /* row 1: holds the origin marker 'C' */
    "x "    /* row 2 */
    "x ";   /* row 3 */
76 
109 l_ok
110 pixItalicWords(PIX *pixs,
111  BOXA *boxaw,
112  PIX *pixw,
113  BOXA **pboxa,
114  l_int32 debugflag)
115 {
116 char opstring[32];
117 l_int32 size;
118 BOXA *boxa;
119 PIX *pixsd, *pixm, *pixd;
120 SEL *sel_ital1, *sel_ital2, *sel_ital3;
121 
122  PROCNAME("pixItalicWords");
123 
124  if (!pixs)
125  return ERROR_INT("pixs not defined", procName, 1);
126  if (!pboxa)
127  return ERROR_INT("&boxa not defined", procName, 1);
128  if (boxaw && pixw)
129  return ERROR_INT("both boxaw and pixw are defined", procName, 1);
130 
131  sel_ital1 = selCreateFromString(str_ital1, 13, 6, NULL);
132  sel_ital2 = selCreateFromString(str_ital2, 10, 6, NULL);
133  sel_ital3 = selCreateFromString(str_ital3, 4, 2, NULL);
134 
135  /* Make the italic seed: extract with HMT; remove noise.
136  * The noise removal close/open is important to exclude
137  * situations where a small slanted line accidentally
138  * matches sel_ital1. */
139  pixsd = pixHMT(NULL, pixs, sel_ital1);
140  pixClose(pixsd, pixsd, sel_ital3);
141  pixOpen(pixsd, pixsd, sel_ital3);
142 
143  /* Make the word mask. Use input boxes or mask if given. */
144  size = 0; /* init */
145  if (boxaw) {
146  pixm = pixCreateTemplate(pixs);
147  pixMaskBoxa(pixm, pixm, boxaw, L_SET_PIXELS);
148  } else if (pixw) {
149  pixm = pixClone(pixw);
150  } else {
151  pixWordMaskByDilation(pixs, NULL, &size, NULL);
152  L_INFO("dilation size = %d\n", procName, size);
153  snprintf(opstring, sizeof(opstring), "d1.5 + c%d.1", size);
154  pixm = pixMorphSequence(pixs, opstring, 0);
155  }
156 
157  /* Binary reconstruction to fill in those word mask
158  * components for which there is at least one seed pixel. */
159  pixd = pixSeedfillBinary(NULL, pixsd, pixm, 8);
160  boxa = pixConnComp(pixd, NULL, 8);
161  *pboxa = boxa;
162 
163  if (debugflag) {
164  /* Save results at at 2x reduction */
165  lept_mkdir("lept/ital");
166  l_int32 res, upper;
167  BOXA *boxat;
168  GPLOT *gplot;
169  NUMA *na;
170  PIXA *pad;
171  PIX *pix1, *pix2, *pix3;
172  pad = pixaCreate(0);
173  boxat = pixConnComp(pixm, NULL, 8);
174  boxaWriteDebug("/tmp/lept/ital/ital.ba", boxat);
175  pixSaveTiledOutline(pixs, pad, 0.5, 1, 20, 2, 32); /* orig */
176  pixSaveTiledOutline(pixsd, pad, 0.5, 1, 20, 2, 0); /* seed */
177  pix1 = pixConvertTo32(pixm);
178  pixRenderBoxaArb(pix1, boxat, 3, 255, 0, 0);
179  pixSaveTiledOutline(pix1, pad, 0.5, 1, 20, 2, 0); /* mask + outline */
180  pixDestroy(&pix1);
181  pixSaveTiledOutline(pixd, pad, 0.5, 1, 20, 2, 0); /* ital mask */
182  pix1 = pixConvertTo32(pixs);
183  pixRenderBoxaArb(pix1, boxa, 3, 255, 0, 0);
184  pixSaveTiledOutline(pix1, pad, 0.5, 1, 20, 2, 0); /* orig + outline */
185  pixDestroy(&pix1);
186  pix1 = pixCreateTemplate(pixs);
187  pix2 = pixSetBlackOrWhiteBoxa(pix1, boxa, L_SET_BLACK);
188  pixCopy(pix1, pixs);
189  pix3 = pixDilateBrick(NULL, pixs, 3, 3);
190  pixCombineMasked(pix1, pix3, pix2);
191  pixSaveTiledOutline(pix1, pad, 0.5, 1, 20, 2, 0); /* ital bolded */
192  pixDestroy(&pix1);
193  pixDestroy(&pix2);
194  pixDestroy(&pix3);
195  pix2 = pixaDisplay(pad, 0, 0);
196  pixWriteDebug("/tmp/lept/ital/ital.png", pix2, IFF_PNG);
197  pixDestroy(&pix2);
198 
199  /* Assuming the image represents 6 inches of actual page width,
200  * the pixs resolution is approximately
201  * (width of pixs in pixels) / 6
202  * and the images have been saved at half this resolution. */
203  res = pixGetWidth(pixs) / 12;
204  L_INFO("resolution = %d\n", procName, res);
206  pixaConvertToPdf(pad, res, 1.0, L_FLATE_ENCODE, 75, "Italic Finder",
207  "/tmp/lept/ital/ital.pdf");
209  pixaDestroy(&pad);
210  boxaDestroy(&boxat);
211 
212  /* Plot histogram of horizontal white run sizes. A small
213  * initial vertical dilation removes most runs that are neither
214  * inter-character nor inter-word. The larger first peak is
215  * from inter-character runs, and the smaller second peak is
216  * from inter-word runs. */
217  pix1 = pixDilateBrick(NULL, pixs, 1, 15);
218  upper = L_MAX(30, 3 * size);
219  na = pixRunHistogramMorph(pix1, L_RUN_OFF, L_HORIZ, upper);
220  pixDestroy(&pix1);
221  gplot = gplotCreate("/tmp/lept/ital/runhisto", GPLOT_PNG,
222  "Histogram of horizontal runs of white pixels, vs length",
223  "run length", "number of runs");
224  gplotAddPlot(gplot, NULL, na, GPLOT_LINES, "plot1");
225  gplotMakeOutput(gplot);
226  gplotDestroy(&gplot);
227  numaDestroy(&na);
228  }
229 
230  selDestroy(&sel_ital1);
231  selDestroy(&sel_ital2);
232  selDestroy(&sel_ital3);
233  pixDestroy(&pixsd);
234  pixDestroy(&pixm);
235  pixDestroy(&pixd);
236  return 0;
237 }
void gplotDestroy(GPLOT **pgplot)
gplotDestroy()
Definition: gplot.c:197
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
Definition: utils2.c:1944
l_ok gplotAddPlot(GPLOT *gplot, NUMA *nax, NUMA *nay, l_int32 plotstyle, const char *plottitle)
gplotAddPlot()
Definition: gplot.c:263
PIX * pixConvertTo32(PIX *pixs)
pixConvertTo32()
Definition: pixconv.c:3233
PIXA * pixaCreate(l_int32 n)
pixaCreate()
Definition: pixabasic.c:163
l_ok gplotMakeOutput(GPLOT *gplot)
gplotMakeOutput()
Definition: gplot.c:379
GPLOT * gplotCreate(const char *rootname, l_int32 outformat, const char *title, const char *xlabel, const char *ylabel)
gplotCreate()
Definition: gplot.c:138
PIX * pixOpen(PIX *pixd, PIX *pixs, SEL *sel)
pixOpen()
Definition: morph.c:422
PIX * pixDilateBrick(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize)
pixDilateBrick()
Definition: morph.c:684
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:580
PIX * pixaDisplay(PIXA *pixa, l_int32 w, l_int32 h)
pixaDisplay()
Definition: pixafunc2.c:184
PIX * pixCreateTemplate(PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:367
Definition: pix.h:492
l_ok pixCombineMasked(PIX *pixd, PIX *pixs, PIX *pixm)
pixCombineMasked()
Definition: pix3.c:374
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:147
Definition: array.h:59
void selDestroy(SEL **psel)
selDestroy()
Definition: sel1.c:337
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
Definition: pdfio2.c:2459
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
Definition: morphseq.c:133
l_ok boxaWriteDebug(const char *filename, BOXA *boxa)
boxaWriteDebug()
Definition: boxbasic.c:2188
NUMA * pixRunHistogramMorph(PIX *pixs, l_int32 runtype, l_int32 direction, l_int32 maxsize)
pixRunHistogramMorph()
Definition: morphapp.c:1103
Definition: gplot.h:75
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:515
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:543
PIX * pixMaskBoxa(PIX *pixd, PIX *pixs, BOXA *boxa, l_int32 op)
pixMaskBoxa()
Definition: boxfunc3.c:148
Definition: pix.h:454
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:360
PIX * pixSeedfillBinary(PIX *pixd, PIX *pixs, PIX *pixm, l_int32 connectivity)
pixSeedfillBinary()
Definition: seedfill.c:243
l_ok pixaConvertToPdf(PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
pixaConvertToPdf()
Definition: pdfio1.c:752
l_ok pixRenderBoxaArb(PIX *pix, BOXA *boxa, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval)
pixRenderBoxaArb()
Definition: graphics.c:1759
PIX * pixHMT(PIX *pixd, PIX *pixs, SEL *sel)
pixHMT()
Definition: morph.c:338
PIX * pixClose(PIX *pixd, PIX *pixs, SEL *sel)
pixClose()
Definition: morph.c:469
Definition: pix.h:134
PIX * pixCopy(PIX *pixd, PIX *pixs)
pixCopy()
Definition: pix1.c:628
void pixaDestroy(PIXA **ppixa)
pixaDestroy()
Definition: pixabasic.c:408
PIX * pixSetBlackOrWhiteBoxa(PIX *pixs, BOXA *boxa, l_int32 op)
pixSetBlackOrWhiteBoxa()
Definition: boxfunc3.c:283
SEL * selCreateFromString(const char *text, l_int32 h, l_int32 w, const char *name)
selCreateFromString()
Definition: sel1.c:1616