Leptonica  1.77.0
Image processing and image analysis suite
psio1.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
105 #include <string.h>
106 #include "allheaders.h"
107 
108 /* --------------------------------------------*/
109 #if USE_PSIO /* defined in environ.h */
110  /* --------------------------------------------*/
111 
112 /*-------------------------------------------------------------*
113  * Convert files in a directory to PS *
114  *-------------------------------------------------------------*/
115 /*
116  * \brief convertFilesToPS()
117  *
118  * \param[in] dirin input directory
119  * \param[in] substr [optional] substring filter on filenames; can be NULL
120  * \param[in] res typ. 300 or 600 ppi
121  * \param[in] fileout output ps file
122  * \return 0 if OK, 1 on error
123  *
124  * <pre>
125  * Notes:
126  * (1) This generates a PS file for all image files in a specified
127  * directory that contain the substr pattern to be matched.
128  * (2) Each image is written to a separate page in the output PS file.
129  * (3) All images are written compressed:
130  * * if tiffg4 --> use ccittg4
131  * * if jpeg --> use dct
132  * * all others --> use flate
133  * If the image is jpeg or tiffg4, we use the existing compressed
134  * strings for the encoding; otherwise, we read the image into
135  * a pix and flate-encode the pieces.
136  * (4) The resolution is often confusing. It is interpreted
137  * as the resolution of the output display device: "If the
138  * input image were digitized at 300 ppi, what would it
139  * look like when displayed at res ppi." So, for example,
140  * if res = 100 ppi, then the display pixels are 3x larger
141  * than the 300 ppi pixels, and the image will be rendered
142  * 3x larger.
143  * (5) The size of the PostScript file is independent of the resolution,
144  * because the entire file is encoded. The res parameter just
145  * tells the PS decomposer how to render the page. Therefore,
146  * for minimum file size without loss of visual information,
147  * if the output res is less than 300, you should downscale
148  * the image to the output resolution before wrapping in PS.
149  * (6) The "canvas" on which the image is rendered, at the given
150  * output resolution, is a standard page size (8.5 x 11 in).
151  * </pre>
152  */
153 l_ok
154 convertFilesToPS(const char *dirin,
155  const char *substr,
156  l_int32 res,
157  const char *fileout)
158 {
159 SARRAY *sa;
160 
161  PROCNAME("convertFilesToPS");
162 
163  if (!dirin)
164  return ERROR_INT("dirin not defined", procName, 1);
165  if (!fileout)
166  return ERROR_INT("fileout not defined", procName, 1);
167  if (res <= 0) {
168  L_INFO("setting res to 300 ppi\n", procName);
169  res = 300;
170  }
171  if (res < 10 || res > 4000)
172  L_WARNING("res is typically in the range 300-600 ppi\n", procName);
173 
174  /* Get all filtered and sorted full pathnames. */
175  sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
176 
177  /* Generate the PS file. Don't use bounding boxes. */
178  l_psWriteBoundingBox(FALSE);
179  sarrayConvertFilesToPS(sa, res, fileout);
180  l_psWriteBoundingBox(TRUE);
181  sarrayDestroy(&sa);
182  return 0;
183 }
184 
185 
186 /*
187 
188  * \brief sarrayConvertFilesToPS()
189  *
190  * \param[in] sarray of full path names
191  * \param[in] res typ. 300 or 600 ppi
192  * \param[in] fileout output ps file
193  * \return 0 if OK, 1 on error
194  *
195  * <pre>
196  * Notes:
197  * (1) See convertFilesToPS()
198  * </pre>
199  */
200 l_ok
201 sarrayConvertFilesToPS(SARRAY *sa,
202  l_int32 res,
203  const char *fileout)
204 {
205 char *fname;
206 l_int32 i, nfiles, index, firstfile, ret, format;
207 
208  PROCNAME("sarrayConvertFilesToPS");
209 
210  if (!sa)
211  return ERROR_INT("sa not defined", procName, 1);
212  if (!fileout)
213  return ERROR_INT("fileout not defined", procName, 1);
214  if (res <= 0) {
215  L_INFO("setting res to 300 ppi\n", procName);
216  res = 300;
217  }
218  if (res < 10 || res > 4000)
219  L_WARNING("res is typically in the range 300-600 ppi\n", procName);
220 
221  nfiles = sarrayGetCount(sa);
222  firstfile = TRUE;
223  for (i = 0, index = 0; i < nfiles; i++) {
224  fname = sarrayGetString(sa, i, L_NOCOPY);
225  ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
226  if (ret) continue;
227  if (format == IFF_UNKNOWN)
228  continue;
229 
230  writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
231  }
232 
233  return 0;
234 }
235 
236 
237 /*
238  * \brief convertFilesFittedToPS()
239  *
240  * \param[in] dirin input directory
241  * \param[in] substr [optional] substring filter on filenames; can be NULL)
242  * \param[in] xpts desired size in printer points; use 0 for default
243  * \param[in] ypts desired size in printer points; use 0 for default
244  * \param[in] fileout output ps file
245  * \return 0 if OK, 1 on error
246  *
247  * <pre>
248  * Notes:
249  * (1) This generates a PS file for all files in a specified directory
250  * that contain the substr pattern to be matched.
251  * (2) Each image is written to a separate page in the output PS file.
252  * (3) All images are written compressed:
253  * * if tiffg4 --> use ccittg4
254  * * if jpeg --> use dct
255  * * all others --> use flate
256  * If the image is jpeg or tiffg4, we use the existing compressed
257  * strings for the encoding; otherwise, we read the image into
258  * a pix and flate-encode the pieces.
259  * (4) The resolution is internally determined such that the images
260  * are rendered, in at least one direction, at 100% of the given
261  * size in printer points. Use 0.0 for xpts or ypts to get
262  * the default value, which is 612.0 or 792.0, rsp.
263  * (5) The size of the PostScript file is independent of the resolution,
264  * because the entire file is encoded. The %xpts and %ypts
265  * parameter tells the PS decomposer how to render the page.
266  * </pre>
267  */
268 l_ok
269 convertFilesFittedToPS(const char *dirin,
270  const char *substr,
271  l_float32 xpts,
272  l_float32 ypts,
273  const char *fileout)
274 {
275 SARRAY *sa;
276 
277  PROCNAME("convertFilesFittedToPS");
278 
279  if (!dirin)
280  return ERROR_INT("dirin not defined", procName, 1);
281  if (!fileout)
282  return ERROR_INT("fileout not defined", procName, 1);
283  if (xpts <= 0.0) {
284  L_INFO("setting xpts to 612.0 ppi\n", procName);
285  xpts = 612.0;
286  }
287  if (ypts <= 0.0) {
288  L_INFO("setting ypts to 792.0 ppi\n", procName);
289  ypts = 792.0;
290  }
291  if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
292  L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
293 
294  /* Get all filtered and sorted full pathnames. */
295  sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
296 
297  /* Generate the PS file. Don't use bounding boxes. */
298  l_psWriteBoundingBox(FALSE);
299  sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
300  l_psWriteBoundingBox(TRUE);
301  sarrayDestroy(&sa);
302  return 0;
303 }
304 
305 
306 /*
307  * \brief sarrayConvertFilesFittedToPS()
308  *
309  * \param[in] sarray of full path names
310  * \param[in] xpts desired size in printer points; use 0 for default
311  * \param[in] ypts desired size in printer points; use 0 for default
312  * \param[in] fileout output ps file
313  * \return 0 if OK, 1 on error
314  *
315  * <pre>
316  * Notes:
317  * (1) See convertFilesFittedToPS()
318  * </pre>
319  */
320 l_ok
321 sarrayConvertFilesFittedToPS(SARRAY *sa,
322  l_float32 xpts,
323  l_float32 ypts,
324  const char *fileout)
325 {
326 char *fname;
327 l_int32 ret, i, w, h, nfiles, index, firstfile, format, res;
328 
329  PROCNAME("sarrayConvertFilesFittedToPS");
330 
331  if (!sa)
332  return ERROR_INT("sa not defined", procName, 1);
333  if (!fileout)
334  return ERROR_INT("fileout not defined", procName, 1);
335  if (xpts <= 0.0) {
336  L_INFO("setting xpts to 612.0\n", procName);
337  xpts = 612.0;
338  }
339  if (ypts <= 0.0) {
340  L_INFO("setting ypts to 792.0\n", procName);
341  ypts = 792.0;
342  }
343  if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
344  L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
345 
346  nfiles = sarrayGetCount(sa);
347  firstfile = TRUE;
348  for (i = 0, index = 0; i < nfiles; i++) {
349  fname = sarrayGetString(sa, i, L_NOCOPY);
350  ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
351  if (ret) continue;
352  if (format == IFF_UNKNOWN)
353  continue;
354 
355  /* Be sure the entire image is wrapped */
356  if (xpts * h < ypts * w)
357  res = (l_int32)((l_float32)w * 72.0 / xpts);
358  else
359  res = (l_int32)((l_float32)h * 72.0 / ypts);
360 
361  writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
362  }
363 
364  return 0;
365 }
366 
367 
368 /*
369  * \brief writeImageCompressedToPSFile()
370  *
371  * \param[in] filein input image file
372  * \param[in] fileout output ps file
373  * \param[in] res output printer resolution
374  * \param[in,out] pfirstfile 1 if the first image; 0 otherwise
375  * \param[in,out] pindex index of image in output ps file
376  * \return 0 if OK, 1 on error
377  *
378  * <pre>
379  * Notes:
380  * (1) This wraps a single page image in PS.
381  * (2) The input file can be in any format. It is compressed as follows:
382  * * if in tiffg4 --> use ccittg4
383  * * if in jpeg --> use dct
384  * * all others --> use flate
385  * (3) Before the first call, set %firstpage = 1. After writing
386  * the first page, it will be set to 0.
387  * (4) %index is incremented if the page is successfully written.
388  * </pre>
389  */
390 l_ok
391 writeImageCompressedToPSFile(const char *filein,
392  const char *fileout,
393  l_int32 res,
394  l_int32 *pfirstfile,
395  l_int32 *pindex)
396 {
397 const char *op;
398 l_int32 format, retval;
399 
400  PROCNAME("writeImageCompressedToPSFile");
401 
402  if (!pfirstfile || !pindex)
403  return ERROR_INT("&firstfile and &index not defined", procName, 1);
404 
405  findFileFormat(filein, &format);
406  if (format == IFF_UNKNOWN) {
407  L_ERROR("format of %s not known\n", procName, filein);
408  return 1;
409  }
410 
411  op = (*pfirstfile == TRUE) ? "w" : "a";
412  if (format == IFF_JFIF_JPEG) {
413  retval = convertJpegToPS(filein, fileout, op, 0, 0,
414  res, 1.0, *pindex + 1, TRUE);
415  if (retval == 0) {
416  *pfirstfile = FALSE;
417  (*pindex)++;
418  }
419  } else if (format == IFF_TIFF_G4) {
420  retval = convertG4ToPS(filein, fileout, op, 0, 0,
421  res, 1.0, *pindex + 1, FALSE, TRUE);
422  if (retval == 0) {
423  *pfirstfile = FALSE;
424  (*pindex)++;
425  }
426  } else { /* all other image formats */
427  retval = convertFlateToPS(filein, fileout, op, 0, 0,
428  res, 1.0, *pindex + 1, TRUE);
429  if (retval == 0) {
430  *pfirstfile = FALSE;
431  (*pindex)++;
432  }
433  }
434 
435  return retval;
436 }
437 
438 
439 /*-------------------------------------------------------------*
440  * Convert mixed text/image files to PS *
441  *-------------------------------------------------------------*/
442 /*
443  * \brief convertSegmentedPagesToPS()
444  *
445  * \param[in] pagedir input page image directory
446  * \param[in] pagestr [optional] substring filter on page filenames;
447  * can be NULL
448  * \param[in] page_numpre number of characters in page name before number
449  * \param[in] maskdir input mask image directory
450  * \param[in] maskstr [optional] substring filter on mask filenames;
451  * can be NULL
452  * \param[in] mask_numpre number of characters in mask name before number
453  * \param[in] numpost number of characters in names after number
454  * \param[in] maxnum only consider page numbers up to this value
455  * \param[in] textscale scale of text output relative to pixs
456  * \param[in] imagescale scale of image output relative to pixs
457  * \param[in] threshold for binarization; typ. about 190; 0 for default
458  * \param[in] fileout output ps file
459  * \return 0 if OK, 1 on error
460  *
461  * <pre>
462  * Notes:
463  * (1) This generates a PS file for all page image and mask files in two
464  * specified directories and that contain the page numbers as
465  * specified below. The two directories can be the same, in which
466  * case the page and mask files are differentiated by the two
467  * substrings for string matches.
468  * (2) The page images are taken in lexicographic order.
469  * Mask images whose numbers match the page images are used to
470  * segment the page images. Page images without a matching
471  * mask image are scaled, thresholded and rendered entirely as text.
472  * (3) Each PS page is generated as a compressed representation of
473  * the page image, where the part of the image under the mask
474  * is suitably scaled and compressed as DCT (i.e., jpeg), and
475  * the remaining part of the page is suitably scaled, thresholded,
476  * compressed as G4 (i.e., tiff g4), and rendered by painting
477  * black through the resulting text mask.
478  * (4) The scaling is typically 2x down for the DCT component
479  * (%imagescale = 0.5) and 2x up for the G4 component
480  * (%textscale = 2.0).
481  * (5) The resolution is automatically set to fit to a
482  * letter-size (8.5 x 11 inch) page.
483  * (6) Both the DCT and the G4 encoding are PostScript level 2.
484  * (7) It is assumed that the page number is contained within
485  * the basename (the filename without directory or extension).
486  * %page_numpre is the number of characters in the page basename
487  * preceding the actual page number; %mask_numpre is likewise for
488  * the mask basename; %numpost is the number of characters
489  * following the page number. For example, for mask name
490  * mask_006.tif, mask_numpre = 5 ("mask_).
491  * (8) To render a page as is -- that is, with no thresholding
492  * of any pixels -- use a mask in the mask directory that is
493  * full size with all pixels set to 1. If the page is 1 bpp,
494  * it is not necessary to have a mask.
495  * </pre>
496  */
497 l_ok
498 convertSegmentedPagesToPS(const char *pagedir,
499  const char *pagestr,
500  l_int32 page_numpre,
501  const char *maskdir,
502  const char *maskstr,
503  l_int32 mask_numpre,
504  l_int32 numpost,
505  l_int32 maxnum,
506  l_float32 textscale,
507  l_float32 imagescale,
508  l_int32 threshold,
509  const char *fileout)
510 {
511 l_int32 pageno, i, npages;
512 PIX *pixs, *pixm;
513 SARRAY *sapage, *samask;
514 
515  PROCNAME("convertSegmentedPagesToPS");
516 
517  if (!pagedir)
518  return ERROR_INT("pagedir not defined", procName, 1);
519  if (!maskdir)
520  return ERROR_INT("maskdir not defined", procName, 1);
521  if (!fileout)
522  return ERROR_INT("fileout not defined", procName, 1);
523  if (threshold <= 0) {
524  L_INFO("setting threshold to 190\n", procName);
525  threshold = 190;
526  }
527 
528  /* Get numbered full pathnames; max size of sarray is maxnum */
529  sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
530  page_numpre, numpost, maxnum);
531  samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
532  mask_numpre, numpost, maxnum);
533  sarrayPadToSameSize(sapage, samask, "");
534  if ((npages = sarrayGetCount(sapage)) == 0) {
535  sarrayDestroy(&sapage);
536  sarrayDestroy(&samask);
537  return ERROR_INT("no matching pages found", procName, 1);
538  }
539 
540  /* Generate the PS file */
541  pageno = 1;
542  for (i = 0; i < npages; i++) {
543  if ((pixs = pixReadIndexed(sapage, i)) == NULL)
544  continue;
545  pixm = pixReadIndexed(samask, i);
546  pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
547  threshold, pageno, fileout);
548  pixDestroy(&pixs);
549  pixDestroy(&pixm);
550  pageno++;
551  }
552 
553  sarrayDestroy(&sapage);
554  sarrayDestroy(&samask);
555  return 0;
556 }
557 
558 
559 /*
560  * \brief pixWriteSegmentedPageToPS()
561  *
562  * \param[in] pixs all depths; colormap ok
563  * \param[in] pixm [optional] 1 bpp segmentation mask over image region
564  * \param[in] textscale scale of text output relative to pixs
565  * \param[in] imagescale scale of image output relative to pixs
566  * \param[in] threshold for binarization; typ. about 190; 0 for default
567  * \param[in] pageno page number in set; use 1 for new output file
568  * \param[in] fileout output ps file
569  * \return 0 if OK, 1 on error
570  *
571  * <pre>
572  * Notes:
573  * (1) This generates the PS string for a mixed text/image page,
574  * and adds it to an existing file if %pageno > 1.
575  * The PS output is determined by fitting the result to
576  * a letter-size (8.5 x 11 inch) page.
577  * (2) The two images (pixs and pixm) are at the same resolution
578  * (typically 300 ppi). They are used to generate two compressed
579  * images, pixb and pixc, that are put directly into the output
580  * PS file.
581  * (3) pixb is the text component. In the PostScript world, we think of
582  * it as a mask through which we paint black. It is produced by
583  * scaling pixs by %textscale, and thresholding to 1 bpp.
584  * (4) pixc is the image component, which is that part of pixs under
585  * the mask pixm. It is scaled from pixs by %imagescale.
586  * (5) Typical values are textscale = 2.0 and imagescale = 0.5.
587  * (6) If pixm == NULL, the page has only text. If it is all black,
588  * the page is all image and has no text.
589  * (7) This can be used to write a multi-page PS file, by using
590  * sequential page numbers with the same output file. It can
591  * also be used to write separate PS files for each page,
592  * by using different output files with %pageno = 0 or 1.
593  * </pre>
594  */
595 l_ok
596 pixWriteSegmentedPageToPS(PIX *pixs,
597  PIX *pixm,
598  l_float32 textscale,
599  l_float32 imagescale,
600  l_int32 threshold,
601  l_int32 pageno,
602  const char *fileout)
603 {
604 l_int32 alltext, notext, d, ret;
605 l_uint32 val;
606 l_float32 scaleratio;
607 PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
608 
609  PROCNAME("pixWriteSegmentedPageToPS");
610 
611  if (!pixs)
612  return ERROR_INT("pixs not defined", procName, 1);
613  if (!fileout)
614  return ERROR_INT("fileout not defined", procName, 1);
615  if (imagescale <= 0.0 || textscale <= 0.0)
616  return ERROR_INT("relative scales must be > 0.0", procName, 1);
617 
618  /* Analyze the page. Determine the ratio by which the
619  * binary text mask is scaled relative to the image part.
620  * If there is no image region (alltext == TRUE), the
621  * text mask will be rendered directly to fit the page,
622  * and scaleratio = 1.0. */
623  alltext = TRUE;
624  notext = FALSE;
625  scaleratio = 1.0;
626  if (pixm) {
627  pixZero(pixm, &alltext); /* pixm empty: all text */
628  if (alltext) {
629  pixm = NULL; /* treat it as not existing here */
630  } else {
631  pixmi = pixInvert(NULL, pixm);
632  pixZero(pixmi, &notext); /* pixm full; no text */
633  pixDestroy(&pixmi);
634  scaleratio = textscale / imagescale;
635  }
636  }
637 
638  if (pixGetDepth(pixs) == 1) { /* render tiff g4 */
639  pixb = pixClone(pixs);
640  pixc = NULL;
641  } else {
642  pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */
643 
644  /* Get the binary text mask. Note that pixg cannot be a
645  * clone of pixs, because it may be altered by pixSetMasked(). */
646  pixb = NULL;
647  if (notext == FALSE) {
648  d = pixGetDepth(pixt);
649  if (d == 8)
650  pixg = pixCopy(NULL, pixt);
651  else /* d == 32 */
652  pixg = pixConvertRGBToLuminance(pixt);
653  if (pixm) /* clear out the image parts */
654  pixSetMasked(pixg, pixm, 255);
655  if (textscale == 1.0)
656  pixsc = pixClone(pixg);
657  else if (textscale >= 0.7)
658  pixsc = pixScaleGrayLI(pixg, textscale, textscale);
659  else
660  pixsc = pixScaleAreaMap(pixg, textscale, textscale);
661  pixb = pixThresholdToBinary(pixsc, threshold);
662  pixDestroy(&pixg);
663  pixDestroy(&pixsc);
664  }
665 
666  /* Get the scaled image region */
667  pixc = NULL;
668  if (pixm) {
669  if (imagescale == 1.0)
670  pixsc = pixClone(pixt); /* can possibly be a clone of pixs */
671  else
672  pixsc = pixScale(pixt, imagescale, imagescale);
673 
674  /* If pixm is not full, clear the pixels in pixsc
675  * corresponding to bg in pixm, where there can be text
676  * that is written through the mask pixb. Note that
677  * we could skip this and use pixsc directly in
678  * pixWriteMixedToPS(); however, clearing these
679  * non-image regions to a white background will reduce
680  * the size of pixc (relative to pixsc), and hence
681  * reduce the size of the PS file that is generated.
682  * Use a copy so that we don't accidentally alter pixs. */
683  if (notext == FALSE) {
684  pixmis = pixScale(pixm, imagescale, imagescale);
685  pixmi = pixInvert(NULL, pixmis);
686  val = (d == 8) ? 0xff : 0xffffff00;
687  pixc = pixCopy(NULL, pixsc);
688  pixSetMasked(pixc, pixmi, val); /* clear non-image part */
689  pixDestroy(&pixmis);
690  pixDestroy(&pixmi);
691  } else {
692  pixc = pixClone(pixsc);
693  }
694  pixDestroy(&pixsc);
695  }
696  pixDestroy(&pixt);
697  }
698 
699  /* Generate the PS file. Don't use bounding boxes. */
700  l_psWriteBoundingBox(FALSE);
701  ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
702  l_psWriteBoundingBox(TRUE);
703  pixDestroy(&pixb);
704  pixDestroy(&pixc);
705  return ret;
706 }
707 
708 
709 /*
710  * \brief pixWriteMixedToPS()
711  *
712  * \param[in] pixb [optional] 1 bpp mask; typically for text
713  * \param[in] pixc [optional] 8 or 32 bpp image regions
714  * \param[in] scale scale factor for rendering pixb, relative to pixc;
715  * typ. 4.0
716  * \param[in] pageno page number in set; use 1 for new output file
717  * \param[in] fileout output ps file
718  * \return 0 if OK, 1 on error
719  *
720  * <pre>
721  * Notes:
722  * (1) This low level function generates the PS string for a mixed
723  * text/image page, and adds it to an existing file if
724  * %pageno > 1.
725  * (2) The two images (pixb and pixc) are typically generated at the
726  * resolution that they will be rendered in the PS file.
727  * (3) pixb is the text component. In the PostScript world, we think of
728  * it as a mask through which we paint black.
729  * (4) pixc is the (typically halftone) image component. It is
730  * white in the rest of the page. To minimize the size of the
731  * PS file, it should be rendered at a resolution that is at
732  * least equal to its actual resolution.
733  * (5) %scale gives the ratio of resolution of pixb to pixc.
734  * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
735  * so %scale = 4.0. If one of the images is not defined,
736  * the value of %scale is ignored.
737  * (6) We write pixc with DCT compression (jpeg). This is followed
738  * by painting the text as black through the mask pixb. If
739  * pixc doesn't exist (alltext), we write the text with the
740  * PS "image" operator instead of the "imagemask" operator,
741  * because ghostscript's ps2pdf is flaky when the latter is used.
742  * (7) The actual output resolution is determined by fitting the
743  * result to a letter-size (8.5 x 11 inch) page.
744  * <pre>
745  */
746 l_ok
747 pixWriteMixedToPS(PIX *pixb,
748  PIX *pixc,
749  l_float32 scale,
750  l_int32 pageno,
751  const char *fileout)
752 {
753 char *tname;
754 const char *op;
755 l_int32 resb, resc, endpage, maskop, ret;
756 
757  PROCNAME("pixWriteMixedToPS");
758 
759  if (!pixb && !pixc)
760  return ERROR_INT("pixb and pixc both undefined", procName, 1);
761  if (!fileout)
762  return ERROR_INT("fileout not defined", procName, 1);
763 
764  /* Compute the resolution that fills a letter-size page. */
765  if (!pixc) {
766  resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
767  } else {
768  resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
769  if (pixb)
770  resb = (l_int32)(scale * resc);
771  }
772 
773  /* Write the jpeg image first */
774  if (pixc) {
775  tname = l_makeTempFilename();
776  pixWrite(tname, pixc, IFF_JFIF_JPEG);
777  endpage = (pixb) ? FALSE : TRUE;
778  op = (pageno <= 1) ? "w" : "a";
779  ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
780  pageno, endpage);
781  lept_rmfile(tname);
782  LEPT_FREE(tname);
783  if (ret)
784  return ERROR_INT("jpeg data not written", procName, 1);
785  }
786 
787  /* Write the binary data, either directly or, if there is
788  * a jpeg image on the page, through the mask. */
789  if (pixb) {
790  tname = l_makeTempFilename();
791  pixWrite(tname, pixb, IFF_TIFF_G4);
792  op = (pageno <= 1 && !pixc) ? "w" : "a";
793  maskop = (pixc) ? 1 : 0;
794  ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
795  pageno, maskop, 1);
796  lept_rmfile(tname);
797  LEPT_FREE(tname);
798  if (ret)
799  return ERROR_INT("tiff data not written", procName, 1);
800  }
801 
802  return 0;
803 }
804 
805 
806 /*-------------------------------------------------------------*
807  * Convert any image file to PS for embedding *
808  *-------------------------------------------------------------*/
809 /*
810  * \brief convertToPSEmbed()
811  *
812  * \param[in] filein input image file, any format
813  * \param[in] fileout output ps file
814  * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3
815  * \return 0 if OK, 1 on error
816  *
817  * <pre>
818  * Notes:
819  * (1) This is a wrapper function that generates a PS file with
820  * a bounding box, from any input image file.
821  * (2) Do the best job of compression given the specified level.
822  * %level=3 does flate compression on anything that is not
823  * tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
824  * (3) If %level=2 and the file is not tiffg4 or jpeg, it will
825  * first be written to file as jpeg with quality = 75.
826  * This will remove the colormap and cause some degradation
827  * in the image.
828  * (4) The bounding box is required when a program such as TeX
829  * (through epsf) places and rescales the image. It is
830  * sized for fitting the image to an 8.5 x 11.0 inch page.
831  * </pre>
832  */
833 l_ok
834 convertToPSEmbed(const char *filein,
835  const char *fileout,
836  l_int32 level)
837 {
838 char *tname;
839 l_int32 d, format;
840 PIX *pix, *pixs;
841 
842  PROCNAME("convertToPSEmbed");
843 
844  if (!filein)
845  return ERROR_INT("filein not defined", procName, 1);
846  if (!fileout)
847  return ERROR_INT("fileout not defined", procName, 1);
848  if (level != 1 && level != 2 && level != 3) {
849  L_ERROR("invalid level specified; using level 2\n", procName);
850  level = 2;
851  }
852 
853  if (level == 1) { /* no compression */
854  pixWritePSEmbed(filein, fileout);
855  return 0;
856  }
857 
858  /* Find the format and write out directly if in jpeg or tiff g4 */
859  findFileFormat(filein, &format);
860  if (format == IFF_JFIF_JPEG) {
861  convertJpegToPSEmbed(filein, fileout);
862  return 0;
863  } else if (format == IFF_TIFF_G4) {
864  convertG4ToPSEmbed(filein, fileout);
865  return 0;
866  } else if (format == IFF_UNKNOWN) {
867  L_ERROR("format of %s not known\n", procName, filein);
868  return 1;
869  }
870 
871  /* If level 3, flate encode. */
872  if (level == 3) {
873  convertFlateToPSEmbed(filein, fileout);
874  return 0;
875  }
876 
877  /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
878  if ((pixs = pixRead(filein)) == NULL)
879  return ERROR_INT("image not read from file", procName, 1);
880  d = pixGetDepth(pixs);
881  if ((d == 2 || d == 4) && !pixGetColormap(pixs))
882  pix = pixConvertTo8(pixs, 0);
883  else if (d == 16)
884  pix = pixConvert16To8(pixs, 1);
885  else
887 
888  d = pixGetDepth(pix);
889  tname = l_makeTempFilename();
890  if (d == 1) {
891  pixWrite(tname, pix, IFF_TIFF_G4);
892  convertG4ToPSEmbed(tname, fileout);
893  } else {
894  pixWrite(tname, pix, IFF_JFIF_JPEG);
895  convertJpegToPSEmbed(tname, fileout);
896  }
897 
898  lept_rmfile(tname);
899  LEPT_FREE(tname);
900  pixDestroy(&pix);
901  pixDestroy(&pixs);
902  return 0;
903 }
904 
905 
906 /*-------------------------------------------------------------*
907  * Write all images in a pixa out to PS *
908  *-------------------------------------------------------------*/
909 /*
910  * \brief pixaWriteCompressedToPS()
911  *
912  * \param[in] pixa any set of images
913  * \param[in] fileout output ps file
914  * \param[in] res of input image
915  * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3
916  * \return 0 if OK, 1 on error
917  *
918  * <pre>
919  * Notes:
920  * (1) This generates a PS file of multiple page images, all
921  * with bounding boxes.
922  * (2) It compresses to:
923  * cmap + level2: jpeg
924  * cmap + level3: flate
925  * 1 bpp: tiffg4
926  * 2 or 4 bpp + level2: jpeg
927  * 2 or 4 bpp + level3: flate
928  * 8 bpp: jpeg
929  * 16 bpp: flate
930  * 32 bpp: jpeg
931  * (3) To generate a pdf, use: ps2pdf <infile.ps> <outfile.pdf>
932  * </pre>
933  */
934 l_ok
935 pixaWriteCompressedToPS(PIXA *pixa,
936  const char *fileout,
937  l_int32 res,
938  l_int32 level)
939 {
940 char *tname;
941 l_int32 i, n, firstfile, index, writeout, d;
942 PIX *pix, *pixt;
943 PIXCMAP *cmap;
944 
945  PROCNAME("pixaWriteCompressedToPS");
946 
947  if (!pixa)
948  return ERROR_INT("pixa not defined", procName, 1);
949  if (!fileout)
950  return ERROR_INT("fileout not defined", procName, 1);
951  if (level != 2 && level != 3) {
952  L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName);
953  level = 2;
954  }
955 
956  n = pixaGetCount(pixa);
957  firstfile = TRUE;
958  index = 0;
959  tname = l_makeTempFilename();
960  for (i = 0; i < n; i++) {
961  writeout = TRUE;
962  pix = pixaGetPix(pixa, i, L_CLONE);
963  d = pixGetDepth(pix);
964  cmap = pixGetColormap(pix);
965  if (d == 1) {
966  pixWrite(tname, pix, IFF_TIFF_G4);
967  } else if (cmap) {
968  if (level == 2) {
969  pixt = pixConvertForPSWrap(pix);
970  pixWrite(tname, pixt, IFF_JFIF_JPEG);
971  pixDestroy(&pixt);
972  } else { /* level == 3 */
973  pixWrite(tname, pix, IFF_PNG);
974  }
975  } else if (d == 16) {
976  if (level == 2)
977  L_WARNING("d = 16; must write out flate\n", procName);
978  pixWrite(tname, pix, IFF_PNG);
979  } else if (d == 2 || d == 4) {
980  if (level == 2) {
981  pixt = pixConvertTo8(pix, 0);
982  pixWrite(tname, pixt, IFF_JFIF_JPEG);
983  pixDestroy(&pixt);
984  } else { /* level == 3 */
985  pixWrite(tname, pix, IFF_PNG);
986  }
987  } else if (d == 8 || d == 32) {
988  pixWrite(tname, pix, IFF_JFIF_JPEG);
989  } else { /* shouldn't happen */
990  L_ERROR("invalid depth: %d\n", procName, d);
991  writeout = FALSE;
992  }
993  pixDestroy(&pix);
994 
995  if (writeout)
996  writeImageCompressedToPSFile(tname, fileout, res,
997  &firstfile, &index);
998  }
999 
1000  lept_rmfile(tname);
1001  LEPT_FREE(tname);
1002  return 0;
1003 }
1004 
1005 
1006 /* --------------------------------------------*/
1007 #endif /* USE_PSIO */
1008 /* --------------------------------------------*/
PIX * pixConvertRGBToLuminance(PIX *pixs)
pixConvertRGBToLuminance()
Definition: pixconv.c:733
PIX * pixRemoveColormap(PIX *pixs, l_int32 type)
pixRemoveColormap()
Definition: pixconv.c:322
l_ok pixSetMasked(PIX *pixd, PIX *pixm, l_uint32 val)
pixSetMasked()
Definition: pix3.c:155
Definition: pix.h:716
PIX * pixReadIndexed(SARRAY *sa, l_int32 index)
pixReadIndexed()
Definition: readfile.c:277
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
Definition: pixconv.c:3041
l_ok convertJpegToPSEmbed(const char *filein, const char *fileout)
convertJpegToPSEmbed()
Definition: psio2.c:643
PIX * pixScaleAreaMap(PIX *pix, l_float32 scalex, l_float32 scaley)
pixScaleAreaMap()
Definition: scale1.c:1912
l_ok convertFlateToPSEmbed(const char *filein, const char *fileout)
convertFlateToPSEmbed()
Definition: psio2.c:1518
PIX * pixInvert(PIX *pixd, PIX *pixs)
pixInvert()
Definition: pix3.c:1395
l_ok pixWritePSEmbed(const char *filein, const char *fileout)
pixWritePSEmbed()
Definition: psio2.c:152
PIX * pixThresholdToBinary(PIX *pixs, l_int32 thresh)
pixThresholdToBinary()
Definition: grayquant.c:443
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
Definition: sarray1.c:972
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
Definition: sarray1.c:1669
Definition: array.h:116
l_ok convertG4ToPSEmbed(const char *filein, const char *fileout)
convertG4ToPSEmbed()
Definition: psio2.c:1039
l_ok convertJpegToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertJpegToPS()
Definition: psio2.c:758
l_ok findFileFormat(const char *filename, l_int32 *pformat)
findFileFormat()
Definition: readfile.c:580
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:681
l_int32 getResLetterPage(l_int32 w, l_int32 h, l_float32 fillfract)
getResLetterPage()
Definition: psio2.c:1957
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:515
l_ok convertG4ToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 maskflag, l_int32 endpage)
convertG4ToPS()
Definition: psio2.c:1145
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:543
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
Definition: sarray1.c:1717
Definition: pix.h:454
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:621
PIX * pixScaleGrayLI(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScaleGrayLI()
Definition: scale1.c:778
PIX * pixRead(const char *filename)
pixRead()
Definition: readfile.c:190
PIX * pixConvertForPSWrap(PIX *pixs)
pixConvertForPSWrap()
Definition: pixconv.c:3832
char * l_makeTempFilename()
l_makeTempFilename()
Definition: utils2.c:3086
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
Definition: pixabasic.c:672
Definition: pix.h:134
Definition: pix.h:719
l_ok pixZero(PIX *pix, l_int32 *pempty)
pixZero()
Definition: pix3.c:1701
PIX * pixCopy(PIX *pixd, PIX *pixs)
pixCopy()
Definition: pix1.c:628
l_ok convertFlateToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertFlateToPS()
Definition: psio2.c:1631
PIX * pixConvert16To8(PIX *pixs, l_int32 type)
pixConvert16To8()
Definition: pixconv.c:1689
PIX * pixScale(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScale()
Definition: scale1.c:244
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:631
PIX * pixConvertTo8Or32(PIX *pixs, l_int32 copyflag, l_int32 warnflag)
pixConvertTo8Or32()
Definition: pixconv.c:3393
l_ok pixReadHeader(const char *filename, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap)
pixReadHeader()
Definition: readfile.c:442
l_int32 lept_rmfile(const char *filepath)
lept_rmfile()
Definition: utils2.c:2243
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:355