Leptonica  1.77.0
Image processing and image analysis suite
pdfio1.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
192 #include <string.h>
193 #include <math.h>
194 #include "allheaders.h"
195 
196 /* --------------------------------------------*/
197 #if USE_PDFIO /* defined in environ.h */
198  /* --------------------------------------------*/
199 
200  /* Typical scan resolution in ppi (pixels/inch) */
     /* Used in this file as the fallback resolution when a caller passes
      * res <= 0 to the segmented converter. */
201 static const l_int32 DEFAULT_INPUT_RES = 300;
202 
203 
204 /*---------------------------------------------------------------------*
205  * Convert specified image files to pdf (one image file per page) *
206  *---------------------------------------------------------------------*/
238 l_ok
239 convertFilesToPdf(const char *dirname,
240  const char *substr,
241  l_int32 res,
242  l_float32 scalefactor,
243  l_int32 type,
244  l_int32 quality,
245  const char *title,
246  const char *fileout)
247 {
248 l_int32 ret;
249 SARRAY *sa;
250 
251  PROCNAME("convertFilesToPdf");
252 
253  if (!dirname)
254  return ERROR_INT("dirname not defined", procName, 1);
255  if (!fileout)
256  return ERROR_INT("fileout not defined", procName, 1);
257 
258  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
259  return ERROR_INT("sa not made", procName, 1);
260  ret = saConvertFilesToPdf(sa, res, scalefactor, type, quality,
261  title, fileout);
262  sarrayDestroy(&sa);
263  return ret;
264 }
265 
266 
286 l_ok
288  l_int32 res,
289  l_float32 scalefactor,
290  l_int32 type,
291  l_int32 quality,
292  const char *title,
293  const char *fileout)
294 {
295 l_uint8 *data;
296 l_int32 ret;
297 size_t nbytes;
298 
299  PROCNAME("saConvertFilesToPdf");
300 
301  if (!sa)
302  return ERROR_INT("sa not defined", procName, 1);
303 
304  ret = saConvertFilesToPdfData(sa, res, scalefactor, type, quality,
305  title, &data, &nbytes);
306  if (ret) {
307  if (data) LEPT_FREE(data);
308  return ERROR_INT("pdf data not made", procName, 1);
309  }
310 
311  ret = l_binaryWrite(fileout, "w", data, nbytes);
312  LEPT_FREE(data);
313  if (ret)
314  L_ERROR("pdf data not written to file\n", procName);
315  return ret;
316 }
317 
318 
339 l_ok
341  l_int32 res,
342  l_float32 scalefactor,
343  l_int32 type,
344  l_int32 quality,
345  const char *title,
346  l_uint8 **pdata,
347  size_t *pnbytes)
348 {
349 char *fname;
350 const char *pdftitle;
351 l_uint8 *imdata;
352 l_int32 i, n, ret, pagetype, npages, scaledres;
353 size_t imbytes;
354 L_BYTEA *ba;
355 PIX *pixs, *pix;
356 L_PTRA *pa_data;
357 
358  PROCNAME("saConvertFilesToPdfData");
359 
360  if (!pdata)
361  return ERROR_INT("&data not defined", procName, 1);
362  *pdata = NULL;
363  if (!pnbytes)
364  return ERROR_INT("&nbytes not defined", procName, 1);
365  *pnbytes = 0;
366  if (!sa)
367  return ERROR_INT("sa not defined", procName, 1);
368  if (scalefactor <= 0.0) scalefactor = 1.0;
369  if (type < 0 || type > L_FLATE_ENCODE) {
370  L_WARNING("invalid compression type; using per-page default\n",
371  procName);
372  type = 0;
373  }
374 
375  /* Generate all the encoded pdf strings */
376  n = sarrayGetCount(sa);
377  pa_data = ptraCreate(n);
378  pdftitle = NULL;
379  for (i = 0; i < n; i++) {
380  if (i && (i % 10 == 0)) fprintf(stderr, ".. %d ", i);
381  fname = sarrayGetString(sa, i, L_NOCOPY);
382  if ((pixs = pixRead(fname)) == NULL) {
383  L_ERROR("image not readable from file %s\n", procName, fname);
384  continue;
385  }
386  if (!pdftitle)
387  pdftitle = (title) ? title : fname;
388  if (scalefactor != 1.0)
389  pix = pixScale(pixs, scalefactor, scalefactor);
390  else
391  pix = pixClone(pixs);
392  pixDestroy(&pixs);
393  scaledres = (l_int32)(res * scalefactor);
394  if (type != 0) {
395  pagetype = type;
396  } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
397  pixDestroy(&pix);
398  L_ERROR("encoding type selection failed for file %s\n",
399  procName, fname);
400  continue;
401  }
402  ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
403  0, 0, scaledres, pdftitle, NULL, 0);
404  pixDestroy(&pix);
405  if (ret) {
406  LEPT_FREE(imdata);
407  L_ERROR("pdf encoding failed for %s\n", procName, fname);
408  continue;
409  }
410  ba = l_byteaInitFromMem(imdata, imbytes);
411  LEPT_FREE(imdata);
412  ptraAdd(pa_data, ba);
413  }
414  ptraGetActualCount(pa_data, &npages);
415  if (npages == 0) {
416  L_ERROR("no pdf files made\n", procName);
417  ptraDestroy(&pa_data, FALSE, FALSE);
418  return 1;
419  }
420 
421  /* Concatenate them */
422  fprintf(stderr, "\nconcatenating ... ");
423  ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
424  fprintf(stderr, "done\n");
425 
426  ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */
427  for (i = 0; i < npages; i++) {
428  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
429  l_byteaDestroy(&ba);
430  }
431  ptraDestroy(&pa_data, FALSE, FALSE);
432  return ret;
433 }
434 
435 
456 l_ok
458  l_int32 *ptype)
459 {
460 l_int32 w, h, d, factor, ncolors;
461 PIXCMAP *cmap;
462 
463  PROCNAME("selectDefaultPdfEncoding");
464 
465  if (!pix)
466  return ERROR_INT("pix not defined", procName, 1);
467  if (!ptype)
468  return ERROR_INT("&type not defined", procName, 1);
469  *ptype = L_FLATE_ENCODE; /* default universal encoding */
470  pixGetDimensions(pix, &w, &h, &d);
471  cmap = pixGetColormap(pix);
472  if (d == 8 && !cmap) {
473  factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 20000.));
474  pixNumColors(pix, factor, &ncolors);
475  if (ncolors < 20)
476  *ptype = L_FLATE_ENCODE;
477  else
478  *ptype = L_JPEG_ENCODE;
479  } else if (d == 1) {
480  *ptype = L_G4_ENCODE;
481  } else if (cmap || d == 2 || d == 4) {
482  *ptype = L_FLATE_ENCODE;
483  } else if (d == 8 || d == 32) {
484  *ptype = L_JPEG_ENCODE;
485  } else {
486  return ERROR_INT("type selection failure", procName, 1);
487  }
488 
489  return 0;
490 }
491 
492 
493 /*---------------------------------------------------------------------*
494  * Convert specified image files to pdf without scaling *
495  *---------------------------------------------------------------------*/
519 l_ok
520 convertUnscaledFilesToPdf(const char *dirname,
521  const char *substr,
522  const char *title,
523  const char *fileout)
524 {
525 l_int32 ret;
526 SARRAY *sa;
527 
528  PROCNAME("convertUnscaledFilesToPdf");
529 
530  if (!dirname)
531  return ERROR_INT("dirname not defined", procName, 1);
532  if (!fileout)
533  return ERROR_INT("fileout not defined", procName, 1);
534 
535  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
536  return ERROR_INT("sa not made", procName, 1);
537  ret = saConvertUnscaledFilesToPdf(sa, title, fileout);
538  sarrayDestroy(&sa);
539  return ret;
540 }
541 
542 
557 l_ok
559  const char *title,
560  const char *fileout)
561 {
562 l_uint8 *data;
563 l_int32 ret;
564 size_t nbytes;
565 
566  PROCNAME("saConvertUnscaledFilesToPdf");
567 
568  if (!sa)
569  return ERROR_INT("sa not defined", procName, 1);
570 
571  ret = saConvertUnscaledFilesToPdfData(sa, title, &data, &nbytes);
572  if (ret) {
573  if (data) LEPT_FREE(data);
574  return ERROR_INT("pdf data not made", procName, 1);
575  }
576 
577  ret = l_binaryWrite(fileout, "w", data, nbytes);
578  LEPT_FREE(data);
579  if (ret)
580  L_ERROR("pdf data not written to file\n", procName);
581  return ret;
582 }
583 
584 
595 l_ok
597  const char *title,
598  l_uint8 **pdata,
599  size_t *pnbytes)
600 {
601 char *fname;
602 l_uint8 *imdata;
603 l_int32 i, n, ret, npages;
604 size_t imbytes;
605 L_BYTEA *ba;
606 L_PTRA *pa_data;
607 
608  PROCNAME("saConvertUnscaledFilesToPdfData");
609 
610  if (!pdata)
611  return ERROR_INT("&data not defined", procName, 1);
612  *pdata = NULL;
613  if (!pnbytes)
614  return ERROR_INT("&nbytes not defined", procName, 1);
615  *pnbytes = 0;
616  if (!sa)
617  return ERROR_INT("sa not defined", procName, 1);
618 
619  /* Generate all the encoded pdf strings */
620  n = sarrayGetCount(sa);
621  pa_data = ptraCreate(n);
622  for (i = 0; i < n; i++) {
623  if (i && (i % 10 == 0)) fprintf(stderr, ".. %d ", i);
624  fname = sarrayGetString(sa, i, L_NOCOPY);
625 
626  /* Generate the pdf data */
627  if (convertUnscaledToPdfData(fname, title, &imdata, &imbytes))
628  continue;
629 
630  /* ... and add it to the array of single page data */
631  ba = l_byteaInitFromMem(imdata, imbytes);
632  if (imdata) LEPT_FREE(imdata);
633  ptraAdd(pa_data, ba);
634  }
635  ptraGetActualCount(pa_data, &npages);
636  if (npages == 0) {
637  L_ERROR("no pdf files made\n", procName);
638  ptraDestroy(&pa_data, FALSE, FALSE);
639  return 1;
640  }
641 
642  /* Concatenate to generate a multipage pdf */
643  fprintf(stderr, "\nconcatenating ... ");
644  ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
645  fprintf(stderr, "done\n");
646 
647  /* Clean up */
648  ptraGetActualCount(pa_data, &npages); /* maybe failed to read some files */
649  for (i = 0; i < npages; i++) {
650  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
651  l_byteaDestroy(&ba);
652  }
653  ptraDestroy(&pa_data, FALSE, FALSE);
654  return ret;
655 }
656 
657 
667 l_ok
668 convertUnscaledToPdfData(const char *fname,
669  const char *title,
670  l_uint8 **pdata,
671  size_t *pnbytes)
672 {
673 const char *pdftitle = NULL;
674 char *tail = NULL;
675 l_int32 format;
676 L_COMP_DATA *cid;
677 
678  PROCNAME("convertUnscaledToPdfData");
679 
680  if (!pdata)
681  return ERROR_INT("&data not defined", procName, 1);
682  *pdata = NULL;
683  if (!pnbytes)
684  return ERROR_INT("&nbytes not defined", procName, 1);
685  *pnbytes = 0;
686  if (!fname)
687  return ERROR_INT("fname not defined", procName, 1);
688 
689  findFileFormat(fname, &format);
690  if (format == IFF_UNKNOWN) {
691  L_WARNING("file %s format is unknown; skip\n", procName, fname);
692  return 1;
693  }
694  if (format == IFF_PS || format == IFF_LPDF) {
695  L_WARNING("file %s format is %d; skip\n", procName, fname, format);
696  return 1;
697  }
698 
699  /* Generate the image data required for pdf generation, always
700  * in binary (not ascii85) coding; jpeg files are never transcoded. */
701  l_generateCIDataForPdf(fname, NULL, 0, &cid);
702  if (!cid) {
703  L_ERROR("file %s format is %d; unreadable\n", procName, fname, format);
704  return 1;
705  }
706 
707  /* If %title == NULL, use the tail of %fname. */
708  if (title) {
709  pdftitle = title;
710  } else {
711  splitPathAtDirectory(fname, NULL, &tail);
712  pdftitle = tail;
713  }
714 
715  /* Generate the pdf string for this page (image). This destroys
716  * the cid by attaching it to an lpd and destroying the lpd. */
717  cidConvertToPdfData(cid, pdftitle, pdata, pnbytes);
718  LEPT_FREE(tail);
719  return 0;
720 }
721 
722 
723 /*---------------------------------------------------------------------*
724  * Convert multiple images to pdf (one image per page) *
725  *---------------------------------------------------------------------*/
751 l_ok
753  l_int32 res,
754  l_float32 scalefactor,
755  l_int32 type,
756  l_int32 quality,
757  const char *title,
758  const char *fileout)
759 {
760 l_uint8 *data;
761 l_int32 ret;
762 size_t nbytes;
763 
764  PROCNAME("pixaConvertToPdf");
765 
766  if (!pixa)
767  return ERROR_INT("pixa not defined", procName, 1);
768 
769  ret = pixaConvertToPdfData(pixa, res, scalefactor, type, quality,
770  title, &data, &nbytes);
771  if (ret) {
772  LEPT_FREE(data);
773  return ERROR_INT("conversion to pdf failed", procName, 1);
774  }
775 
776  ret = l_binaryWrite(fileout, "w", data, nbytes);
777  LEPT_FREE(data);
778  if (ret)
779  L_ERROR("pdf data not written to file\n", procName);
780  return ret;
781 }
782 
783 
803 l_ok
805  l_int32 res,
806  l_float32 scalefactor,
807  l_int32 type,
808  l_int32 quality,
809  const char *title,
810  l_uint8 **pdata,
811  size_t *pnbytes)
812 {
813 l_uint8 *imdata;
814 l_int32 i, n, ret, scaledres, pagetype;
815 size_t imbytes;
816 L_BYTEA *ba;
817 PIX *pixs, *pix;
818 L_PTRA *pa_data;
819 
820  PROCNAME("pixaConvertToPdfData");
821 
822  if (!pdata)
823  return ERROR_INT("&data not defined", procName, 1);
824  *pdata = NULL;
825  if (!pnbytes)
826  return ERROR_INT("&nbytes not defined", procName, 1);
827  *pnbytes = 0;
828  if (!pixa)
829  return ERROR_INT("pixa not defined", procName, 1);
830  if (scalefactor <= 0.0) scalefactor = 1.0;
831  if (type < 0 || type > L_FLATE_ENCODE) {
832  L_WARNING("invalid compression type; using per-page default\n",
833  procName);
834  type = 0;
835  }
836 
837  /* Generate all the encoded pdf strings */
838  n = pixaGetCount(pixa);
839  pa_data = ptraCreate(n);
840  for (i = 0; i < n; i++) {
841  if ((pixs = pixaGetPix(pixa, i, L_CLONE)) == NULL) {
842  L_ERROR("pix[%d] not retrieved\n", procName, i);
843  continue;
844  }
845  if (scalefactor != 1.0)
846  pix = pixScale(pixs, scalefactor, scalefactor);
847  else
848  pix = pixClone(pixs);
849  pixDestroy(&pixs);
850  scaledres = (l_int32)(res * scalefactor);
851  if (type != 0) {
852  pagetype = type;
853  } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
854  L_ERROR("encoding type selection failed for pix[%d]\n",
855  procName, i);
856  pixDestroy(&pix);
857  continue;
858  }
859  ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
860  0, 0, scaledres, title, NULL, 0);
861  pixDestroy(&pix);
862  if (ret) {
863  LEPT_FREE(imdata);
864  L_ERROR("pdf encoding failed for pix[%d]\n", procName, i);
865  continue;
866  }
867  ba = l_byteaInitFromMem(imdata, imbytes);
868  LEPT_FREE(imdata);
869  ptraAdd(pa_data, ba);
870  }
871  ptraGetActualCount(pa_data, &n);
872  if (n == 0) {
873  L_ERROR("no pdf files made\n", procName);
874  ptraDestroy(&pa_data, FALSE, FALSE);
875  return 1;
876  }
877 
878  /* Concatenate them */
879  ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
880 
881  ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */
882  for (i = 0; i < n; i++) {
883  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
884  l_byteaDestroy(&ba);
885  }
886  ptraDestroy(&pa_data, FALSE, FALSE);
887  return ret;
888 }
889 
890 
891 /*---------------------------------------------------------------------*
892  * Single page, multi-image converters *
893  *---------------------------------------------------------------------*/
949 l_ok
950 convertToPdf(const char *filein,
951  l_int32 type,
952  l_int32 quality,
953  const char *fileout,
954  l_int32 x,
955  l_int32 y,
956  l_int32 res,
957  const char *title,
958  L_PDF_DATA **plpd,
959  l_int32 position)
960 {
961 l_uint8 *data;
962 l_int32 ret;
963 size_t nbytes;
964 
965  PROCNAME("convertToPdf");
966 
967  if (!filein)
968  return ERROR_INT("filein not defined", procName, 1);
969  if (!plpd || (position == L_LAST_IMAGE)) {
970  if (!fileout)
971  return ERROR_INT("fileout not defined", procName, 1);
972  }
973  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
974  type != L_FLATE_ENCODE)
975  return ERROR_INT("invalid conversion type", procName, 1);
976 
977  if (convertToPdfData(filein, type, quality, &data, &nbytes, x, y,
978  res, title, plpd, position))
979  return ERROR_INT("pdf data not made", procName, 1);
980 
981  if (!plpd || (position == L_LAST_IMAGE)) {
982  ret = l_binaryWrite(fileout, "w", data, nbytes);
983  LEPT_FREE(data);
984  if (ret)
985  return ERROR_INT("pdf data not written to file", procName, 1);
986  }
987 
988  return 0;
989 }
990 
991 
1020 l_ok
1021 convertImageDataToPdf(l_uint8 *imdata,
1022  size_t size,
1023  l_int32 type,
1024  l_int32 quality,
1025  const char *fileout,
1026  l_int32 x,
1027  l_int32 y,
1028  l_int32 res,
1029  const char *title,
1030  L_PDF_DATA **plpd,
1031  l_int32 position)
1032 {
1033 l_int32 ret;
1034 PIX *pix;
1035 
1036  PROCNAME("convertImageDataToPdf");
1037 
1038  if (!imdata)
1039  return ERROR_INT("image data not defined", procName, 1);
1040  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1041  type != L_FLATE_ENCODE)
1042  return ERROR_INT("invalid conversion type", procName, 1);
1043  if (!plpd || (position == L_LAST_IMAGE)) {
1044  if (!fileout)
1045  return ERROR_INT("fileout not defined", procName, 1);
1046  }
1047 
1048  if ((pix = pixReadMem(imdata, size)) == NULL)
1049  return ERROR_INT("pix not read", procName, 1);
1050  ret = pixConvertToPdf(pix, type, quality, fileout, x, y, res,
1051  title, plpd, position);
1052  pixDestroy(&pix);
1053  return ret;
1054 }
1055 
1056 
1085 l_ok
1086 convertToPdfData(const char *filein,
1087  l_int32 type,
1088  l_int32 quality,
1089  l_uint8 **pdata,
1090  size_t *pnbytes,
1091  l_int32 x,
1092  l_int32 y,
1093  l_int32 res,
1094  const char *title,
1095  L_PDF_DATA **plpd,
1096  l_int32 position)
1097 {
1098 PIX *pix;
1099 
1100  PROCNAME("convertToPdfData");
1101 
1102  if (!pdata)
1103  return ERROR_INT("&data not defined", procName, 1);
1104  *pdata = NULL;
1105  if (!pnbytes)
1106  return ERROR_INT("&nbytes not defined", procName, 1);
1107  *pnbytes = 0;
1108  if (!filein)
1109  return ERROR_INT("filein not defined", procName, 1);
1110  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1111  type != L_FLATE_ENCODE)
1112  return ERROR_INT("invalid conversion type", procName, 1);
1113 
1114  if ((pix = pixRead(filein)) == NULL)
1115  return ERROR_INT("pix not made", procName, 1);
1116 
1117  pixConvertToPdfData(pix, type, quality, pdata, pnbytes,
1118  x, y, res, (title) ? title : filein, plpd, position);
1119  pixDestroy(&pix);
1120  return 0;
1121 }
1122 
1123 
1153 l_ok
1155  size_t size,
1156  l_int32 type,
1157  l_int32 quality,
1158  l_uint8 **pdata,
1159  size_t *pnbytes,
1160  l_int32 x,
1161  l_int32 y,
1162  l_int32 res,
1163  const char *title,
1164  L_PDF_DATA **plpd,
1165  l_int32 position)
1166 {
1167 l_int32 ret;
1168 PIX *pix;
1169 
1170  PROCNAME("convertImageDataToPdfData");
1171 
1172  if (!pdata)
1173  return ERROR_INT("&data not defined", procName, 1);
1174  *pdata = NULL;
1175  if (!pnbytes)
1176  return ERROR_INT("&nbytes not defined", procName, 1);
1177  *pnbytes = 0;
1178  if (!imdata)
1179  return ERROR_INT("image data not defined", procName, 1);
1180  if (plpd) { /* part of multi-page invocation */
1181  if (position == L_FIRST_IMAGE)
1182  *plpd = NULL;
1183  }
1184 
1185  if ((pix = pixReadMem(imdata, size)) == NULL)
1186  return ERROR_INT("pix not read", procName, 1);
1187  ret = pixConvertToPdfData(pix, type, quality, pdata, pnbytes,
1188  x, y, res, title, plpd, position);
1189  pixDestroy(&pix);
1190  return ret;
1191 }
1192 
1193 
1222 l_ok
1224  l_int32 type,
1225  l_int32 quality,
1226  const char *fileout,
1227  l_int32 x,
1228  l_int32 y,
1229  l_int32 res,
1230  const char *title,
1231  L_PDF_DATA **plpd,
1232  l_int32 position)
1233 {
1234 l_uint8 *data;
1235 l_int32 ret;
1236 size_t nbytes;
1237 
1238  PROCNAME("pixConvertToPdf");
1239 
1240  if (!pix)
1241  return ERROR_INT("pix not defined", procName, 1);
1242  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1243  type != L_FLATE_ENCODE)
1244  return ERROR_INT("invalid conversion type", procName, 1);
1245  if (!plpd || (position == L_LAST_IMAGE)) {
1246  if (!fileout)
1247  return ERROR_INT("fileout not defined", procName, 1);
1248  }
1249 
1250  if (pixConvertToPdfData(pix, type, quality, &data, &nbytes,
1251  x, y, res, title, plpd, position)) {
1252  LEPT_FREE(data);
1253  return ERROR_INT("pdf data not made", procName, 1);
1254  }
1255 
1256  if (!plpd || (position == L_LAST_IMAGE)) {
1257  ret = l_binaryWrite(fileout, "w", data, nbytes);
1258  LEPT_FREE(data);
1259  if (ret)
1260  return ERROR_INT("pdf data not written to file", procName, 1);
1261  }
1262  return 0;
1263 }
1264 
1265 
1285 l_ok
1287  PIX *pix,
1288  l_int32 res,
1289  const char *title)
1290 {
1291 l_uint8 *data;
1292 size_t nbytes, nbytes_written;
1293 
1294  PROCNAME("pixWriteStreamPdf");
1295 
1296  if (!fp)
1297  return ERROR_INT("stream not opened", procName, 1);
1298  if (!pix)
1299  return ERROR_INT("pix not defined", procName, 1);
1300 
1301  if (pixWriteMemPdf(&data, &nbytes, pix, res, title) != 0) {
1302  LEPT_FREE(data);
1303  return ERROR_INT("pdf data not made", procName, 1);
1304  }
1305 
1306  nbytes_written = fwrite(data, 1, nbytes, fp);
1307  LEPT_FREE(data);
1308  if (nbytes != nbytes_written)
1309  return ERROR_INT("failure writing pdf data to stream", procName, 1);
1310  return 0;
1311 }
1312 
1313 
1334 l_ok
1335 pixWriteMemPdf(l_uint8 **pdata,
1336  size_t *pnbytes,
1337  PIX *pix,
1338  l_int32 res,
1339  const char *title)
1340 {
1341 l_int32 ret, d, type;
1342 PIXCMAP *cmap;
1343 
1344  PROCNAME("pixWriteMemPdf");
1345 
1346  if (pdata) *pdata = NULL;
1347  if (pnbytes) *pnbytes = 0;
1348  if (!pdata || !pnbytes)
1349  return ERROR_INT("&data or &nbytes not defined", procName, 1);
1350  if (!pix)
1351  return ERROR_INT("pix not defined", procName, 1);
1352 
1353  d = pixGetDepth(pix);
1354  cmap = pixGetColormap(pix);
1355  if (d == 1)
1356  type = L_G4_ENCODE;
1357  else if (cmap || d == 2 || d == 4 || d == 16)
1358  type = L_FLATE_ENCODE;
1359  else /* d == 8 (no cmap) or d == 32 */
1360  type = L_JPEG_ENCODE;
1361 
1362  ret = pixConvertToPdfData(pix, type, 75, pdata, pnbytes,
1363  0, 0, res, title, NULL, 0);
1364  if (ret)
1365  return ERROR_INT("pdf data not made", procName, 1);
1366  return 0;
1367 }
1368 
1369 
1370 /*---------------------------------------------------------------------*
1371  * Segmented multi-page, multi-image converter *
1372  *---------------------------------------------------------------------*/
1414 l_ok
1415 convertSegmentedFilesToPdf(const char *dirname,
1416  const char *substr,
1417  l_int32 res,
1418  l_int32 type,
1419  l_int32 thresh,
1420  BOXAA *baa,
1421  l_int32 quality,
1422  l_float32 scalefactor,
1423  const char *title,
1424  const char *fileout)
1425 {
1426 char *fname;
1427 l_uint8 *imdata, *data;
1428 l_int32 i, npages, nboxa, nboxes, ret;
1429 size_t imbytes, databytes;
1430 BOXA *boxa;
1431 L_BYTEA *ba;
1432 L_PTRA *pa_data;
1433 SARRAY *sa;
1434 
1435  PROCNAME("convertSegmentedFilesToPdf");
1436 
1437  if (!dirname)
1438  return ERROR_INT("dirname not defined", procName, 1);
1439  if (!fileout)
1440  return ERROR_INT("fileout not defined", procName, 1);
1441 
1442  if ((sa = getNumberedPathnamesInDirectory(dirname, substr, 0, 0, 10000))
1443  == NULL)
1444  return ERROR_INT("sa not made", procName, 1);
1445 
1446  npages = sarrayGetCount(sa);
1447  /* If necessary, extend the boxaa, which is page-aligned with
1448  * the image files, to be as large as the set of images. */
1449  if (baa) {
1450  nboxa = boxaaGetCount(baa);
1451  if (nboxa < npages) {
1452  boxa = boxaCreate(1);
1453  boxaaExtendWithInit(baa, npages, boxa);
1454  boxaDestroy(&boxa);
1455  }
1456  }
1457 
1458  /* Generate and save all the encoded pdf strings */
1459  pa_data = ptraCreate(npages);
1460  for (i = 0; i < npages; i++) {
1461  fname = sarrayGetString(sa, i, L_NOCOPY);
1462  if (!strcmp(fname, "")) continue;
1463  boxa = NULL;
1464  if (baa) {
1465  boxa = boxaaGetBoxa(baa, i, L_CLONE);
1466  nboxes = boxaGetCount(boxa);
1467  if (nboxes == 0)
1468  boxaDestroy(&boxa);
1469  }
1470  ret = convertToPdfDataSegmented(fname, res, type, thresh, boxa,
1471  quality, scalefactor, title,
1472  &imdata, &imbytes);
1473  boxaDestroy(&boxa); /* safe; in case nboxes > 0 */
1474  if (ret) {
1475  L_ERROR("pdf encoding failed for %s\n", procName, fname);
1476  continue;
1477  }
1478  ba = l_byteaInitFromMem(imdata, imbytes);
1479  if (imdata) LEPT_FREE(imdata);
1480  ptraAdd(pa_data, ba);
1481  }
1482  sarrayDestroy(&sa);
1483 
1484  ptraGetActualCount(pa_data, &npages);
1485  if (npages == 0) {
1486  L_ERROR("no pdf files made\n", procName);
1487  ptraDestroy(&pa_data, FALSE, FALSE);
1488  return 1;
1489  }
1490 
1491  /* Concatenate */
1492  ret = ptraConcatenatePdfToData(pa_data, NULL, &data, &databytes);
1493 
1494  /* Clean up */
1495  ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */
1496  for (i = 0; i < npages; i++) {
1497  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
1498  l_byteaDestroy(&ba);
1499  }
1500  ptraDestroy(&pa_data, FALSE, FALSE);
1501 
1502  if (ret) {
1503  if (data) LEPT_FREE(data);
1504  return ERROR_INT("pdf data not made", procName, 1);
1505  }
1506 
1507  ret = l_binaryWrite(fileout, "w", data, databytes);
1508  LEPT_FREE(data);
1509  if (ret)
1510  L_ERROR("pdf data not written to file\n", procName);
1511  return ret;
1512 }
1513 
1514 
1534 BOXAA *
1535 convertNumberedMasksToBoxaa(const char *dirname,
1536  const char *substr,
1537  l_int32 numpre,
1538  l_int32 numpost)
1539 {
1540 char *fname;
1541 l_int32 i, n;
1542 BOXA *boxa;
1543 BOXAA *baa;
1544 PIX *pix;
1545 SARRAY *sa;
1546 
1547  PROCNAME("convertNumberedMasksToBoxaa");
1548 
1549  if (!dirname)
1550  return (BOXAA *)ERROR_PTR("dirname not defined", procName, NULL);
1551 
1552  if ((sa = getNumberedPathnamesInDirectory(dirname, substr, numpre,
1553  numpost, 10000)) == NULL)
1554  return (BOXAA *)ERROR_PTR("sa not made", procName, NULL);
1555 
1556  /* Generate and save all the encoded pdf strings */
1557  n = sarrayGetCount(sa);
1558  baa = boxaaCreate(n);
1559  boxa = boxaCreate(1);
1560  boxaaInitFull(baa, boxa);
1561  boxaDestroy(&boxa);
1562  for (i = 0; i < n; i++) {
1563  fname = sarrayGetString(sa, i, L_NOCOPY);
1564  if (!strcmp(fname, "")) continue;
1565  if ((pix = pixRead(fname)) == NULL) {
1566  L_WARNING("invalid image on page %d\n", procName, i);
1567  continue;
1568  }
1569  boxa = pixConnComp(pix, NULL, 8);
1570  boxaaReplaceBoxa(baa, i, boxa);
1571  pixDestroy(&pix);
1572  }
1573 
1574  sarrayDestroy(&sa);
1575  return baa;
1576 }
1577 
1578 
1579 /*---------------------------------------------------------------------*
1580  * Segmented single page, multi-image converters *
1581  *---------------------------------------------------------------------*/
1643 l_ok
1644 convertToPdfSegmented(const char *filein,
1645  l_int32 res,
1646  l_int32 type,
1647  l_int32 thresh,
1648  BOXA *boxa,
1649  l_int32 quality,
1650  l_float32 scalefactor,
1651  const char *title,
1652  const char *fileout)
1653 {
1654 l_int32 ret;
1655 PIX *pixs;
1656 
1657  PROCNAME("convertToPdfSegmented");
1658 
1659  if (!filein)
1660  return ERROR_INT("filein not defined", procName, 1);
1661  if (!fileout)
1662  return ERROR_INT("fileout not defined", procName, 1);
1663  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1664  type != L_FLATE_ENCODE)
1665  return ERROR_INT("invalid conversion type", procName, 1);
1666  if (boxa && scalefactor > 1.0) {
1667  L_WARNING("setting scalefactor to 1.0\n", procName);
1668  scalefactor = 1.0;
1669  }
1670 
1671  if ((pixs = pixRead(filein)) == NULL)
1672  return ERROR_INT("pixs not made", procName, 1);
1673 
1674  ret = pixConvertToPdfSegmented(pixs, res, type, thresh, boxa, quality,
1675  scalefactor, (title) ? title : filein,
1676  fileout);
1677  pixDestroy(&pixs);
1678  return ret;
1679 }
1680 
1681 
1703 l_ok
1705  l_int32 res,
1706  l_int32 type,
1707  l_int32 thresh,
1708  BOXA *boxa,
1709  l_int32 quality,
1710  l_float32 scalefactor,
1711  const char *title,
1712  const char *fileout)
1713 {
1714 l_uint8 *data;
1715 l_int32 ret;
1716 size_t nbytes;
1717 
1718  PROCNAME("pixConvertToPdfSegmented");
1719 
1720  if (!pixs)
1721  return ERROR_INT("pixs not defined", procName, 1);
1722  if (!fileout)
1723  return ERROR_INT("fileout not defined", procName, 1);
1724  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1725  type != L_FLATE_ENCODE)
1726  return ERROR_INT("invalid conversion type", procName, 1);
1727  if (boxa && scalefactor > 1.0) {
1728  L_WARNING("setting scalefactor to 1.0\n", procName);
1729  scalefactor = 1.0;
1730  }
1731 
1732  ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, quality,
1733  scalefactor, title, &data, &nbytes);
1734  if (ret)
1735  return ERROR_INT("pdf generation failure", procName, 1);
1736 
1737  ret = l_binaryWrite(fileout, "w", data, nbytes);
1738  if (data) LEPT_FREE(data);
1739  return ret;
1740 }
1741 
1742 
1766 l_ok
1767 convertToPdfDataSegmented(const char *filein,
1768  l_int32 res,
1769  l_int32 type,
1770  l_int32 thresh,
1771  BOXA *boxa,
1772  l_int32 quality,
1773  l_float32 scalefactor,
1774  const char *title,
1775  l_uint8 **pdata,
1776  size_t *pnbytes)
1777 {
1778 l_int32 ret;
1779 PIX *pixs;
1780 
1781  PROCNAME("convertToPdfDataSegmented");
1782 
1783  if (!pdata)
1784  return ERROR_INT("&data not defined", procName, 1);
1785  *pdata = NULL;
1786  if (!pnbytes)
1787  return ERROR_INT("&nbytes not defined", procName, 1);
1788  *pnbytes = 0;
1789  if (!filein)
1790  return ERROR_INT("filein not defined", procName, 1);
1791  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1792  type != L_FLATE_ENCODE)
1793  return ERROR_INT("invalid conversion type", procName, 1);
1794  if (boxa && scalefactor > 1.0) {
1795  L_WARNING("setting scalefactor to 1.0\n", procName);
1796  scalefactor = 1.0;
1797  }
1798 
1799  if ((pixs = pixRead(filein)) == NULL)
1800  return ERROR_INT("pixs not made", procName, 1);
1801 
1802  ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa,
1803  quality, scalefactor,
1804  (title) ? title : filein,
1805  pdata, pnbytes);
1806  pixDestroy(&pixs);
1807  return ret;
1808 }
1809 
1810 
1833 l_ok
1835  l_int32 res,
1836  l_int32 type,
1837  l_int32 thresh,
1838  BOXA *boxa,
1839  l_int32 quality,
1840  l_float32 scalefactor,
1841  const char *title,
1842  l_uint8 **pdata,
1843  size_t *pnbytes)
1844 {
1845 l_int32 i, nbox, seq, bx, by, bw, bh, upscale;
1846 l_float32 scale;
1847 BOX *box, *boxc, *box2;
1848 PIX *pix, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6;
1849 PIXCMAP *cmap;
1850 L_PDF_DATA *lpd;
1851 
1852  PROCNAME("pixConvertToPdfDataSegmented");
1853 
1854  if (!pdata)
1855  return ERROR_INT("&data not defined", procName, 1);
1856  *pdata = NULL;
1857  if (!pnbytes)
1858  return ERROR_INT("&nbytes not defined", procName, 1);
1859  *pnbytes = 0;
1860  if (!pixs)
1861  return ERROR_INT("pixs not defined", procName, 1);
1862  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1863  type != L_FLATE_ENCODE)
1864  return ERROR_INT("invalid conversion type", procName, 1);
1865  if (boxa && (scalefactor <= 0.0 || scalefactor > 1.0)) {
1866  L_WARNING("setting scalefactor to 1.0\n", procName);
1867  scalefactor = 1.0;
1868  }
1869 
1870  /* Adjust scalefactor so that the product with res gives an integer */
1871  if (res <= 0)
1872  res = DEFAULT_INPUT_RES;
1873  scale = (l_float32)((l_int32)(scalefactor * res + 0.5)) / (l_float32)res;
1874  cmap = pixGetColormap(pixs);
1875 
1876  /* Simple case: single image to be encoded */
1877  if (!boxa || boxaGetCount(boxa) == 0) {
1878  if (pixGetDepth(pixs) > 1 && type == L_G4_ENCODE) {
1879  if (cmap)
1881  else
1882  pixt1 = pixConvertTo8(pixs, FALSE);
1883  pixt2 = pixScaleGray2xLIThresh(pixt1, thresh);
1884  pixConvertToPdfData(pixt2, type, quality, pdata, pnbytes,
1885  0, 0, 2 * res, title, NULL, 0);
1886  pixDestroy(&pixt1);
1887  pixDestroy(&pixt2);
1888  } else {
1889  pixConvertToPdfData(pixs, type, quality, pdata, pnbytes,
1890  0, 0, res, title, NULL, 0);
1891  }
1892  return 0;
1893  }
1894 
1895  /* Multiple images to be encoded. If %type == L_G4_ENCODE,
1896  * jpeg encode a version of pixs that is blanked in the non-image
1897  * regions, and paint the scaled non-image part onto it through a mask.
1898  * Otherwise, we must put the non-image part down first and
1899  * then render all the image regions separately on top of it,
1900  * at their own resolution. */
1901  pixt1 = pixSetBlackOrWhiteBoxa(pixs, boxa, L_SET_WHITE); /* non-image */
1902  nbox = boxaGetCount(boxa);
1903  if (type == L_G4_ENCODE) {
1904  pixt2 = pixCreateTemplate(pixs); /* only image regions */
1906  for (i = 0; i < nbox; i++) {
1907  box = boxaGetBox(boxa, i, L_CLONE);
1908  pix = pixClipRectangle(pixs, box, &boxc);
1909  boxGetGeometry(boxc, &bx, &by, &bw, &bh);
1910  pixRasterop(pixt2, bx, by, bw, bh, PIX_SRC, pix, 0, 0);
1911  pixDestroy(&pix);
1912  boxDestroy(&box);
1913  boxDestroy(&boxc);
1914  }
1916  if (pixGetDepth(pixt3) == 1)
1917  pixt4 = pixScaleToGray(pixt3, scale);
1918  else
1919  pixt4 = pixScale(pixt3, scale, scale);
1920  pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes,
1921  0, 0, (l_int32)(scale * res), title,
1922  &lpd, L_FIRST_IMAGE);
1923 
1924  if (pixGetDepth(pixt1) == 1) {
1925  pixt5 = pixClone(pixt1);
1926  upscale = 1;
1927  } else {
1928  pixt6 = pixConvertTo8(pixt1, 0);
1929  pixt5 = pixScaleGray2xLIThresh(pixt6, thresh);
1930  pixDestroy(&pixt6);
1931  upscale = 2;
1932  }
1933  pixConvertToPdfData(pixt5, L_G4_ENCODE, quality, pdata, pnbytes,
1934  0, 0, upscale * res, title, &lpd, L_LAST_IMAGE);
1935  pixDestroy(&pixt2);
1936  pixDestroy(&pixt3);
1937  pixDestroy(&pixt4);
1938  pixDestroy(&pixt5);
1939  } else {
1940  /* Put the non-image part down first. This is the full
1941  size of the page, so we can use it to find the page
1942  height in pixels, which is required for determining
1943  the LL corner of the image relative to the LL corner
1944  of the page. */
1945  pixConvertToPdfData(pixt1, type, quality, pdata, pnbytes, 0, 0,
1946  res, title, &lpd, L_FIRST_IMAGE);
1947  for (i = 0; i < nbox; i++) {
1948  box = boxaGetBox(boxa, i, L_CLONE);
1949  pixt2 = pixClipRectangle(pixs, box, &boxc);
1951  if (pixGetDepth(pixt3) == 1)
1952  pixt4 = pixScaleToGray(pixt3, scale);
1953  else
1954  pixt4 = pixScale(pixt3, scale, scale);
1955  box2 = boxTransform(boxc, 0, 0, scale, scale);
1956  boxGetGeometry(box2, &bx, &by, NULL, &bh);
1957  seq = (i == nbox - 1) ? L_LAST_IMAGE : L_NEXT_IMAGE;
1958  pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes,
1959  bx, by, (l_int32)(scale * res), title,
1960  &lpd, seq);
1961  pixDestroy(&pixt2);
1962  pixDestroy(&pixt3);
1963  pixDestroy(&pixt4);
1964  boxDestroy(&box);
1965  boxDestroy(&boxc);
1966  boxDestroy(&box2);
1967  }
1968  }
1969 
1970  pixDestroy(&pixt1);
1971  return 0;
1972 }
1973 
1974 
1975 /*---------------------------------------------------------------------*
1976  * Multi-page concatenation *
1977  *---------------------------------------------------------------------*/
1997 l_ok
1998 concatenatePdf(const char *dirname,
1999  const char *substr,
2000  const char *fileout)
2001 {
2002 l_int32 ret;
2003 SARRAY *sa;
2004 
2005  PROCNAME("concatenatePdf");
2006 
2007  if (!dirname)
2008  return ERROR_INT("dirname not defined", procName, 1);
2009  if (!fileout)
2010  return ERROR_INT("fileout not defined", procName, 1);
2011 
2012  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
2013  return ERROR_INT("sa not made", procName, 1);
2014  ret = saConcatenatePdf(sa, fileout);
2015  sarrayDestroy(&sa);
2016  return ret;
2017 }
2018 
2019 
2032 l_ok
2034  const char *fileout)
2035 {
2036 l_uint8 *data;
2037 l_int32 ret;
2038 size_t nbytes;
2039 
2040  PROCNAME("saConcatenatePdf");
2041 
2042  if (!sa)
2043  return ERROR_INT("sa not defined", procName, 1);
2044  if (!fileout)
2045  return ERROR_INT("fileout not defined", procName, 1);
2046 
2047  ret = saConcatenatePdfToData(sa, &data, &nbytes);
2048  if (ret)
2049  return ERROR_INT("pdf data not made", procName, 1);
2050  ret = l_binaryWrite(fileout, "w", data, nbytes);
2051  LEPT_FREE(data);
2052  return ret;
2053 }
2054 
2055 
2068 l_ok
2070  const char *fileout)
2071 {
2072 l_uint8 *data;
2073 l_int32 ret;
2074 size_t nbytes;
2075 
2076  PROCNAME("ptraConcatenatePdf");
2077 
2078  if (!pa)
2079  return ERROR_INT("pa not defined", procName, 1);
2080  if (!fileout)
2081  return ERROR_INT("fileout not defined", procName, 1);
2082 
2083  ret = ptraConcatenatePdfToData(pa, NULL, &data, &nbytes);
2084  if (ret)
2085  return ERROR_INT("pdf data not made", procName, 1);
2086  ret = l_binaryWrite(fileout, "w", data, nbytes);
2087  LEPT_FREE(data);
2088  return ret;
2089 }
2090 
2091 
2112 l_ok
2113 concatenatePdfToData(const char *dirname,
2114  const char *substr,
2115  l_uint8 **pdata,
2116  size_t *pnbytes)
2117 {
2118 l_int32 ret;
2119 SARRAY *sa;
2120 
2121  PROCNAME("concatenatePdfToData");
2122 
2123  if (!pdata)
2124  return ERROR_INT("&data not defined", procName, 1);
2125  *pdata = NULL;
2126  if (!pnbytes)
2127  return ERROR_INT("&nbytes not defined", procName, 1);
2128  *pnbytes = 0;
2129  if (!dirname)
2130  return ERROR_INT("dirname not defined", procName, 1);
2131 
2132  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
2133  return ERROR_INT("sa not made", procName, 1);
2134  ret = saConcatenatePdfToData(sa, pdata, pnbytes);
2135  sarrayDestroy(&sa);
2136  return ret;
2137 }
2138 
2139 
2153 l_ok
2155  l_uint8 **pdata,
2156  size_t *pnbytes)
2157 {
2158 char *fname;
2159 l_int32 i, npages, ret;
2160 L_BYTEA *bas;
2161 L_PTRA *pa_data; /* input pdf data for each page */
2162 
2163  PROCNAME("saConcatenatePdfToData");
2164 
2165  if (!pdata)
2166  return ERROR_INT("&data not defined", procName, 1);
2167  *pdata = NULL;
2168  if (!pnbytes)
2169  return ERROR_INT("&nbytes not defined", procName, 1);
2170  *pnbytes = 0;
2171  if (!sa)
2172  return ERROR_INT("sa not defined", procName, 1);
2173 
2174  /* Read the pdf files into memory */
2175  if ((npages = sarrayGetCount(sa)) == 0)
2176  return ERROR_INT("no filenames found", procName, 1);
2177  pa_data = ptraCreate(npages);
2178  for (i = 0; i < npages; i++) {
2179  fname = sarrayGetString(sa, i, L_NOCOPY);
2180  bas = l_byteaInitFromFile(fname);
2181  ptraAdd(pa_data, bas);
2182  }
2183 
2184  ret = ptraConcatenatePdfToData(pa_data, sa, pdata, pnbytes);
2185 
2186  /* Cleanup: some pages could have been removed */
2187  ptraGetActualCount(pa_data, &npages);
2188  for (i = 0; i < npages; i++) {
2189  bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
2190  l_byteaDestroy(&bas);
2191  }
2192  ptraDestroy(&pa_data, FALSE, FALSE);
2193  return ret;
2194 }
2195 
2196 /* --------------------------------------------*/
2197 #endif /* USE_PDFIO */
2198 /* --------------------------------------------*/
PIX * pixRemoveColormap(PIX *pixs, l_int32 type)
pixRemoveColormap()
Definition: pixconv.c:322
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
Definition: pdfio2.c:307
PIX * pixScaleToGray(PIX *pixs, l_float32 scalefactor)
pixScaleToGray()
Definition: scale2.c:204
l_ok ptraGetActualCount(L_PTRA *pa, l_int32 *pcount)
ptraGetActualCount()
Definition: ptra.c:727
l_ok saConvertFilesToPdf(SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
saConvertFilesToPdf()
Definition: pdfio1.c:287
l_ok boxaaReplaceBoxa(BOXAA *baa, l_int32 index, BOXA *boxa)
boxaaReplaceBoxa()
Definition: boxbasic.c:1634
l_ok convertToPdfData(const char *filein, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertToPdfData()
Definition: pdfio1.c:1086
l_ok convertToPdfSegmented(const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout)
convertToPdfSegmented()
Definition: pdfio1.c:1644
l_int32 boxaaGetCount(BOXAA *baa)
boxaaGetCount()
Definition: boxbasic.c:1424
l_ok convertImageDataToPdf(l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertImageDataToPdf()
Definition: pdfio1.c:1021
l_ok saConvertFilesToPdfData(SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes)
saConvertFilesToPdfData()
Definition: pdfio1.c:340
l_ok saConcatenatePdfToData(SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
saConcatenatePdfToData()
Definition: pdfio1.c:2154
Definition: pix.h:716
l_ok pixRasterop(PIX *pixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, PIX *pixs, l_int32 sx, l_int32 sy)
pixRasterop()
Definition: rop.c:193
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
Definition: pixconv.c:3041
L_BYTEA * l_byteaInitFromMem(const l_uint8 *data, size_t size)
l_byteaInitFromMem()
Definition: bytearray.c:121
PIX * pixScaleGray2xLIThresh(PIX *pixs, l_int32 thresh)
pixScaleGray2xLIThresh()
Definition: scale1.c:929
l_ok boxaaExtendWithInit(BOXAA *baa, l_int32 maxindex, BOXA *boxa)
boxaaExtendWithInit()
Definition: boxbasic.c:1593
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:580
l_ok pixWriteMemPdf(l_uint8 **pdata, size_t *pnbytes, PIX *pix, l_int32 res, const char *title)
pixWriteMemPdf()
Definition: pdfio1.c:1335
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
Definition: pdfio2.c:1428
BOX * boxTransform(BOX *box, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley)
boxTransform()
Definition: boxfunc2.c:141
l_ok pixNumColors(PIX *pixs, l_int32 factor, l_int32 *pncolors)
pixNumColors()
l_ok pixConvertToPdfSegmented(PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout)
pixConvertToPdfSegmented()
Definition: pdfio1.c:1704
l_ok convertFilesToPdf(const char *dirname, const char *substr, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
convertFilesToPdf()
Definition: pdfio1.c:239
PIX * pixReadMem(const l_uint8 *data, size_t size)
pixReadMem()
Definition: readfile.c:839
PIX * pixCreateTemplate(PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:367
PIX * pixClipRectangle(PIX *pixs, BOX *box, BOX **pboxc)
pixClipRectangle()
Definition: pix5.c:1020
Definition: pix.h:492
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
Definition: sarray1.c:1669
l_ok concatenatePdfToData(const char *dirname, const char *substr, l_uint8 **pdata, size_t *pnbytes)
concatenatePdfToData()
Definition: pdfio1.c:2113
Definition: array.h:116
Definition: pix.h:502
l_ok convertSegmentedFilesToPdf(const char *dirname, const char *substr, l_int32 res, l_int32 type, l_int32 thresh, BOXAA *baa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout)
convertSegmentedFilesToPdf()
Definition: pdfio1.c:1415
l_ok l_binaryWrite(const char *filename, const char *operation, const void *data, size_t nbytes)
l_binaryWrite()
Definition: utils2.c:1429
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:147
l_ok convertUnscaledFilesToPdf(const char *dirname, const char *substr, const char *title, const char *fileout)
convertUnscaledFilesToPdf()
Definition: pdfio1.c:520
l_ok saConcatenatePdf(SARRAY *sa, const char *fileout)
saConcatenatePdf()
Definition: pdfio1.c:2033
L_PTRA * ptraCreate(l_int32 n)
ptraCreate()
Definition: ptra.c:139
l_ok pixWriteStreamPdf(FILE *fp, PIX *pix, l_int32 res, const char *title)
pixWriteStreamPdf()
Definition: pdfio1.c:1286
l_ok convertImageDataToPdfData(l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertImageDataToPdfData()
Definition: pdfio1.c:1154
Definition: ptra.h:51
l_ok pixSetBlackOrWhite(PIX *pixs, l_int32 op)
pixSetBlackOrWhite()
Definition: pix2.c:946
l_ok findFileFormat(const char *filename, l_int32 *pformat)
findFileFormat()
Definition: readfile.c:580
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:681
l_ok convertToPdf(const char *filein, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertToPdf()
Definition: pdfio1.c:950
L_BYTEA * l_byteaInitFromFile(const char *fname)
l_byteaInitFromFile()
Definition: bytearray.c:148
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:515
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:543
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:763
l_ok selectDefaultPdfEncoding(PIX *pix, l_int32 *ptype)
selectDefaultPdfEncoding()
Definition: pdfio1.c:457
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
Definition: sarray1.c:1717
Definition: pix.h:454
l_ok splitPathAtDirectory(const char *pathname, char **pdir, char **ptail)
splitPathAtDirectory()
Definition: utils2.c:2540
l_ok ptraConcatenatePdf(L_PTRA *pa, const char *fileout)
ptraConcatenatePdf()
Definition: pdfio1.c:2069
l_ok pixGetDimensions(const PIX *pix, l_int32 *pw, l_int32 *ph, l_int32 *pd)
pixGetDimensions()
Definition: pix1.c:1065
BOXAA * boxaaCreate(l_int32 n)
boxaaCreate()
Definition: boxbasic.c:1223
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
Definition: pdfio2.c:182
void ptraDestroy(L_PTRA **ppa, l_int32 freeflag, l_int32 warnflag)
ptraDestroy()
Definition: ptra.c:185
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:621
l_ok ptraAdd(L_PTRA *pa, void *item)
ptraAdd()
Definition: ptra.c:242
PIX * pixRead(const char *filename)
pixRead()
Definition: readfile.c:190
l_ok convertToPdfDataSegmented(const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes)
convertToPdfDataSegmented()
Definition: pdfio1.c:1767
l_ok pixConvertToPdf(PIX *pix, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdf()
Definition: pdfio1.c:1223
l_ok convertUnscaledToPdfData(const char *fname, const char *title, l_uint8 **pdata, size_t *pnbytes)
convertUnscaledToPdfData()
Definition: pdfio1.c:668
l_ok pixaConvertToPdf(PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
pixaConvertToPdf()
Definition: pdfio1.c:752
l_ok saConvertUnscaledFilesToPdf(SARRAY *sa, const char *title, const char *fileout)
saConvertUnscaledFilesToPdf()
Definition: pdfio1.c:558
BOXAA * convertNumberedMasksToBoxaa(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost)
convertNumberedMasksToBoxaa()
Definition: pdfio1.c:1535
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
Definition: pixabasic.c:672
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
Definition: pdfio2.c:520
void * ptraRemove(L_PTRA *pa, l_int32 index, l_int32 flag)
ptraRemove()
Definition: ptra.c:434
l_ok boxaaInitFull(BOXAA *baa, BOXA *boxa)
boxaaInitFull()
Definition: boxbasic.c:1553
Definition: pix.h:134
Definition: pix.h:719
l_ok pixaConvertToPdfData(PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes)
pixaConvertToPdfData()
Definition: pdfio1.c:804
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:499
#define PIX_SRC
Definition: pix.h:327
l_ok saConvertUnscaledFilesToPdfData(SARRAY *sa, const char *title, l_uint8 **pdata, size_t *pnbytes)
saConvertUnscaledFilesToPdfData()
Definition: pdfio1.c:596
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:278
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:718
void l_byteaDestroy(L_BYTEA **pba)
l_byteaDestroy()
Definition: bytearray.c:244
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:310
l_ok pixConvertToPdfDataSegmented(PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes)
pixConvertToPdfDataSegmented()
Definition: pdfio1.c:1834
Definition: pix.h:480
PIX * pixScale(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScale()
Definition: scale1.c:244
l_ok concatenatePdf(const char *dirname, const char *substr, const char *fileout)
concatenatePdf()
Definition: pdfio1.c:1998
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:631
PIX * pixSetBlackOrWhiteBoxa(PIX *pixs, BOXA *boxa, l_int32 op)
pixSetBlackOrWhiteBoxa()
Definition: boxfunc3.c:283
BOXA * boxaaGetBoxa(BOXAA *baa, l_int32 index, l_int32 accessflag)
boxaaGetBoxa()
Definition: boxbasic.c:1471
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:355
Definition: array.h:126