![]() |
Leptonica
1.77.0
Image processing and image analysis suite
|
#include <string.h>
#include <math.h>
#include "allheaders.h"
Go to the source code of this file.
Functions | |
| l_ok | convertFilesToPdf (const char *dirname, const char *substr, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout) |
| l_ok | saConvertFilesToPdf (SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout) |
| l_ok | saConvertFilesToPdfData (SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | selectDefaultPdfEncoding (PIX *pix, l_int32 *ptype) |
| l_ok | convertUnscaledFilesToPdf (const char *dirname, const char *substr, const char *title, const char *fileout) |
| l_ok | saConvertUnscaledFilesToPdf (SARRAY *sa, const char *title, const char *fileout) |
| l_ok | saConvertUnscaledFilesToPdfData (SARRAY *sa, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | convertUnscaledToPdfData (const char *fname, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | pixaConvertToPdf (PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout) |
| l_ok | pixaConvertToPdfData (PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | convertToPdf (const char *filein, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
| l_ok | convertImageDataToPdf (l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
| l_ok | convertToPdfData (const char *filein, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
| l_ok | convertImageDataToPdfData (l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
| l_ok | pixConvertToPdf (PIX *pix, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
| l_ok | pixWriteStreamPdf (FILE *fp, PIX *pix, l_int32 res, const char *title) |
| l_ok | pixWriteMemPdf (l_uint8 **pdata, size_t *pnbytes, PIX *pix, l_int32 res, const char *title) |
| l_ok | convertSegmentedFilesToPdf (const char *dirname, const char *substr, l_int32 res, l_int32 type, l_int32 thresh, BOXAA *baa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout) |
| BOXAA * | convertNumberedMasksToBoxaa (const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost) |
| l_ok | convertToPdfSegmented (const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout) |
| l_ok | pixConvertToPdfSegmented (PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout) |
| l_ok | convertToPdfDataSegmented (const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | pixConvertToPdfDataSegmented (PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | concatenatePdf (const char *dirname, const char *substr, const char *fileout) |
| l_ok | saConcatenatePdf (SARRAY *sa, const char *fileout) |
| l_ok | ptraConcatenatePdf (L_PTRA *pa, const char *fileout) |
| l_ok | concatenatePdfToData (const char *dirname, const char *substr, l_uint8 **pdata, size_t *pnbytes) |
| l_ok | saConcatenatePdfToData (SARRAY *sa, l_uint8 **pdata, size_t *pnbytes) |
Variables | |
| static const l_int32 | DEFAULT_INPUT_RES = 300 |
Higher-level operations for generating pdf.
|=============================================================|
|                        Important note                       |
|=============================================================|
| Some of these functions require libtiff, libjpeg, and libz  |
| If you do not have these libraries, you must set            |
|      #define  USE_PDFIO     0                               |
| in environ.h.  This will link pdfiostub.c                   |
|=============================================================|
Set 1. These functions convert a set of image files
to a multi-page pdf file, with one image on each page.
All images are rendered at the same (input) resolution.
The images can be specified as being in a directory, or they
can be in an sarray. The output pdf can be either a file
or an array of bytes in memory. Set 2. These functions are a special case of set 1, where
no scaling or change in quality is required. For jpeg and
jp2k images, the bytes in each jpeg file can be directly
incorporated into the output pdf, and the wrapping up of
multiple image files is very fast. For non-interlaced png,
the data bytes including the predictors can also be written
directly into the flate pdf data. For other image formats,
transcoding is required, where the image data is first
decompressed and then the G4 or Flate (gzip) encodings are generated. Set 3. These functions convert a set of images in memory
to a multi-page pdf, with one image on each page. The pdf
output can be either a file or an array of bytes in memory. Set 4. These functions implement a pdf output "device driver"
for wrapping (encoding) any number of images on a single page
in pdf. The input can be either an image file or a Pix;
the pdf output can be either a file or an array of bytes in memory. Set 5. These "segmented" functions take a set of image
files, along with optional segmentation information, and
generate a multi-page pdf file, where each page consists
in general of a mixed raster pdf of image and non-image regions.
The segmentation information for each page can be input as
either a mask over the image parts, or as a Boxa of those
regions. Set 6. These "segmented" functions convert an image and
an optional Boxa of image regions into a mixed raster pdf file
for the page. The input image can be either a file or a Pix. Set 7. These functions take a set of single-page pdf files
and concatenate them into a multi-page pdf.
The input can be a set of single page pdf files, or of
pdf 'strings' in memory. The output can be either a file or
an array of bytes in memory. The images in the pdf file can be rendered using a pdf viewer,
such as gv, evince, xpdf or acroread. Reference on the pdf file format:
http://www.adobe.com/devnet/pdf/pdf_reference_archive.html 1. Convert specified image files to pdf (one image file per page)
l_int32 convertFilesToPdf()
l_int32 saConvertFilesToPdf()
l_int32 saConvertFilesToPdfData()
l_int32 selectDefaultPdfEncoding() 2. Convert specified image files to pdf without scaling
l_int32 convertUnscaledFilesToPdf()
l_int32 saConvertUnscaledFilesToPdf()
l_int32 saConvertUnscaledFilesToPdfData()
l_int32 convertUnscaledToPdfData() 3. Convert multiple images to pdf (one image per page)
l_int32 pixaConvertToPdf()
l_int32 pixaConvertToPdfData() 4. Single page, multi-image converters
l_int32 convertToPdf()
l_int32 convertImageDataToPdf()
l_int32 convertToPdfData()
l_int32 convertImageDataToPdfData()
l_int32 pixConvertToPdf()
l_int32 pixWriteStreamPdf()
l_int32 pixWriteMemPdf() 5. Segmented multi-page, multi-image converter
l_int32 convertSegmentedFilesToPdf()
BOXAA *convertNumberedMasksToBoxaa() 6. Segmented single page, multi-image converters
l_int32 convertToPdfSegmented()
l_int32 pixConvertToPdfSegmented()
l_int32 convertToPdfDataSegmented()
l_int32 pixConvertToPdfDataSegmented() 7. Multipage concatenation
l_int32 concatenatePdf()
l_int32 saConcatenatePdf()
l_int32 ptraConcatenatePdf()
l_int32 concatenatePdfToData()
l_int32 saConcatenatePdfToData() The top-level multi-image functions can be visualized as follows:
Output pdf data to file:
convertToPdf() and convertImageDataToPdf()
–> pixConvertToPdf()
–> pixConvertToPdfData() Output pdf data to array in memory:
convertToPdfData() and convertImageDataToPdfData()
–> pixConvertToPdfData() The top-level segmented image functions can be visualized as follows:
Output pdf data to file:
convertToPdfSegmented()
–> pixConvertToPdfSegmented()
–> pixConvertToPdfDataSegmented() Output pdf data to array in memory:
convertToPdfDataSegmented()
–> pixConvertToPdfDataSegmented() For multi-page concatenation, there are three different types of input
(1) directory and optional filename filter
(2) sarray of filenames
(3) ptra of byte arrays of pdf data
and two types of output for the concatenated pdf data
(1) filename
(2) data array and size
High-level interfaces are given for each of the six combinations. Note: When wrapping small images into pdf, it is useful to give
them a relatively low resolution value, to avoid rounding errors
when rendering the images. For example, if you want an image
of width w pixels to be 5 inches wide on a screen, choose a
resolution w/5. The very fast functions in section (2) require neither transcoding
nor parsing of the compressed jpeg file. With three types of image
compression, the compressed strings can be incorporated into
the pdf data without decompression and re-encoding: jpeg, jp2k
and png. The DCTDecode and JPXDecode filters can handle the
entire jpeg and jp2k encoded string as a byte array in the pdf file.
The FlateDecode filter can handle the png compressed image data,
including predictors that occur as the first byte in each
raster line, but it is necessary to store only the png IDAT chunk
data in the pdf array. The alternative for wrapping png images
is to uncompress into a raster (a pix) and then gzip the raster data.
This typically results in a larger pdf file, because it doesn't
use the two-dimensional png predictor. Colormaps, which are found
in png PLTE chunks, must always be pulled out and included separately
in the pdf. For CCITT-G4 compression, you can not simply
include a tiff G4 file – you must either parse it and extract the
G4 compressed data within it, or uncompress to a raster and
G4 compress again.
Definition in file pdfio1.c.
| l_ok concatenatePdf | ( | const char * | dirname, |
| const char * | substr, | ||
| const char * | fileout | ||
| ) |
| [in] | dirname | directory name containing single-page pdf files |
| [in] | substr | [optional] substring filter on filenames; can be NULL |
| [in] | fileout | concatenated pdf file |
Notes:
(1) This only works with leptonica-formatted single-page pdf files.
(2) If substr is not NULL, only filenames that contain
the substring can be returned. If substr == NULL,
none of the filenames are filtered out.
(3) The files in the directory, after optional filtering by
the substring, are lexically sorted in increasing order
before concatenation.
Definition at line 1998 of file pdfio1.c.
References getSortedPathnamesInDirectory(), saConcatenatePdf(), and sarrayDestroy().
| l_ok concatenatePdfToData | ( | const char * | dirname, |
| const char * | substr, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
| [in] | dirname | directory name containing single-page pdf files |
| [in] | substr | [optional] substring filter on filenames; can be NULL |
| [out] | pdata | concatenated pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
Notes:
(1) This only works with leptonica-formatted single-page pdf files.
(2) If substr is not NULL, only filenames that contain
the substring can be returned. If substr == NULL,
none of the filenames are filtered out.
(3) The files in the directory, after optional filtering by
the substring, are lexically sorted in increasing order
before concatenation.
Definition at line 2113 of file pdfio1.c.
References getSortedPathnamesInDirectory(), saConcatenatePdfToData(), and sarrayDestroy().
| l_ok convertFilesToPdf | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | dirname | directory name containing images |
| [in] | substr | [optional] substring filter on filenames; can be NULL |
| [in] | res | input resolution of all images |
| [in] | scalefactor | scaling factor applied to each image; > 0.0 |
| [in] | type | encoding type (L_JPEG_ENCODE, L_G4_ENCODE, L_FLATE_ENCODE, or 0 for default) |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [in] | fileout | pdf file of all images |
Notes:
(1) If substr is not NULL, only image filenames that contain
the substring can be used. If substr == NULL, all files
in the directory are used.
(2) The files in the directory, after optional filtering by
the substring, are lexically sorted in increasing order
before concatenation.
(3) The scalefactor is applied to each image before encoding.
If you enter a value <= 0.0, it will be set to 1.0.
(4) Specifying one of the three encoding types for type forces
all images to be compressed with that type. Use 0 to have
the type determined for each image based on depth and whether
or not it has a colormap.
Definition at line 239 of file pdfio1.c.
References getSortedPathnamesInDirectory(), saConvertFilesToPdf(), and sarrayDestroy().
Referenced by pixCompareWithTranslation().
| l_ok convertImageDataToPdf | ( | l_uint8 * | imdata, |
| size_t | size, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | fileout, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
| [in] | imdata | array of formatted image data; e.g., png, jpeg |
| [in] | size | size of image data |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | fileout | output pdf file; only required on last image on page |
| [in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title |
| [in,out] | plpd | ptr to lpd, which is created on the first invocation and returned until last image is processed, at which time it is destroyed |
| [in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
Notes:
(1) If res == 0 and the input resolution field is 0,
this will use DEFAULT_INPUT_RES.
(2) See comments in convertToPdf().
Definition at line 1021 of file pdfio1.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, L_LAST_IMAGE, pixConvertToPdf(), pixDestroy(), and pixReadMem().
| l_ok convertImageDataToPdfData | ( | l_uint8 * | imdata, |
| size_t | size, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
| [in] | imdata | array of formatted image data; e.g., png, jpeg |
| [in] | size | size of image data |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [out] | pdata | pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
| [in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title |
| [in,out] | plpd | ptr to lpd, which is created on the first invocation and returned until last image is processed, at which time it is destroyed |
| [in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
Notes:
(1) If res == 0 and the input resolution field is 0,
this will use DEFAULT_INPUT_RES.
(2) See comments in convertToPdf().
Definition at line 1154 of file pdfio1.c.
References L_FIRST_IMAGE, pixConvertToPdfData(), pixDestroy(), and pixReadMem().
| BOXAA* convertNumberedMasksToBoxaa | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | numpre, | ||
| l_int32 | numpost | ||
| ) |
| [in] | dirname | directory name containing mask images |
| [in] | substr | [optional] substring filter on filenames; can be NULL |
| [in] | numpre | number of characters in name before number |
| [in] | numpost | number of characters in name after number, up to a dot before an extension including an extension and the dot separator |
Notes:
(1) This is conveniently used to generate the input boxaa
for convertSegmentedFilesToPdf(). It guarantees that the
boxa will be aligned with the page images, even if some
of the boxa are empty.
Definition at line 1535 of file pdfio1.c.
References boxaaCreate(), boxaaInitFull(), boxaaReplaceBoxa(), boxaCreate(), boxaDestroy(), getNumberedPathnamesInDirectory(), L_NOCOPY, pixConnComp(), pixDestroy(), pixRead(), sarrayDestroy(), sarrayGetCount(), and sarrayGetString().
| l_ok convertSegmentedFilesToPdf | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXAA * | baa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | dirname | directory name containing images |
| [in] | substr | [optional] substring filter on filenames; can be NULL |
| [in] | res | input resolution of all images |
| [in] | type | compression type for non-image regions; the image regions are always compressed with L_JPEG_ENCODE |
| [in] | thresh | used for converting gray –> 1 bpp with L_G4_ENCODE |
| [in] | baa | [optional] boxaa of image regions |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | scalefactor | scaling factor applied to each image region |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [in] | fileout | pdf file of all images |
Notes:
(1) If substr is not NULL, only image filenames that contain
the substring can be used. If substr == NULL, all files
in the directory are used.
(2) The files in the directory, after optional filtering by
the substring, are lexically sorted in increasing order
before concatenation.
(3) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
colormap and many colors, or 32 bpp; FLATE for anything else.
(4) The boxaa, if it exists, contains one boxa of "image regions"
for each image file. The boxa must be aligned with the
sorted set of images.
(5) The scalefactor is applied to each image region. It is
typically < 1.0, to save bytes in the final pdf, because
the resolution is often not critical in non-text regions.
(6) If the non-image regions have pixel depth > 1 and the encoding
type is G4, they are automatically scaled up by 2x and
thresholded. Otherwise, no scaling is performed on them.
(7) Note that this function can be used to generate multipage
G4 compressed pdf from any input, by using boxaa == NULL
and type == L_G4_ENCODE.
Definition at line 1415 of file pdfio1.c.
References boxaaExtendWithInit(), boxaaGetBoxa(), boxaaGetCount(), boxaCreate(), boxaDestroy(), boxaGetCount(), convertToPdfDataSegmented(), getNumberedPathnamesInDirectory(), l_binaryWrite(), l_byteaDestroy(), l_byteaInitFromMem(), L_CLONE, L_NO_COMPACTION, L_NOCOPY, ptraAdd(), ptraConcatenatePdfToData(), ptraCreate(), ptraDestroy(), ptraGetActualCount(), ptraRemove(), sarrayDestroy(), sarrayGetCount(), and sarrayGetString().
| l_ok convertToPdf | ( | const char * | filein, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | fileout, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
| [in] | filein | input image file – any format |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | fileout | output pdf file; only required on last image on page |
| [in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title; if null, taken from filein |
| [in,out] | plpd | ptr to lpd, which is created on the first invocation and returned until last image is processed, at which time it is destroyed |
| [in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
Notes:
(1) To wrap only one image in pdf, input plpd = NULL, and
the value of position will be ignored:
convertToPdf(... type, quality, x, y, res, NULL, 0);
(2) To wrap multiple images on a single pdf page, this is called
once for each successive image. Do it this way:
L_PDF_DATA *lpd;
convertToPdf(... type, quality, x, y, res, &lpd, L_FIRST_IMAGE);
convertToPdf(... type, quality, x, y, res, &lpd, L_NEXT_IMAGE);
...
convertToPdf(... type, quality, x, y, res, &lpd, L_LAST_IMAGE);
This will write the result to the value of fileout specified
in the first call; succeeding values of fileout are ignored.
On the last call: the pdf data bytes are computed and written
to fileout, lpd is destroyed internally, and the returned
value of lpd is null. So the client has nothing to clean up.
(3) (a) Set res == 0 to respect the resolution embedded in the
image file. If no resolution is embedded, it will be set
to the default value.
(b) Set res to some other value to override the file resolution.
(4) (a) If the input res and the resolution of the output device
are equal, the image will be "displayed" at the same size
as the original.
(b) If the input res is 72, the output device will render
the image at 1 pt/pixel.
(c) Some possible choices for the default input pix resolution are:
72 ppi Render pix on any output device at one pt/pixel
96 ppi Windows default for generated display images
300 ppi Typical default for scanned images.
We choose 300, which is sensible for rendering page images.
However, images come from a variety of sources, and
some are explicitly created for viewing on a display.
Definition at line 950 of file pdfio1.c.
References convertToPdfData(), l_binaryWrite(), L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, and L_LAST_IMAGE.
| l_ok convertToPdfData | ( | const char * | filein, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
| [in] | filein | input image file – any format |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [out] | pdata | pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
| [in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title; if null, use filein |
| [in,out] | plpd | ptr to lpd, which is created on the first invocation and returned until last image is processed, at which time it is destroyed |
| [in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
Notes:
(1) If res == 0 and the input resolution field is 0,
this will use DEFAULT_INPUT_RES.
(2) See comments in convertToPdf().
Definition at line 1086 of file pdfio1.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, pixConvertToPdfData(), pixDestroy(), and pixRead().
Referenced by convertToPdf().
| l_ok convertToPdfDataSegmented | ( | const char * | filein, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
| [in] | filein | input image file – any format |
| [in] | res | input image resolution; typ. 300 ppi; use 0 for default |
| [in] | type | compression type for non-image regions; the image regions are always compressed with L_JPEG_ENCODE |
| [in] | thresh | used for converting gray –> 1 bpp with L_G4_ENCODE |
| [in] | boxa | [optional] image regions; can be null |
| [in] | quality | used for jpeg image regions; 0 for default |
| [in] | scalefactor | used for jpeg regions; must be <= 1.0 |
| [in] | title | [optional] pdf title; if null, uses filein |
| [out] | pdata | pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
Notes:
(1) If there are no image regions, set boxa == NULL;
quality and scalefactor are ignored.
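(2) Typically, scalefactor is < 1.0. The image regions are
rendered at a lower resolution (for better compression) than
the text regions; see the notes for convertToPdfSegmented().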
Definition at line 1767 of file pdfio1.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, pixConvertToPdfDataSegmented(), pixDestroy(), and pixRead().
Referenced by convertSegmentedFilesToPdf().
| l_ok convertToPdfSegmented | ( | const char * | filein, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | filein | input image file – any format |
| [in] | res | input image resolution; typ. 300 ppi; use 0 for default |
| [in] | type | compression type for non-image regions; the image regions are always compressed with L_JPEG_ENCODE |
| [in] | thresh | used for converting gray –> 1 bpp with L_G4_ENCODE |
| [in] | boxa | [optional] boxa of image regions; can be null |
| [in] | quality | used for jpeg image regions; 0 for default |
| [in] | scalefactor | used for jpeg regions; must be <= 1.0 |
| [in] | title | [optional] pdf title; typically taken from the input file for the pix |
| [in] | fileout | output pdf file |
Notes:
(1) If there are no image regions, set boxa == NULL;
quality and scalefactor are ignored.
(2) Typically, scalefactor is < 1.0, because the image regions
can be rendered at a lower resolution (for better compression)
than the text regions. If scalefactor == 0, we use 1.0.
If the input image is 1 bpp and scalefactor < 1.0, we
use scaleToGray() to downsample the image regions to gray
before compressing them.
(3) If the compression type for non-image regions is L_G4_ENCODE
and bpp > 1, the image is upscaled 2x and thresholded
to 1 bpp. That is the only situation where thresh is used.
(4) The parameter quality is only used for image regions.
If type == L_JPEG_ENCODE, default jpeg quality (75) is
used for the non-image regions.
(5) Processing matrix for non-image regions.
Input        G4             JPEG                FLATE
----------|---------------------------------------------------
1 bpp     |  1x, 1 bpp      1x flate, 1 bpp     1x, 1 bpp
          |
cmap      |  2x, 1 bpp      1x flate, cmap      1x, cmap
          |
2,4 bpp   |  2x, 1 bpp      1x flate            1x, 2,4 bpp
no cmap   |                 2,4 bpp
          |
8,32 bpp  |  2x, 1 bpp      1x (jpeg)           1x, 8,32 bpp
no cmap   |                 8,32 bpp
Summary:
(a) if G4 is requested, G4 is used, with 2x upscaling
for all cases except 1 bpp.
(b) if JPEG is requested, use flate encoding for all cases
except 8 bpp without cmap and 32 bpp (rgb).
(c) if FLATE is requested, use flate with no transformation
of the raster data.
(6) Calling options/sequence for these functions
(each indented entry is called by the entry above it):
file –> file (convertToPdfSegmented)
    pix –> file (pixConvertToPdfSegmented)
        pix –> data (pixConvertToPdfDataSegmented)
file –> data (convertToPdfDataSegmented)
    pix –> data (pixConvertToPdfDataSegmented)
Definition at line 1644 of file pdfio1.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, pixConvertToPdfSegmented(), pixDestroy(), and pixRead().
| l_ok convertUnscaledFilesToPdf | ( | const char * | dirname, |
| const char * | substr, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | dirname | directory name containing images |
| [in] | substr | [optional] substring filter on filenames; can be NULL |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [in] | fileout | pdf file of all images |
Notes:
(1) If substr is not NULL, only image filenames that contain
the substring can be used. If substr == NULL, all files
in the directory are used.
(2) The files in the directory, after optional filtering by
the substring, are lexically sorted in increasing order
before concatenation.
(3) For jpeg and jp2k, this is very fast because the compressed
data is wrapped up and concatenated. For png and tiffg4,
the images must be read and recompressed.
Definition at line 520 of file pdfio1.c.
References getSortedPathnamesInDirectory(), saConvertUnscaledFilesToPdf(), and sarrayDestroy().
| l_ok convertUnscaledToPdfData | ( | const char * | fname, |
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
| [in] | fname | of image file |
| [in] | title | [optional] pdf title; can be NULL |
| [out] | pdata | output pdf data for image |
| [out] | pnbytes | size of output pdf data |
Definition at line 668 of file pdfio1.c.
References findFileFormat().
Referenced by saConvertUnscaledFilesToPdfData().
| l_ok pixaConvertToPdf | ( | PIXA * | pixa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | pixa | containing images all at the same resolution |
| [in] | res | override the resolution of each input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | scalefactor | scaling factor applied to each image; > 0.0 |
| [in] | type | encoding type (L_JPEG_ENCODE, L_G4_ENCODE, L_FLATE_ENCODE, or 0 for default) |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | title | [optional] pdf title |
| [in] | fileout | pdf file of all images |
Notes:
(1) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
colormap and many colors, or 32 bpp; FLATE for anything else.
(2) The scalefactor must be > 0.0; otherwise it is set to 1.0.
(3) Specifying one of the three encoding types for type forces
all images to be compressed with that type. Use 0 to have
the type determined for each image based on depth and whether
or not it has a colormap.
Definition at line 752 of file pdfio1.c.
References l_binaryWrite(), and pixaConvertToPdfData().
Referenced by convertTiffMultipageToPdf(), pixaCompareInPdf(), pixaSelectToPdf(), and pixCompareWithTranslation().
| l_ok pixaConvertToPdfData | ( | PIXA * | pixa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
| [in] | pixa | containing images all at the same resolution |
| [in] | res | input resolution of all images |
| [in] | scalefactor | scaling factor applied to each image; > 0.0 |
| [in] | type | encoding type (L_JPEG_ENCODE, L_G4_ENCODE, L_FLATE_ENCODE, or 0 for default) |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | title | [optional] pdf title |
| [out] | pdata | output pdf data (of all images) |
| [out] | pnbytes | size of output pdf data |
Notes:
(1) See pixaConvertToPdf().
Definition at line 804 of file pdfio1.c.
References l_byteaDestroy(), l_byteaInitFromMem(), L_CLONE, L_FLATE_ENCODE, L_NO_COMPACTION, pixaGetCount(), pixaGetPix(), pixClone(), pixConvertToPdfData(), pixDestroy(), pixScale(), ptraAdd(), ptraConcatenatePdfToData(), ptraCreate(), ptraDestroy(), ptraGetActualCount(), ptraRemove(), and selectDefaultPdfEncoding().
Referenced by pixaConvertToPdf().
| l_ok pixConvertToPdf | ( | PIX * | pix, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | fileout, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
| [in] | pix | |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | fileout | output pdf file; only required on last image on page |
| [in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0 at the lower-left corner of the page) |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title |
| [in,out] | plpd | ptr to lpd, which is created on the first invocation and returned until last image is processed |
| [in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
Notes:
(1) If res == 0 and the input resolution field is 0,
this will use DEFAULT_INPUT_RES.
(2) This only writes data to fileout if it is the last
image to be written on the page.
(3) See comments in convertToPdf().
Definition at line 1223 of file pdfio1.c.
References l_binaryWrite(), L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, L_LAST_IMAGE, and pixConvertToPdfData().
Referenced by convertImageDataToPdf().
| l_ok pixConvertToPdfDataSegmented | ( | PIX * | pixs, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
pixConvertToPdfDataSegmented()
| [in] | pixs | any depth, cmap OK |
| [in] | res | input image resolution; typ. 300 ppi; use 0 for default |
| [in] | type | compression type for non-image regions; the image regions are always compressed with L_JPEG_ENCODE |
| [in] | thresh | used for converting gray –> 1 bpp with L_G4_ENCODE |
| [in] | boxa | [optional] image regions; can be null |
| [in] | quality | used for jpeg image regions; 0 for default |
| [in] | scalefactor | used for jpeg regions; must be <= 1.0 |
| [in] | title | [optional] pdf title; typically taken from the input file for the pix |
| [out] | pdata | pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
Notes:
(1) See convertToPdfSegmented() for details.
Definition at line 1834 of file pdfio1.c.
References L_FLATE_ENCODE, L_G4_ENCODE, and L_JPEG_ENCODE.
Referenced by convertToPdfDataSegmented(), and pixConvertToPdfSegmented().
| l_ok pixConvertToPdfSegmented | ( | PIX * | pixs, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | pixs | any depth, cmap OK |
| [in] | res | input image resolution; typ. 300 ppi; use 0 for default |
| [in] | type | compression type for non-image regions; the image regions are always compressed with L_JPEG_ENCODE |
| [in] | thresh | used for converting gray –> 1 bpp with L_G4_ENCODE |
| [in] | boxa | [optional] image regions; can be null |
| [in] | quality | used for jpeg image regions; 0 for default |
| [in] | scalefactor | used for jpeg regions; must be <= 1.0 |
| [in] | title | [optional] pdf title; typically taken from the input file for the pix |
| [in] | fileout | output pdf file |
Notes:
(1) See convertToPdfSegmented() for details.
Definition at line 1704 of file pdfio1.c.
References l_binaryWrite(), L_FLATE_ENCODE, L_G4_ENCODE, L_JPEG_ENCODE, and pixConvertToPdfDataSegmented().
Referenced by convertToPdfSegmented().
| l_ok pixWriteMemPdf | ( | l_uint8 ** | pdata, |
| size_t * | pnbytes, | ||
| PIX * | pix, | ||
| l_int32 | res, | ||
| const char * | title | ||
| ) |
| [out] | pdata | pdf as byte array |
| [out] | pnbytes | number of bytes in pdf array |
| [in] | pix | all depths, cmap OK |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title; taken from the first image placed on a page; e.g., an input image filename |
Notes:
(1) This is the simplest interface for writing a single image
with pdf encoding to memory. It uses G4 encoding for 1 bpp,
JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE
encoding for everything else.
Definition at line 1335 of file pdfio1.c.
Referenced by pixWriteStreamPdf().
| l_ok pixWriteStreamPdf | ( | FILE * | fp, |
| PIX * | pix, | ||
| l_int32 | res, | ||
| const char * | title | ||
| ) |
| [in] | fp | file stream opened for writing |
| [in] | pix | all depths, cmap OK |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect the resolution embedded in the input |
| [in] | title | [optional] pdf title; taken from the first image placed on a page; e.g., an input image filename |
Notes:
(1) This is the simplest interface for writing a single image
with pdf encoding to a stream. It uses G4 encoding for 1 bpp,
JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE
encoding for everything else.
Definition at line 1286 of file pdfio1.c.
References pixWriteMemPdf().
| l_ok ptraConcatenatePdf | ( | L_PTRA * | pa, |
| const char * | fileout | ||
| ) |
| [in] | pa | array of pdf strings, each for a single-page pdf file |
| [in] | fileout | concatenated pdf file |
Notes:
(1) This only works with leptonica-formatted single-page pdf files.
Definition at line 2069 of file pdfio1.c.
References l_binaryWrite(), and ptraConcatenatePdfToData().
| l_ok saConcatenatePdf | ( | SARRAY * | sa, |
| const char * | fileout | ||
| ) |
| [in] | sa | string array of pathnames for single-page pdf files |
| [in] | fileout | concatenated pdf file |
Notes:
(1) This only works with leptonica-formatted single-page pdf files.
Definition at line 2033 of file pdfio1.c.
References l_binaryWrite(), and saConcatenatePdfToData().
Referenced by concatenatePdf().
| l_ok saConcatenatePdfToData | ( | SARRAY * | sa, |
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
| [in] | sa | string array of pathnames for single-page pdf files |
| [out] | pdata | concatenated pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
Notes:
(1) This only works with leptonica-formatted single-page pdf files.
Definition at line 2154 of file pdfio1.c.
References l_byteaDestroy(), l_byteaInitFromFile(), L_NO_COMPACTION, L_NOCOPY, ptraAdd(), ptraConcatenatePdfToData(), ptraCreate(), ptraDestroy(), ptraGetActualCount(), ptraRemove(), sarrayGetCount(), and sarrayGetString().
Referenced by concatenatePdfToData(), and saConcatenatePdf().
| l_ok saConvertFilesToPdf | ( | SARRAY * | sa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | sa | string array of pathnames for images |
| [in] | res | input resolution of all images |
| [in] | scalefactor | scaling factor applied to each image; > 0.0 |
| [in] | type | encoding type (L_JPEG_ENCODE, L_G4_ENCODE, L_FLATE_ENCODE, or 0 for default) |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [in] | fileout | pdf file of all images |
Notes:
(1) See convertFilesToPdf().
Definition at line 287 of file pdfio1.c.
References l_binaryWrite(), and saConvertFilesToPdfData().
Referenced by convertFilesToPdf().
| l_ok saConvertFilesToPdfData | ( | SARRAY * | sa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
| [in] | sa | string array of pathnames for images |
| [in] | res | input resolution of all images |
| [in] | scalefactor | scaling factor applied to each image; > 0.0 |
| [in] | type | encoding type (L_JPEG_ENCODE, L_G4_ENCODE, L_FLATE_ENCODE, or 0 for default) |
| [in] | quality | used for JPEG only; 0 for default (75) |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [out] | pdata | output pdf data (of all images) |
| [out] | pnbytes | size of output pdf data |
Notes:
(1) See convertFilesToPdf().
Definition at line 340 of file pdfio1.c.
References l_byteaDestroy(), l_byteaInitFromMem(), L_FLATE_ENCODE, L_NO_COMPACTION, L_NOCOPY, pixClone(), pixConvertToPdfData(), pixDestroy(), pixRead(), pixScale(), ptraAdd(), ptraConcatenatePdfToData(), ptraCreate(), ptraDestroy(), ptraGetActualCount(), ptraRemove(), sarrayGetCount(), sarrayGetString(), and selectDefaultPdfEncoding().
Referenced by saConvertFilesToPdf().
| l_ok saConvertUnscaledFilesToPdf | ( | SARRAY * | sa, |
| const char * | title, | ||
| const char * | fileout | ||
| ) |
| [in] | sa | string array of pathnames for images |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [in] | fileout | pdf file of all images |
Notes:
(1) See convertUnscaledFilesToPdf().
Definition at line 558 of file pdfio1.c.
References l_binaryWrite(), and saConvertUnscaledFilesToPdfData().
Referenced by convertUnscaledFilesToPdf().
| l_ok saConvertUnscaledFilesToPdfData | ( | SARRAY * | sa, |
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
saConvertUnscaledFilesToPdfData()
| [in] | sa | string array of pathnames for images |
| [in] | title | [optional] pdf title; if null, taken from the first image filename |
| [out] | pdata | output pdf data (of all images) |
| [out] | pnbytes | size of output pdf data |
Definition at line 596 of file pdfio1.c.
References convertUnscaledToPdfData(), l_byteaDestroy(), l_byteaInitFromMem(), L_NO_COMPACTION, L_NOCOPY, ptraAdd(), ptraConcatenatePdfToData(), ptraCreate(), ptraDestroy(), ptraGetActualCount(), ptraRemove(), sarrayGetCount(), and sarrayGetString().
Referenced by saConvertUnscaledFilesToPdf().
| l_ok selectDefaultPdfEncoding | ( | PIX * | pix, |
| l_int32 * | ptype | ||
| ) |
| [in] | pix | |
| [out] | ptype | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE |
Notes:
(1) This attempts to choose an encoding for the pix that results
in the smallest file, assuming that if jpeg encoded, it will
use quality = 75. The decision is approximate, in that
(a) all colormapped images will be losslessly encoded with
gzip (flate), and (b) an image with fewer than about 20 colors
is likely to be smaller if flate encoded than if encoded
as a jpeg (dct). For example, an image made by pixScaleToGray3()
will have 10 colors, and flate encoding will give about
twice the compression of jpeg with quality = 75.
Definition at line 457 of file pdfio1.c.
References L_FLATE_ENCODE, and pixGetDimensions().
Referenced by pixaConvertToPdfData(), and saConvertFilesToPdfData().