139 #include <sys/stat.h> 143 #include "allheaders.h" 145 static const l_int32 INITIAL_PTR_ARRAYSIZE = 50;
167 PROCNAME(
"sarrayCreate");
170 n = INITIAL_PTR_ARRAYSIZE;
173 if ((sa->
array = (
char **)LEPT_CALLOC(n,
sizeof(
char *))) == NULL) {
175 return (
SARRAY *)ERROR_PTR(
"ptr array not made", procName, NULL);
199 PROCNAME(
"sarrayCreateInitialized");
202 return (
SARRAY *)ERROR_PTR(
"n must be > 0", procName, NULL);
204 return (
SARRAY *)ERROR_PTR(
"initstr not defined", procName, NULL);
207 for (i = 0; i < n; i++)
228 char separators[] =
" \n\t";
229 l_int32 i, nsub, size, inword;
232 PROCNAME(
"sarrayCreateWordsFromString");
235 return (
SARRAY *)ERROR_PTR(
"textstr not defined", procName, NULL);
238 size = strlen(
string);
241 for (i = 0; i < size; i++) {
242 if (inword == FALSE &&
243 (
string[i] !=
' ' &&
string[i] !=
'\t' &&
string[i] !=
'\n')) {
246 }
else if (inword == TRUE &&
247 (
string[i] ==
' ' ||
string[i] ==
'\t' ||
string[i] ==
'\n')) {
253 return (
SARRAY *)ERROR_PTR(
"sa not made", procName, NULL);
254 sarraySplitString(sa,
string, separators);
279 l_int32 i, nsub, size, startptr;
280 char *cstring, *substring;
283 PROCNAME(
"sarrayCreateLinesFromString");
286 return (
SARRAY *)ERROR_PTR(
"textstr not defined", procName, NULL);
289 size = strlen(
string);
291 for (i = 0; i < size; i++) {
292 if (
string[i] ==
'\n')
297 return (
SARRAY *)ERROR_PTR(
"sa not made", procName, NULL);
301 if ((cstring =
stringNew(
string)) == NULL) {
303 return (
SARRAY *)ERROR_PTR(
"cstring not made", procName, NULL);
307 for (i = 0; i < size; i++) {
308 if (cstring[i] ==
'\n') {
310 if (i > 0 && cstring[i - 1] ==
'\r')
311 cstring[i - 1] =
'\0';
312 if ((substring =
stringNew(cstring + startptr)) == NULL) {
315 return (
SARRAY *)ERROR_PTR(
"substring not made",
323 if (startptr < size) {
324 if ((substring =
stringNew(cstring + startptr)) == NULL) {
327 return (
SARRAY *)ERROR_PTR(
"substring not made",
335 sarraySplitString(sa,
string,
"\r\n");
360 PROCNAME(
"sarrayDestroy");
363 L_WARNING(
"ptr address is NULL!\n", procName);
366 if ((sa = *psa) == NULL)
372 for (i = 0; i < sa->
n; i++) {
374 LEPT_FREE(sa->
array[i]);
376 LEPT_FREE(sa->
array);
398 PROCNAME(
"sarrayCopy");
401 return (
SARRAY *)ERROR_PTR(
"sa not defined", procName, NULL);
404 return (
SARRAY *)ERROR_PTR(
"csa not made", procName, NULL);
406 for (i = 0; i < sa->
n; i++)
422 PROCNAME(
"sarrayClone");
425 return (
SARRAY *)ERROR_PTR(
"sa not defined", procName, NULL);
452 PROCNAME(
"sarrayAddString");
455 return ERROR_INT(
"sa not defined", procName, 1);
457 return ERROR_INT(
"string not defined", procName, 1);
459 return ERROR_INT(
"invalid copyflag", procName, 1);
468 sa->
array[n] = (
char *)
string;
483 PROCNAME(
"sarrayExtendArray");
486 return ERROR_INT(
"sa not defined", procName, 1);
489 sizeof(
char *) * sa->
nalloc,
490 2 *
sizeof(
char *) * sa->
nalloc)) == NULL)
491 return ERROR_INT(
"new ptr array not returned", procName, 1);
511 l_int32 i, n, nalloc;
513 PROCNAME(
"sarrayRemoveString");
516 return (
char *)ERROR_PTR(
"sa not defined", procName, NULL);
519 return (
char *)ERROR_PTR(
"array not returned", procName, NULL);
521 if (index < 0 || index >= n)
522 return (
char *)ERROR_PTR(
"array index out of bounds", procName, NULL);
524 string = array[
index];
530 for (i =
index; i < n - 1; i++)
531 array[i] = array[i + 1];
565 PROCNAME(
"sarrayReplaceString");
568 return ERROR_INT(
"sa not defined", procName, 1);
570 if (index < 0 || index >= n)
571 return ERROR_INT(
"array index out of bounds", procName, 1);
573 return ERROR_INT(
"newstr not defined", procName, 1);
575 return ERROR_INT(
"invalid copyflag", procName, 1);
598 PROCNAME(
"sarrayClear");
601 return ERROR_INT(
"sa not defined", procName, 1);
602 for (i = 0; i < sa->
n; i++) {
603 LEPT_FREE(sa->
array[i]);
623 PROCNAME(
"sarrayGetCount");
626 return ERROR_INT(
"sa not defined", procName, 0);
652 PROCNAME(
"sarrayGetArray");
655 return (
char **)ERROR_PTR(
"sa not defined", procName, NULL);
658 if (pnalloc) *pnalloc = sa->
nalloc;
685 PROCNAME(
"sarrayGetString");
688 return (
char *)ERROR_PTR(
"sa not defined", procName, NULL);
689 if (index < 0 || index >= sa->
n)
690 return (
char *)ERROR_PTR(
"index not valid", procName, NULL);
692 return (
char *)ERROR_PTR(
"invalid copyflag", procName, NULL);
710 PROCNAME(
"sarrayGetRefcount");
713 return ERROR_INT(
"sa not defined", procName, UNDEF);
729 PROCNAME(
"sarrayChangeRefcount");
732 return ERROR_INT(
"sa not defined", procName, UNDEF);
766 PROCNAME(
"sarrayToString");
769 return (
char *)ERROR_PTR(
"sa not defined", procName, NULL);
803 char *dest, *src, *str;
804 l_int32 n, i, last, size,
index, len;
806 PROCNAME(
"sarrayToStringRange");
809 return (
char *)ERROR_PTR(
"sa not defined", procName, NULL);
810 if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2)
811 return (
char *)ERROR_PTR(
"invalid addnlflag", procName, NULL);
825 return (
char *)ERROR_PTR(
"first not valid", procName, NULL);
829 if (first < 0 || first >= n)
830 return (
char *)ERROR_PTR(
"first not valid", procName, NULL);
831 if (nstrings == 0 || (nstrings > n - first))
832 nstrings = n - first;
833 last = first + nstrings - 1;
836 for (i = first; i <= last; i++) {
838 return (
char *)ERROR_PTR(
"str not found", procName, NULL);
839 size += strlen(str) + 2;
842 if ((dest = (
char *)LEPT_CALLOC(size + 1,
sizeof(
char))) == NULL)
843 return (
char *)ERROR_PTR(
"dest not made", procName, NULL);
846 for (i = first; i <= last; i++) {
849 memcpy(dest +
index, src, len);
851 if (addnlflag == 1) {
854 }
else if (addnlflag == 2) {
886 PROCNAME(
"sarrayJoin");
889 return ERROR_INT(
"sa1 not defined", procName, 1);
891 return ERROR_INT(
"sa2 not defined", procName, 1);
894 for (i = 0; i < n; i++) {
928 PROCNAME(
"sarrayAppendRange");
931 return ERROR_INT(
"sa1 not defined", procName, 1);
933 return ERROR_INT(
"sa2 not defined", procName, 1);
938 if (end < 0 || end >= n)
941 return ERROR_INT(
"start > end", procName, 1);
943 for (i = start; i <= end; i++) {
974 const char *padstring)
978 PROCNAME(
"sarrayPadToSameSize");
981 return ERROR_INT(
"both sa1 and sa2 not defined", procName, 1);
986 for (i = n1; i < n2; i++)
988 }
else if (n1 > n2) {
989 for (i = n2; i < n1; i++)
1033 char emptystring[] =
"";
1034 l_int32 n, i, len, totlen;
1037 PROCNAME(
"sarrayConvertWordsToLines");
1040 return (
SARRAY *)ERROR_PTR(
"sa not defined", procName, NULL);
1046 for (i = 0; i < n; i++) {
1059 }
else if (totlen == 0 && len + 1 > linesize) {
1061 }
else if (totlen + len + 1 > linesize) {
1098 sarraySplitString(
SARRAY *sa,
1100 const char *separators)
1102 char *cstr, *substr, *saveptr;
1104 PROCNAME(
"sarraySplitString");
1107 return ERROR_INT(
"sa not defined", procName, 1);
1109 return ERROR_INT(
"str not defined", procName, 1);
1111 return ERROR_INT(
"separators not defined", procName, 1);
1115 substr =
strtokSafe(cstr, separators, &saveptr);
1118 while ((substr =
strtokSafe(NULL, separators, &saveptr)))
1149 l_int32 n, i, offset, found;
1152 PROCNAME(
"sarraySelectBySubstring");
1155 return (
SARRAY *)ERROR_PTR(
"sain not defined", procName, NULL);
1158 if (!substr || n == 0)
1162 for (i = 0; i < n; i++) {
1165 strlen(substr), &offset, &found);
1199 PROCNAME(
"sarraySelectByRange");
1202 return (
SARRAY *)ERROR_PTR(
"sain not defined", procName, NULL);
1203 if (first < 0) first = 0;
1205 if (last <= 0) last = n - 1;
1207 L_WARNING(
"last > n - 1; setting to n - 1\n", procName);
1211 return (
SARRAY *)ERROR_PTR(
"first must be >= last", procName, NULL);
1214 for (i = first; i <= last; i++) {
1262 l_int32 *pactualstart,
1269 l_int32 n, i, offset, found;
1271 PROCNAME(
"sarrayParseRange");
1274 return ERROR_INT(
"sa not defined", procName, 1);
1275 if (!pactualstart || !pend || !pnewstart)
1276 return ERROR_INT(
"not all range addresses defined", procName, 1);
1278 *pactualstart = *pend = *pnewstart = n;
1280 return ERROR_INT(
"substr not defined", procName, 1);
1283 if (start < 0 || start >= n)
1285 for (i = start; i < n; i++) {
1288 strlen(substr), &offset, &found);
1292 if (!found || offset != loc)
break;
1300 *pactualstart = start;
1301 for (i = start + 1; i < n; i++) {
1304 strlen(substr), &offset, &found);
1308 if (found && offset == loc)
break;
1318 for (i = start; i < n; i++) {
1321 strlen(substr), &offset, &found);
1325 if (!found || offset != loc)
break;
1350 PROCNAME(
"sarrayRead");
1353 return (
SARRAY *)ERROR_PTR(
"filename not defined", procName, NULL);
1356 return (
SARRAY *)ERROR_PTR(
"stream not opened", procName, NULL);
1360 return (
SARRAY *)ERROR_PTR(
"sa not read", procName, NULL);
1385 l_int32 i, n, size,
index, bufsize, version, ignore,
success;
1388 PROCNAME(
"sarrayReadStream");
1391 return (
SARRAY *)ERROR_PTR(
"stream not defined", procName, NULL);
1393 if (fscanf(
fp,
"\nSarray Version %d\n", &version) != 1)
1394 return (
SARRAY *)ERROR_PTR(
"not an sarray file", procName, NULL);
1396 return (
SARRAY *)ERROR_PTR(
"invalid sarray version", procName, NULL);
1397 if (fscanf(
fp,
"Number of strings = %d\n", &n) != 1)
1398 return (
SARRAY *)ERROR_PTR(
"error on # strings", procName, NULL);
1400 return (
SARRAY *)ERROR_PTR(
"more than 2^24 strings!", procName, NULL);
1404 return (
SARRAY *)ERROR_PTR(
"sa not made", procName, NULL);
1406 stringbuf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
1408 for (i = 0; i < n; i++) {
1410 if ((fscanf(
fp,
"%d[%d]:", &
index, &size) != 2) || (size > (1 << 30))) {
1412 L_ERROR(
"error on string size\n", procName);
1416 if (size > bufsize - 5) {
1417 LEPT_FREE(stringbuf);
1418 bufsize = (l_int32)(1.5 * size);
1419 stringbuf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
1422 if (fread(stringbuf, 1, size + 3,
fp) != size + 3) {
1424 L_ERROR(
"error reading string\n", procName);
1428 stringbuf[size + 2] =
'\0';
1432 ignore = fscanf(
fp,
"\n");
1435 LEPT_FREE(stringbuf);
1455 PROCNAME(
"sarrayReadMem");
1458 return (
SARRAY *)ERROR_PTR(
"data not defined", procName, NULL);
1460 return (
SARRAY *)ERROR_PTR(
"stream not opened", procName, NULL);
1464 if (!sa) L_ERROR(
"sarray not read\n", procName);
1483 PROCNAME(
"sarrayWrite");
1486 return ERROR_INT(
"filename not defined", procName, 1);
1488 return ERROR_INT(
"sa not defined", procName, 1);
1491 return ERROR_INT(
"stream not opened", procName, 1);
1495 return ERROR_INT(
"sa not written to stream", procName, 1);
1519 PROCNAME(
"sarrayWriteStream");
1522 return ERROR_INT(
"stream not defined", procName, 1);
1524 return ERROR_INT(
"sa not defined", procName, 1);
1528 fprintf(
fp,
"Number of strings = %d\n", n);
1529 for (i = 0; i < n; i++) {
1530 len = strlen(sa->
array[i]);
1531 fprintf(
fp,
" %d[%d]: %s\n", i, len, sa->
array[i]);
1560 PROCNAME(
"sarrayWriteMem");
1562 if (pdata) *pdata = NULL;
1563 if (psize) *psize = 0;
1565 return ERROR_INT(
"&data not defined", procName, 1);
1567 return ERROR_INT(
"&size not defined", procName, 1);
1569 return ERROR_INT(
"sa not defined", procName, 1);
1572 if ((
fp = open_memstream((
char **)pdata, psize)) == NULL)
1573 return ERROR_INT(
"stream not opened", procName, 1);
1576 L_INFO(
"work-around: writing to a temp file\n", procName);
1579 return ERROR_INT(
"tmpfile stream not opened", procName, 1);
1581 if ((
fp = tmpfile()) == NULL)
1582 return ERROR_INT(
"tmpfile stream not opened", procName, 1);
1606 PROCNAME(
"sarrayAppend");
1609 return ERROR_INT(
"filename not defined", procName, 1);
1611 return ERROR_INT(
"sa not defined", procName, 1);
1614 return ERROR_INT(
"stream not opened", procName, 1);
1617 return ERROR_INT(
"sa not appended to stream", procName, 1);
1678 PROCNAME(
"getNumberedPathnamesInDirectory");
1681 return (
SARRAY *)ERROR_PTR(
"dirname not defined", procName, NULL);
1684 return (
SARRAY *)ERROR_PTR(
"sa not made", procName, NULL);
1722 char *fname, *fullname;
1724 SARRAY *sa, *safiles, *saout;
1726 PROCNAME(
"getSortedPathnamesInDirectory");
1729 return (
SARRAY *)ERROR_PTR(
"dirname not defined", procName, NULL);
1732 return (
SARRAY *)ERROR_PTR(
"sa not made", procName, NULL);
1737 L_WARNING(
"no files found\n", procName);
1743 first = L_MIN(L_MAX(first, 0), n - 1);
1746 last = L_MIN(first + nfiles - 1, n - 1);
1749 for (i = first; i <= last; i++) {
1751 fullname =
pathJoin(dirname, fname);
1783 l_int32 i, nfiles, num,
index;
1786 PROCNAME(
"convertSortedToNumberedPathnames");
1789 return (
SARRAY *)ERROR_PTR(
"sa not defined", procName, NULL);
1798 for (i = nfiles - 1; i >= 0; i--) {
1801 if (num < 0)
continue;
1802 num = L_MIN(num + 1, maxnum);
1812 for (i = 0; i < nfiles; i++) {
1815 if (index < 0 || index >= num)
continue;
1817 if (str[0] !=
'\0') {
1818 L_WARNING(
"\n Multiple files with same number: %d\n",
1861 char dir[PATH_MAX + 1];
1862 char *realdir, *stat_path, *ignore;
1866 struct dirent *pdirentry;
1870 PROCNAME(
"getFilenamesInDirectory");
1873 return (
SARRAY *)ERROR_PTR(
"dirname not defined", procName, NULL);
1879 ignore = realpath(dirname, dir);
1881 if ((pdir = opendir(realdir)) == NULL) {
1883 return (
SARRAY *)ERROR_PTR(
"pdir not opened", procName, NULL);
1887 while ((pdirentry = readdir(pdir))) {
1889 stat_ret = fstatat(dfd, pdirentry->d_name, &st, 0);
1891 size = strlen(realdir) + strlen(pdirentry->d_name) + 2;
1892 if (size > PATH_MAX) {
1893 L_ERROR(
"size = %lu too large; skipping\n", procName,
1894 (
unsigned long)size);
1897 stat_path = (
char *)LEPT_CALLOC(size, 1);
1898 snprintf(stat_path, size,
"%s/%s", realdir, pdirentry->d_name);
1899 stat_ret = stat(stat_path, &st);
1900 LEPT_FREE(stat_path);
1902 if (stat_ret == 0 && S_ISDIR(st.st_mode))
1914 #include <windows.h> 1921 HANDLE hFind = INVALID_HANDLE_VALUE;
1923 WIN32_FIND_DATAA ffd;
1925 PROCNAME(
"getFilenamesInDirectory");
1928 return (
SARRAY *)ERROR_PTR(
"dirname not defined", procName, NULL);
1934 if (strlen(pszDir) + 1 > MAX_PATH) {
1936 return (
SARRAY *)ERROR_PTR(
"dirname is too long", procName, NULL);
1941 return (
SARRAY *)ERROR_PTR(
"safiles not made", procName, NULL);
1944 hFind = FindFirstFileA(pszDir, &ffd);
1945 if (INVALID_HANDLE_VALUE == hFind) {
1948 return (
SARRAY *)ERROR_PTR(
"hFind not opened", procName, NULL);
1951 while (FindNextFileA(hFind, &ffd) != 0) {
1952 if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
char * genPathname(const char *dir, const char *fname)
genPathname()
#define SARRAY_VERSION_NUMBER
char * stringNew(const char *src)
stringNew()
char * sarrayRemoveString(SARRAY *sa, l_int32 index)
sarrayRemoveString()
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
l_ok sarrayWriteStream(FILE *fp, SARRAY *sa)
sarrayWriteStream()
char * sarrayToStringRange(SARRAY *sa, l_int32 first, l_int32 nstrings, l_int32 addnlflag)
sarrayToStringRange()
SARRAY * getFilenamesInDirectory(const char *dirname)
getFilenamesInDirectory()
FILE * fopenReadFromMemory(const l_uint8 *data, size_t size)
fopenReadFromMemory()
l_ok sarrayWriteMem(l_uint8 **pdata, size_t *psize, SARRAY *sa)
sarrayWriteMem()
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
void * reallocNew(void **pindata, l_int32 oldsize, l_int32 newsize)
reallocNew()
SARRAY * sarrayConvertWordsToLines(SARRAY *sa, l_int32 linesize)
sarrayConvertWordsToLines()
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
l_ok sarrayAppendRange(SARRAY *sa1, SARRAY *sa2, l_int32 start, l_int32 end)
sarrayAppendRange()
l_ok sarrayWrite(const char *filename, SARRAY *sa)
sarrayWrite()
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
l_ok sarrayAppend(const char *filename, SARRAY *sa)
sarrayAppend()
FILE * fopenWriteWinTempfile()
fopenWriteWinTempfile()
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
char ** sarrayGetArray(SARRAY *sa, l_int32 *pnalloc, l_int32 *pn)
sarrayGetArray()
l_ok sarrayClear(SARRAY *sa)
sarrayClear()
SARRAY * sarrayReadMem(const l_uint8 *data, size_t size)
sarrayReadMem()
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
SARRAY * sarrayClone(SARRAY *sa)
sarrayClone()
l_int32 extractNumberFromFilename(const char *fname, l_int32 numpre, l_int32 numpost)
extractNumberFromFilename()
SARRAY * sarraySelectByRange(SARRAY *sain, l_int32 first, l_int32 last)
sarraySelectByRange()
FILE * fopenWriteStream(const char *filename, const char *modestring)
fopenWriteStream()
FILE * fopenReadStream(const char *filename)
fopenReadStream()
static l_int32 sarrayExtendArray(SARRAY *sa)
sarrayExtendArray()
l_uint8 * l_binaryReadStream(FILE *fp, size_t *pnbytes)
l_binaryReadStream()
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
l_int32 sarrayGetRefcount(SARRAY *sa)
sarrayGetRefCount()
SARRAY * convertSortedToNumberedPathnames(SARRAY *sa, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
convertSortedToNumberedPathnames()
SARRAY * sarrayRead(const char *filename)
sarrayRead()
l_ok sarrayJoin(SARRAY *sa1, SARRAY *sa2)
sarrayJoin()
l_ok sarrayReplaceString(SARRAY *sa, l_int32 index, char *newstr, l_int32 copyflag)
sarrayReplaceString()
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
char * pathJoin(const char *dir, const char *fname)
pathJoin()
char * stringJoin(const char *src1, const char *src2)
stringJoin()
l_ok sarrayChangeRefcount(SARRAY *sa, l_int32 delta)
sarrayChangeRefCount()
SARRAY * sarrayCreateInitialized(l_int32 n, const char *initstr)
sarrayCreateInitialized()
l_ok convertSepCharsInPath(char *path, l_int32 type)
convertSepCharsInPath()
char * strtokSafe(char *cstr, const char *seps, char **psaveptr)
strtokSafe()
l_int32 sarrayParseRange(SARRAY *sa, l_int32 start, l_int32 *pactualstart, l_int32 *pend, l_int32 *pnewstart, const char *substr, l_int32 loc)
sarrayParseRange()
SARRAY * sarrayReadStream(FILE *fp)
sarrayReadStream()
l_ok arrayFindSequence(const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen, l_int32 *poffset, l_int32 *pfound)
arrayFindSequence()
SARRAY * sarraySelectBySubstring(SARRAY *sain, const char *substr)
sarraySelectBySubstring()
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
static const l_int32 L_BUF_SIZE
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()