Leptonica  1.77.0
Image processing and image analysis suite
numafunc2.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
134 #include <math.h>
135 #include "allheaders.h"
136 
137  /* bin sizes in numaMakeHistogram() */
138 static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\
139  2000, 5000, 10000, 20000, 50000, 100000, 200000,\
140  500000, 1000000, 2000000, 5000000, 10000000,\
141  200000000, 50000000, 100000000};
142 static const l_int32 NBinSizes = 24;
143 
144 
145 #ifndef NO_CONSOLE_IO
146 #define DEBUG_HISTO 0
147 #define DEBUG_CROSSINGS 0
148 #define DEBUG_FREQUENCY 0
149 #endif /* ~NO_CONSOLE_IO */
150 
151 
152 /*----------------------------------------------------------------------*
153  * Morphological operations *
154  *----------------------------------------------------------------------*/
176 NUMA *
178  l_int32 size)
179 {
180 l_int32 i, j, n, hsize, len;
181 l_float32 minval;
182 l_float32 *fa, *fas, *fad;
183 NUMA *nad;
184 
185  PROCNAME("numaErode");
186 
187  if (!nas)
188  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
189  if (size <= 0)
190  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
191  if ((size & 1) == 0 ) {
192  L_WARNING("sel size must be odd; increasing by 1\n", procName);
193  size++;
194  }
195 
196  if (size == 1)
197  return numaCopy(nas);
198 
199  /* Make a source fa (fas) that has an added (size / 2) boundary
200  * on left and right, contains a copy of nas in the interior region
201  * (between 'size' and 'size + n', and has large values
202  * inserted in the boundary (because it is an erosion). */
203  n = numaGetCount(nas);
204  hsize = size / 2;
205  len = n + 2 * hsize;
206  if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
207  return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
208  for (i = 0; i < hsize; i++)
209  fas[i] = 1.0e37;
210  for (i = hsize + n; i < len; i++)
211  fas[i] = 1.0e37;
212  fa = numaGetFArray(nas, L_NOCOPY);
213  for (i = 0; i < n; i++)
214  fas[hsize + i] = fa[i];
215 
216  nad = numaMakeConstant(0, n);
217  numaCopyParameters(nad, nas);
218  fad = numaGetFArray(nad, L_NOCOPY);
219  for (i = 0; i < n; i++) {
220  minval = 1.0e37; /* start big */
221  for (j = 0; j < size; j++)
222  minval = L_MIN(minval, fas[i + j]);
223  fad[i] = minval;
224  }
225 
226  LEPT_FREE(fas);
227  return nad;
228 }
229 
230 
245 NUMA *
247  l_int32 size)
248 {
249 l_int32 i, j, n, hsize, len;
250 l_float32 maxval;
251 l_float32 *fa, *fas, *fad;
252 NUMA *nad;
253 
254  PROCNAME("numaDilate");
255 
256  if (!nas)
257  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
258  if (size <= 0)
259  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
260  if ((size & 1) == 0 ) {
261  L_WARNING("sel size must be odd; increasing by 1\n", procName);
262  size++;
263  }
264 
265  if (size == 1)
266  return numaCopy(nas);
267 
268  /* Make a source fa (fas) that has an added (size / 2) boundary
269  * on left and right, contains a copy of nas in the interior region
270  * (between 'size' and 'size + n', and has small values
271  * inserted in the boundary (because it is a dilation). */
272  n = numaGetCount(nas);
273  hsize = size / 2;
274  len = n + 2 * hsize;
275  if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
276  return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
277  for (i = 0; i < hsize; i++)
278  fas[i] = -1.0e37;
279  for (i = hsize + n; i < len; i++)
280  fas[i] = -1.0e37;
281  fa = numaGetFArray(nas, L_NOCOPY);
282  for (i = 0; i < n; i++)
283  fas[hsize + i] = fa[i];
284 
285  nad = numaMakeConstant(0, n);
286  numaCopyParameters(nad, nas);
287  fad = numaGetFArray(nad, L_NOCOPY);
288  for (i = 0; i < n; i++) {
289  maxval = -1.0e37; /* start small */
290  for (j = 0; j < size; j++)
291  maxval = L_MAX(maxval, fas[i + j]);
292  fad[i] = maxval;
293  }
294 
295  LEPT_FREE(fas);
296  return nad;
297 }
298 
299 
314 NUMA *
316  l_int32 size)
317 {
318 NUMA *nat, *nad;
319 
320  PROCNAME("numaOpen");
321 
322  if (!nas)
323  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
324  if (size <= 0)
325  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
326  if ((size & 1) == 0 ) {
327  L_WARNING("sel size must be odd; increasing by 1\n", procName);
328  size++;
329  }
330 
331  if (size == 1)
332  return numaCopy(nas);
333 
334  nat = numaErode(nas, size);
335  nad = numaDilate(nat, size);
336  numaDestroy(&nat);
337  return nad;
338 }
339 
340 
361 NUMA *
363  l_int32 size)
364 {
365 NUMA *nab, *nat1, *nat2, *nad;
366 
367  PROCNAME("numaClose");
368 
369  if (!nas)
370  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
371  if (size <= 0)
372  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
373  if ((size & 1) == 0 ) {
374  L_WARNING("sel size must be odd; increasing by 1\n", procName);
375  size++;
376  }
377 
378  if (size == 1)
379  return numaCopy(nas);
380 
381  nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */
382  nat1 = numaDilate(nab, size);
383  nat2 = numaErode(nat1, size);
384  nad = numaRemoveBorder(nat2, size, size);
385  numaDestroy(&nab);
386  numaDestroy(&nat1);
387  numaDestroy(&nat2);
388  return nad;
389 }
390 
391 
392 /*----------------------------------------------------------------------*
393  * Other transforms *
394  *----------------------------------------------------------------------*/
408 NUMA *
410  l_float32 shift,
411  l_float32 scale)
412 {
413 l_int32 i, n;
414 l_float32 val;
415 NUMA *nad;
416 
417  PROCNAME("numaTransform");
418 
419  if (!nas)
420  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
421  n = numaGetCount(nas);
422  if ((nad = numaCreate(n)) == NULL)
423  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
424  numaCopyParameters(nad, nas);
425  for (i = 0; i < n; i++) {
426  numaGetFValue(nas, i, &val);
427  val = scale * (val + shift);
428  numaAddNumber(nad, val);
429  }
430  return nad;
431 }
432 
433 
445 l_ok
447  l_int32 first,
448  l_int32 last,
449  l_float32 *pmean,
450  l_float32 *pvar,
451  l_float32 *prvar)
452 {
453 l_int32 i, n, ni;
454 l_float32 sum, sumsq, val, mean, var;
455 
456  PROCNAME("numaSimpleStats");
457 
458  if (pmean) *pmean = 0.0;
459  if (pvar) *pvar = 0.0;
460  if (prvar) *prvar = 0.0;
461  if (!pmean && !pvar && !prvar)
462  return ERROR_INT("nothing requested", procName, 1);
463  if (!na)
464  return ERROR_INT("na not defined", procName, 1);
465  if ((n = numaGetCount(na)) == 0)
466  return ERROR_INT("na is empty", procName, 1);
467  first = L_MAX(0, first);
468  if (last < 0) last = n - 1;
469  if (first >= n)
470  return ERROR_INT("invalid first", procName, 1);
471  if (last >= n) {
472  L_WARNING("last = %d is beyond max index = %d; adjusting\n",
473  procName, last, n - 1);
474  last = n - 1;
475  }
476  if (first > last)
477  return ERROR_INT("first > last\n", procName, 1);
478  ni = last - first + 1;
479  sum = sumsq = 0.0;
480  for (i = first; i <= last; i++) {
481  numaGetFValue(na, i, &val);
482  sum += val;
483  sumsq += val * val;
484  }
485 
486  mean = sum / ni;
487  if (pmean)
488  *pmean = mean;
489  if (pvar || prvar) {
490  var = sumsq / ni - mean * mean;
491  if (pvar) *pvar = var;
492  if (prvar) *prvar = sqrtf(var);
493  }
494 
495  return 0;
496 }
497 
498 
530 l_ok
532  l_int32 wc,
533  NUMA **pnam,
534  NUMA **pnams,
535  NUMA **pnav,
536  NUMA **pnarv)
537 {
538 NUMA *nam, *nams;
539 
540  PROCNAME("numaWindowedStats");
541 
542  if (!nas)
543  return ERROR_INT("nas not defined", procName, 1);
544  if (2 * wc + 1 > numaGetCount(nas))
545  L_WARNING("filter wider than input array!\n", procName);
546 
547  if (!pnav && !pnarv) {
548  if (pnam) *pnam = numaWindowedMean(nas, wc);
549  if (pnams) *pnams = numaWindowedMeanSquare(nas, wc);
550  return 0;
551  }
552 
553  nam = numaWindowedMean(nas, wc);
554  nams = numaWindowedMeanSquare(nas, wc);
555  numaWindowedVariance(nam, nams, pnav, pnarv);
556  if (pnam)
557  *pnam = nam;
558  else
559  numaDestroy(&nam);
560  if (pnams)
561  *pnams = nams;
562  else
563  numaDestroy(&nams);
564  return 0;
565 }
566 
567 
581 NUMA *
583  l_int32 wc)
584 {
585 l_int32 i, n, n1, width;
586 l_float32 sum, norm;
587 l_float32 *fa1, *fad, *suma;
588 NUMA *na1, *nad;
589 
590  PROCNAME("numaWindowedMean");
591 
592  if (!nas)
593  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
594  n = numaGetCount(nas);
595  width = 2 * wc + 1; /* filter width */
596  if (width > n)
597  L_WARNING("filter wider than input array!\n", procName);
598 
599  na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
600  n1 = n + 2 * wc;
601  fa1 = numaGetFArray(na1, L_NOCOPY);
602  nad = numaMakeConstant(0, n);
603  fad = numaGetFArray(nad, L_NOCOPY);
604 
605  /* Make sum array; note the indexing */
606  if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
607  numaDestroy(&na1);
608  numaDestroy(&nad);
609  return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
610  }
611  sum = 0.0;
612  suma[0] = 0.0;
613  for (i = 0; i < n1; i++) {
614  sum += fa1[i];
615  suma[i + 1] = sum;
616  }
617 
618  norm = 1. / (2 * wc + 1);
619  for (i = 0; i < n; i++)
620  fad[i] = norm * (suma[width + i] - suma[i]);
621 
622  LEPT_FREE(suma);
623  numaDestroy(&na1);
624  return nad;
625 }
626 
627 
641 NUMA *
643  l_int32 wc)
644 {
645 l_int32 i, n, n1, width;
646 l_float32 sum, norm;
647 l_float32 *fa1, *fad, *suma;
648 NUMA *na1, *nad;
649 
650  PROCNAME("numaWindowedMeanSquare");
651 
652  if (!nas)
653  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
654  n = numaGetCount(nas);
655  width = 2 * wc + 1; /* filter width */
656  if (width > n)
657  L_WARNING("filter wider than input array!\n", procName);
658 
659  na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
660  n1 = n + 2 * wc;
661  fa1 = numaGetFArray(na1, L_NOCOPY);
662  nad = numaMakeConstant(0, n);
663  fad = numaGetFArray(nad, L_NOCOPY);
664 
665  /* Make sum array; note the indexing */
666  if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
667  numaDestroy(&na1);
668  numaDestroy(&nad);
669  return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
670  }
671  sum = 0.0;
672  suma[0] = 0.0;
673  for (i = 0; i < n1; i++) {
674  sum += fa1[i] * fa1[i];
675  suma[i + 1] = sum;
676  }
677 
678  norm = 1. / (2 * wc + 1);
679  for (i = 0; i < n; i++)
680  fad[i] = norm * (suma[width + i] - suma[i]);
681 
682  LEPT_FREE(suma);
683  numaDestroy(&na1);
684  return nad;
685 }
686 
687 
709 l_ok
711  NUMA *nams,
712  NUMA **pnav,
713  NUMA **pnarv)
714 {
715 l_int32 i, nm, nms;
716 l_float32 var;
717 l_float32 *fam, *fams, *fav, *farv;
718 NUMA *nav, *narv; /* variance and square root of variance */
719 
720  PROCNAME("numaWindowedVariance");
721 
722  if (pnav) *pnav = NULL;
723  if (pnarv) *pnarv = NULL;
724  if (!pnav && !pnarv)
725  return ERROR_INT("neither &nav nor &narv are defined", procName, 1);
726  if (!nam)
727  return ERROR_INT("nam not defined", procName, 1);
728  if (!nams)
729  return ERROR_INT("nams not defined", procName, 1);
730  nm = numaGetCount(nam);
731  nms = numaGetCount(nams);
732  if (nm != nms)
733  return ERROR_INT("sizes of nam and nams differ", procName, 1);
734 
735  if (pnav) {
736  nav = numaMakeConstant(0, nm);
737  *pnav = nav;
738  fav = numaGetFArray(nav, L_NOCOPY);
739  }
740  if (pnarv) {
741  narv = numaMakeConstant(0, nm);
742  *pnarv = narv;
743  farv = numaGetFArray(narv, L_NOCOPY);
744  }
745  fam = numaGetFArray(nam, L_NOCOPY);
746  fams = numaGetFArray(nams, L_NOCOPY);
747 
748  for (i = 0; i < nm; i++) {
749  var = fams[i] - fam[i] * fam[i];
750  if (pnav)
751  fav[i] = var;
752  if (pnarv)
753  farv[i] = sqrtf(var);
754  }
755 
756  return 0;
757 }
758 
759 
777 NUMA *
779  l_int32 halfwin)
780 {
781 l_int32 i, n;
782 l_float32 medval;
783 NUMA *na1, *na2, *nad;
784 
785  PROCNAME("numaWindowedMedian");
786 
787  if (!nas)
788  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
789  if ((n = numaGetCount(nas)) < 3)
790  return numaCopy(nas);
791  if (halfwin <= 0) {
792  L_ERROR("filter too small; returning a copy\n", procName);
793  return numaCopy(nas);
794  }
795 
796  if (halfwin > (n - 1) / 2) {
797  halfwin = (n - 1) / 2;
798  L_INFO("reducing filter to halfwin = %d\n", procName, halfwin);
799  }
800 
801  /* Add a border to both ends */
802  na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER);
803 
804  /* Get the median value at the center of each window, corresponding
805  * to locations in the input nas. */
806  nad = numaCreate(n);
807  for (i = 0; i < n; i++) {
808  na2 = numaClipToInterval(na1, i, i + 2 * halfwin);
809  numaGetMedian(na2, &medval);
810  numaAddNumber(nad, medval);
811  numaDestroy(&na2);
812  }
813 
814  numaDestroy(&na1);
815  return nad;
816 }
817 
818 
826 NUMA *
828 {
829 l_int32 i, n, ival;
830 NUMA *nad;
831 
832  PROCNAME("numaConvertToInt");
833 
834  if (!nas)
835  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
836 
837  n = numaGetCount(nas);
838  if ((nad = numaCreate(n)) == NULL)
839  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
840  numaCopyParameters(nad, nas);
841  for (i = 0; i < n; i++) {
842  numaGetIValue(nas, i, &ival);
843  numaAddNumber(nad, ival);
844  }
845  return nad;
846 }
847 
848 
849 /*----------------------------------------------------------------------*
850  * Histogram generation and statistics *
851  *----------------------------------------------------------------------*/
878 NUMA *
880  l_int32 maxbins,
881  l_int32 *pbinsize,
882  l_int32 *pbinstart)
883 {
884 l_int32 i, n, ival, hval;
885 l_int32 iminval, imaxval, range, binsize, nbins, ibin;
886 l_float32 val, ratio;
887 NUMA *nai, *nahist;
888 
889  PROCNAME("numaMakeHistogram");
890 
891  if (!na)
892  return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
893  if (!pbinsize)
894  return (NUMA *)ERROR_PTR("&binsize not defined", procName, NULL);
895 
896  /* Determine input range */
897  numaGetMin(na, &val, NULL);
898  iminval = (l_int32)(val + 0.5);
899  numaGetMax(na, &val, NULL);
900  imaxval = (l_int32)(val + 0.5);
901  if (pbinstart == NULL) { /* clip negative vals; start from 0 */
902  iminval = 0;
903  if (imaxval < 0)
904  return (NUMA *)ERROR_PTR("all values < 0", procName, NULL);
905  }
906 
907  /* Determine binsize */
908  range = imaxval - iminval + 1;
909  if (range > maxbins - 1) {
910  ratio = (l_float64)range / (l_float64)maxbins;
911  binsize = 0;
912  for (i = 0; i < NBinSizes; i++) {
913  if (ratio < BinSizeArray[i]) {
914  binsize = BinSizeArray[i];
915  break;
916  }
917  }
918  if (binsize == 0)
919  return (NUMA *)ERROR_PTR("numbers too large", procName, NULL);
920  } else {
921  binsize = 1;
922  }
923  *pbinsize = binsize;
924  nbins = 1 + range / binsize; /* +1 seems to be sufficient */
925 
926  /* Redetermine iminval */
927  if (pbinstart && binsize > 1) {
928  if (iminval >= 0)
929  iminval = binsize * (iminval / binsize);
930  else
931  iminval = binsize * ((iminval - binsize + 1) / binsize);
932  }
933  if (pbinstart)
934  *pbinstart = iminval;
935 
936 #if DEBUG_HISTO
937  fprintf(stderr, " imaxval = %d, range = %d, nbins = %d\n",
938  imaxval, range, nbins);
939 #endif /* DEBUG_HISTO */
940 
941  /* Use integerized data for input */
942  if ((nai = numaConvertToInt(na)) == NULL)
943  return (NUMA *)ERROR_PTR("nai not made", procName, NULL);
944  n = numaGetCount(nai);
945 
946  /* Make histogram, converting value in input array
947  * into a bin number for this histogram array. */
948  if ((nahist = numaCreate(nbins)) == NULL) {
949  numaDestroy(&nai);
950  return (NUMA *)ERROR_PTR("nahist not made", procName, NULL);
951  }
952  numaSetCount(nahist, nbins);
953  numaSetParameters(nahist, iminval, binsize);
954  for (i = 0; i < n; i++) {
955  numaGetIValue(nai, i, &ival);
956  ibin = (ival - iminval) / binsize;
957  if (ibin >= 0 && ibin < nbins) {
958  numaGetIValue(nahist, ibin, &hval);
959  numaSetValue(nahist, ibin, hval + 1.0);
960  }
961  }
962 
963  numaDestroy(&nai);
964  return nahist;
965 }
966 
967 
990 NUMA *
992  l_int32 maxbins)
993 {
994 l_int32 i, n, imin, imax, irange, ibin, ival, allints;
995 l_float32 minval, maxval, range, binsize, fval;
996 NUMA *nah;
997 
998  PROCNAME("numaMakeHistogramAuto");
999 
1000  if (!na)
1001  return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1002  maxbins = L_MAX(1, maxbins);
1003 
1004  /* Determine input range */
1005  numaGetMin(na, &minval, NULL);
1006  numaGetMax(na, &maxval, NULL);
1007 
1008  /* Determine if values are all integers */
1009  n = numaGetCount(na);
1010  numaHasOnlyIntegers(na, maxbins, &allints);
1011 
1012  /* Do simple integer binning if possible */
1013  if (allints && (maxval - minval < maxbins)) {
1014  imin = (l_int32)minval;
1015  imax = (l_int32)maxval;
1016  irange = imax - imin + 1;
1017  nah = numaCreate(irange);
1018  numaSetCount(nah, irange); /* init */
1019  numaSetParameters(nah, minval, 1.0);
1020  for (i = 0; i < n; i++) {
1021  numaGetIValue(na, i, &ival);
1022  ibin = ival - imin;
1023  numaGetIValue(nah, ibin, &ival);
1024  numaSetValue(nah, ibin, ival + 1.0);
1025  }
1026 
1027  return nah;
1028  }
1029 
1030  /* Do float binning, even if the data is integers. */
1031  range = maxval - minval;
1032  binsize = range / (l_float32)maxbins;
1033  if (range == 0.0) {
1034  nah = numaCreate(1);
1035  numaSetParameters(nah, minval, binsize);
1036  numaAddNumber(nah, n);
1037  return nah;
1038  }
1039  nah = numaCreate(maxbins);
1040  numaSetCount(nah, maxbins);
1041  numaSetParameters(nah, minval, binsize);
1042  for (i = 0; i < n; i++) {
1043  numaGetFValue(na, i, &fval);
1044  ibin = (l_int32)((fval - minval) / binsize);
1045  ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */
1046  numaGetIValue(nah, ibin, &ival);
1047  numaSetValue(nah, ibin, ival + 1.0);
1048  }
1049 
1050  return nah;
1051 }
1052 
1053 
1074 NUMA *
1076  l_float32 binsize,
1077  l_float32 maxsize)
1078 {
1079 l_int32 i, n, nbins, ival, ibin;
1080 l_float32 val, maxval;
1081 NUMA *nad;
1082 
1083  PROCNAME("numaMakeHistogramClipped");
1084 
1085  if (!na)
1086  return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1087  if (binsize <= 0.0)
1088  return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL);
1089  if (binsize > maxsize)
1090  binsize = maxsize; /* just one bin */
1091 
1092  numaGetMax(na, &maxval, NULL);
1093  n = numaGetCount(na);
1094  maxsize = L_MIN(maxsize, maxval);
1095  nbins = (l_int32)(maxsize / binsize) + 1;
1096 
1097 /* fprintf(stderr, "maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
1098 
1099  if ((nad = numaCreate(nbins)) == NULL)
1100  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1101  numaSetParameters(nad, 0.0, binsize);
1102  numaSetCount(nad, nbins); /* interpret zeroes in bins as data */
1103  for (i = 0; i < n; i++) {
1104  numaGetFValue(na, i, &val);
1105  ibin = (l_int32)(val / binsize);
1106  if (ibin >= 0 && ibin < nbins) {
1107  numaGetIValue(nad, ibin, &ival);
1108  numaSetValue(nad, ibin, ival + 1.0);
1109  }
1110  }
1111 
1112  return nad;
1113 }
1114 
1115 
1123 NUMA *
1125  l_int32 newsize)
1126 {
1127 l_int32 i, j, ns, nd, index, count, val;
1128 l_float32 start, oldsize;
1129 NUMA *nad;
1130 
1131  PROCNAME("numaRebinHistogram");
1132 
1133  if (!nas)
1134  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1135  if (newsize <= 1)
1136  return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL);
1137  if ((ns = numaGetCount(nas)) == 0)
1138  return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
1139 
1140  nd = (ns + newsize - 1) / newsize;
1141  if ((nad = numaCreate(nd)) == NULL)
1142  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1143  numaGetParameters(nad, &start, &oldsize);
1144  numaSetParameters(nad, start, oldsize * newsize);
1145 
1146  for (i = 0; i < nd; i++) { /* new bins */
1147  count = 0;
1148  index = i * newsize;
1149  for (j = 0; j < newsize; j++) {
1150  if (index < ns) {
1151  numaGetIValue(nas, index, &val);
1152  count += val;
1153  index++;
1154  }
1155  }
1156  numaAddNumber(nad, count);
1157  }
1158 
1159  return nad;
1160 }
1161 
1162 
1171 NUMA *
1173  l_float32 tsum)
1174 {
1175 l_int32 i, ns;
1176 l_float32 sum, factor, fval;
1177 NUMA *nad;
1178 
1179  PROCNAME("numaNormalizeHistogram");
1180 
1181  if (!nas)
1182  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1183  if (tsum <= 0.0)
1184  return (NUMA *)ERROR_PTR("tsum must be > 0.0", procName, NULL);
1185  if ((ns = numaGetCount(nas)) == 0)
1186  return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
1187 
1188  numaGetSum(nas, &sum);
1189  factor = tsum / sum;
1190 
1191  if ((nad = numaCreate(ns)) == NULL)
1192  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1193  numaCopyParameters(nad, nas);
1194 
1195  for (i = 0; i < ns; i++) {
1196  numaGetFValue(nas, i, &fval);
1197  fval *= factor;
1198  numaAddNumber(nad, fval);
1199  }
1200 
1201  return nad;
1202 }
1203 
1204 
1253 l_ok
1255  l_int32 maxbins,
1256  l_float32 *pmin,
1257  l_float32 *pmax,
1258  l_float32 *pmean,
1259  l_float32 *pvariance,
1260  l_float32 *pmedian,
1261  l_float32 rank,
1262  l_float32 *prval,
1263  NUMA **phisto)
1264 {
1265 l_int32 i, n;
1266 l_float32 minval, maxval, fval, mean, sum;
1267 NUMA *nah;
1268 
1269  PROCNAME("numaGetStatsUsingHistogram");
1270 
1271  if (pmin) *pmin = 0.0;
1272  if (pmax) *pmax = 0.0;
1273  if (pmean) *pmean = 0.0;
1274  if (pvariance) *pvariance = 0.0;
1275  if (pmedian) *pmedian = 0.0;
1276  if (prval) *prval = 0.0;
1277  if (phisto) *phisto = NULL;
1278  if (!na)
1279  return ERROR_INT("na not defined", procName, 1);
1280  if ((n = numaGetCount(na)) == 0)
1281  return ERROR_INT("numa is empty", procName, 1);
1282 
1283  numaGetMin(na, &minval, NULL);
1284  numaGetMax(na, &maxval, NULL);
1285  if (pmin) *pmin = minval;
1286  if (pmax) *pmax = maxval;
1287  if (pmean || pvariance) {
1288  sum = 0.0;
1289  for (i = 0; i < n; i++) {
1290  numaGetFValue(na, i, &fval);
1291  sum += fval;
1292  }
1293  mean = sum / (l_float32)n;
1294  if (pmean) *pmean = mean;
1295  }
1296  if (pvariance) {
1297  sum = 0.0;
1298  for (i = 0; i < n; i++) {
1299  numaGetFValue(na, i, &fval);
1300  sum += fval * fval;
1301  }
1302  *pvariance = sum / (l_float32)n - mean * mean;
1303  }
1304 
1305  if (!pmedian && !prval && !phisto)
1306  return 0;
1307 
1308  nah = numaMakeHistogramAuto(na, maxbins);
1309  if (pmedian)
1310  numaHistogramGetValFromRank(nah, 0.5, pmedian);
1311  if (prval)
1312  numaHistogramGetValFromRank(nah, rank, prval);
1313  if (phisto)
1314  *phisto = nah;
1315  else
1316  numaDestroy(&nah);
1317  return 0;
1318 }
1319 
1320 
1344 l_ok
1346  l_float32 startx,
1347  l_float32 deltax,
1348  l_float32 *pxmean,
1349  l_float32 *pxmedian,
1350  l_float32 *pxmode,
1351  l_float32 *pxvariance)
1352 {
1353  PROCNAME("numaGetHistogramStats");
1354 
1355  if (pxmean) *pxmean = 0.0;
1356  if (pxmedian) *pxmedian = 0.0;
1357  if (pxmode) *pxmode = 0.0;
1358  if (pxvariance) *pxvariance = 0.0;
1359  if (!nahisto)
1360  return ERROR_INT("nahisto not defined", procName, 1);
1361 
1362  return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1,
1363  pxmean, pxmedian, pxmode,
1364  pxvariance);
1365 }
1366 
1367 
1393 l_ok
1395  l_float32 startx,
1396  l_float32 deltax,
1397  l_int32 ifirst,
1398  l_int32 ilast,
1399  l_float32 *pxmean,
1400  l_float32 *pxmedian,
1401  l_float32 *pxmode,
1402  l_float32 *pxvariance)
1403 {
1404 l_int32 i, n, imax;
1405 l_float32 sum, sumval, halfsum, moment, var, x, y, ymax;
1406 
1407  PROCNAME("numaGetHistogramStatsOnInterval");
1408 
1409  if (pxmean) *pxmean = 0.0;
1410  if (pxmedian) *pxmedian = 0.0;
1411  if (pxmode) *pxmode = 0.0;
1412  if (pxvariance) *pxvariance = 0.0;
1413  if (!nahisto)
1414  return ERROR_INT("nahisto not defined", procName, 1);
1415  if (!pxmean && !pxmedian && !pxmode && !pxvariance)
1416  return ERROR_INT("nothing to compute", procName, 1);
1417 
1418  n = numaGetCount(nahisto);
1419  ifirst = L_MAX(0, ifirst);
1420  if (ilast < 0) ilast = n - 1;
1421  if (ifirst >= n)
1422  return ERROR_INT("invalid ifirst", procName, 1);
1423  if (ilast >= n) {
1424  L_WARNING("ilast = %d is beyond max index = %d; adjusting\n",
1425  procName, ilast, n - 1);
1426  ilast = n - 1;
1427  }
1428  if (ifirst > ilast)
1429  return ERROR_INT("ifirst > ilast", procName, 1);
1430  for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
1431  x = startx + i * deltax;
1432  numaGetFValue(nahisto, i, &y);
1433  sum += y;
1434  moment += x * y;
1435  var += x * x * y;
1436  }
1437  if (sum == 0.0) {
1438  L_INFO("sum is 0\n", procName);
1439  return 0;
1440  }
1441 
1442  if (pxmean)
1443  *pxmean = moment / sum;
1444  if (pxvariance)
1445  *pxvariance = var / sum - moment * moment / (sum * sum);
1446 
1447  if (pxmedian) {
1448  halfsum = sum / 2.0;
1449  for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
1450  numaGetFValue(nahisto, i, &y);
1451  sumval += y;
1452  if (sumval >= halfsum) {
1453  *pxmedian = startx + i * deltax;
1454  break;
1455  }
1456  }
1457  }
1458 
1459  if (pxmode) {
1460  imax = -1;
1461  ymax = -1.0e10;
1462  for (i = ifirst; i <= ilast; i++) {
1463  numaGetFValue(nahisto, i, &y);
1464  if (y > ymax) {
1465  ymax = y;
1466  imax = i;
1467  }
1468  }
1469  *pxmode = startx + imax * deltax;
1470  }
1471 
1472  return 0;
1473 }
1474 
1475 
1487 l_ok
1488 numaMakeRankFromHistogram(l_float32 startx,
1489  l_float32 deltax,
1490  NUMA *nasy,
1491  l_int32 npts,
1492  NUMA **pnax,
1493  NUMA **pnay)
1494 {
1495 l_int32 i, n;
1496 l_float32 sum, fval;
1497 NUMA *nan, *nar;
1498 
1499  PROCNAME("numaMakeRankFromHistogram");
1500 
1501  if (pnax) *pnax = NULL;
1502  if (!pnay)
1503  return ERROR_INT("&nay not defined", procName, 1);
1504  *pnay = NULL;
1505  if (!nasy)
1506  return ERROR_INT("nasy not defined", procName, 1);
1507  if ((n = numaGetCount(nasy)) == 0)
1508  return ERROR_INT("no bins in nas", procName, 1);
1509 
1510  /* Normalize and generate the rank array corresponding to
1511  * the binned histogram. */
1512  nan = numaNormalizeHistogram(nasy, 1.0);
1513  nar = numaCreate(n + 1); /* rank numa corresponding to nan */
1514  sum = 0.0;
1515  numaAddNumber(nar, sum); /* first element is 0.0 */
1516  for (i = 0; i < n; i++) {
1517  numaGetFValue(nan, i, &fval);
1518  sum += fval;
1519  numaAddNumber(nar, sum);
1520  }
1521 
1522  /* Compute rank array on full range with specified
1523  * number of points and correspondence to x-values. */
1524  numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
1525  startx, startx + n * deltax, npts,
1526  pnax, pnay);
1527  numaDestroy(&nan);
1528  numaDestroy(&nar);
1529  return 0;
1530 }
1531 
1532 
1555 l_ok
1557  l_float32 rval,
1558  l_float32 *prank)
1559 {
1560 l_int32 i, ibinval, n;
1561 l_float32 startval, binsize, binval, maxval, fractval, total, sum, val;
1562 
1563  PROCNAME("numaHistogramGetRankFromVal");
1564 
1565  if (!prank)
1566  return ERROR_INT("prank not defined", procName, 1);
1567  *prank = 0.0;
1568  if (!na)
1569  return ERROR_INT("na not defined", procName, 1);
1570  numaGetParameters(na, &startval, &binsize);
1571  n = numaGetCount(na);
1572  if (rval < startval)
1573  return 0;
1574  maxval = startval + n * binsize;
1575  if (rval > maxval) {
1576  *prank = 1.0;
1577  return 0;
1578  }
1579 
1580  binval = (rval - startval) / binsize;
1581  ibinval = (l_int32)binval;
1582  if (ibinval >= n) {
1583  *prank = 1.0;
1584  return 0;
1585  }
1586  fractval = binval - (l_float32)ibinval;
1587 
1588  sum = 0.0;
1589  for (i = 0; i < ibinval; i++) {
1590  numaGetFValue(na, i, &val);
1591  sum += val;
1592  }
1593  numaGetFValue(na, ibinval, &val);
1594  sum += fractval * val;
1595  numaGetSum(na, &total);
1596  *prank = sum / total;
1597 
1598 /* fprintf(stderr, "binval = %7.3f, rank = %7.3f\n", binval, *prank); */
1599 
1600  return 0;
1601 }
1602 
1603 
1626 l_ok
1628  l_float32 rank,
1629  l_float32 *prval)
1630 {
1631 l_int32 i, n;
1632 l_float32 startval, binsize, rankcount, total, sum, fract, val;
1633 
1634  PROCNAME("numaHistogramGetValFromRank");
1635 
1636  if (!prval)
1637  return ERROR_INT("prval not defined", procName, 1);
1638  *prval = 0.0;
1639  if (!na)
1640  return ERROR_INT("na not defined", procName, 1);
1641  if (rank < 0.0) {
1642  L_WARNING("rank < 0; setting to 0.0\n", procName);
1643  rank = 0.0;
1644  }
1645  if (rank > 1.0) {
1646  L_WARNING("rank > 1.0; setting to 1.0\n", procName);
1647  rank = 1.0;
1648  }
1649 
1650  n = numaGetCount(na);
1651  numaGetParameters(na, &startval, &binsize);
1652  numaGetSum(na, &total);
1653  rankcount = rank * total; /* count that corresponds to rank */
1654  sum = 0.0;
1655  for (i = 0; i < n; i++) {
1656  numaGetFValue(na, i, &val);
1657  if (sum + val >= rankcount)
1658  break;
1659  sum += val;
1660  }
1661  if (val <= 0.0) /* can == 0 if rank == 0.0 */
1662  fract = 0.0;
1663  else /* sum + fract * val = rankcount */
1664  fract = (rankcount - sum) / val;
1665 
1666  /* The use of the fraction of a bin allows a simple calculation
1667  * for the histogram value at the given rank. */
1668  *prval = startval + binsize * ((l_float32)i + fract);
1669 
1670 /* fprintf(stderr, "rank = %7.3f, val = %7.3f\n", rank, *prval); */
1671 
1672  return 0;
1673 }
1674 
1675 
1706 l_ok
1708  l_int32 nbins,
1709  NUMA **pnarbin,
1710  NUMA **pnam,
1711  NUMA **pnar,
1712  NUMA **pnabb)
1713 {
1714 NUMA *nar; /* rank value as function of intensity */
1715 NUMA *nam; /* median intensity in the rank bins */
1716 NUMA *nabb; /* rank bin right boundaries (in intensity) */
1717 NUMA *narbin; /* binned rank value as a function of intensity */
1718 l_int32 i, j, npts, start, midfound, mcount, rightedge;
1719 l_float32 sum, midrank, endrank, val;
1720 
1721  PROCNAME("numaDiscretizeRankAndIntensity");
1722 
1723  if (pnarbin) *pnarbin = NULL;
1724  if (pnam) *pnam = NULL;
1725  if (pnar) *pnar = NULL;
1726  if (pnabb) *pnabb = NULL;
1727  if (!pnarbin && !pnam && !pnar && !pnabb)
1728  return ERROR_INT("no output requested", procName, 1);
1729  if (!na)
1730  return ERROR_INT("na not defined", procName, 1);
1731  if (nbins < 2)
1732  return ERROR_INT("nbins must be > 1", procName, 1);
1733 
1734  /* Get cumulative normalized histogram (rank vs intensity value).
1735  * For a normalized histogram from an 8 bpp grayscale image
1736  * as input, we have 256 bins and 257 points in the
1737  * cumulative (rank) histogram. */
1738  npts = numaGetCount(na);
1739  if ((nar = numaCreate(npts + 1)) == NULL)
1740  return ERROR_INT("nar not made", procName, 1);
1741  sum = 0.0;
1742  numaAddNumber(nar, sum); /* left side of first bin */
1743  for (i = 0; i < npts; i++) {
1744  numaGetFValue(na, i, &val);
1745  sum += val;
1746  numaAddNumber(nar, sum);
1747  }
1748 
1749  nam = numaCreate(nbins);
1750  narbin = numaCreate(npts);
1751  nabb = numaCreate(nbins);
1752  if (!nam || !narbin || !nabb) {
1753  numaDestroy(&nar);
1754  numaDestroy(&nam);
1755  numaDestroy(&narbin);
1756  numaDestroy(&nabb);
1757  return ERROR_INT("numa not made", procName, 1);
1758  }
1759 
1760  /* We find the intensity value at the right edge of each of
1761  * the rank bins. We also find the median intensity in the bin,
1762  * where approximately half the samples are lower and half are
1763  * higher. This can be considered as a simple approximation
1764  * for the average intensity in the bin. */
1765  start = 0; /* index in nar */
1766  mcount = 0; /* count of median values in rank bins; not to exceed nbins */
1767  for (i = 0; i < nbins; i++) {
1768  midrank = (l_float32)(i + 0.5) / (l_float32)(nbins);
1769  endrank = (l_float32)(i + 1.0) / (l_float32)(nbins);
1770  endrank = L_MAX(0.0, L_MIN(endrank - 0.001, 1.0));
1771  midfound = FALSE;
1772  for (j = start; j < npts; j++) { /* scan up for each bin value */
1773  numaGetFValue(nar, j, &val);
1774  /* Use (j == npts - 1) tests in case all weight is at top end */
1775  if ((!midfound && val >= midrank) ||
1776  (mcount < nbins && j == npts - 1)) {
1777  midfound = TRUE;
1778  numaAddNumber(nam, j);
1779  mcount++;
1780  }
1781  if ((val >= endrank) || (j == npts - 1)) {
1782  numaAddNumber(nabb, j);
1783  if (val == endrank)
1784  start = j;
1785  else
1786  start = j - 1;
1787  break;
1788  }
1789  }
1790  }
1791  numaSetValue(nabb, nbins - 1, npts - 1); /* extend to max */
1792 
1793  /* Error checking: did we get data in all bins? */
1794  if (mcount != nbins)
1795  L_WARNING("found data for %d bins; should be %d\n",
1796  procName, mcount, nbins);
1797 
1798  /* Generate LUT that maps from intensity to bin number */
1799  start = 0;
1800  for (i = 0; i < nbins; i++) {
1801  numaGetIValue(nabb, i, &rightedge);
1802  for (j = start; j < npts; j++) {
1803  if (j <= rightedge)
1804  numaAddNumber(narbin, i);
1805  if (j > rightedge) {
1806  start = j;
1807  break;
1808  }
1809  if (j == npts - 1) { /* we're done */
1810  start = j + 1;
1811  break;
1812  }
1813  }
1814  }
1815 
1816  if (pnarbin)
1817  *pnarbin = narbin;
1818  else
1819  numaDestroy(&narbin);
1820  if (pnam)
1821  *pnam = nam;
1822  else
1823  numaDestroy(&nam);
1824  if (pnar)
1825  *pnar = nar;
1826  else
1827  numaDestroy(&nar);
1828  if (pnabb)
1829  *pnabb = nabb;
1830  else
1831  numaDestroy(&nabb);
1832  return 0;
1833 }
1834 
1835 
1854 l_ok
1856  l_int32 nbins,
1857  NUMA **pnarbin,
1858  NUMA **pnam)
1859 {
1860 NUMA *nah, *nan; /* histo and normalized histo */
1861 l_int32 maxbins, discardval;
1862 l_float32 maxval, delx;
1863 
1864  PROCNAME("numaGetRankBinValues");
1865 
1866  if (pnarbin) *pnarbin = NULL;
1867  if (pnam) *pnam = NULL;
1868  if (!pnarbin && !pnam)
1869  return ERROR_INT("no output requested", procName, 1);
1870  if (!na)
1871  return ERROR_INT("na not defined", procName, 1);
1872  if (numaGetCount(na) == 0)
1873  return ERROR_INT("na is empty", procName, 1);
1874  if (nbins < 2)
1875  return ERROR_INT("nbins must be > 1", procName, 1);
1876 
1877  /* Get normalized histogram */
1878  numaGetMax(na, &maxval, NULL);
1879  maxbins = L_MIN(100002, (l_int32)maxval + 2);
1880  nah = numaMakeHistogram(na, maxbins, &discardval, NULL);
1881  nan = numaNormalizeHistogram(nah, 1.0);
1882 
1883  /* Warn if there is a scale change. This shouldn't happen
1884  * unless the max value is above 100000. */
1885  numaGetParameters(nan, NULL, &delx);
1886  if (delx > 1.0)
1887  L_WARNING("scale change: delx = %6.2f\n", procName, delx);
1888 
1889  /* Rank bin the results */
1890  numaDiscretizeRankAndIntensity(nan, nbins, pnarbin, pnam, NULL, NULL);
1891  numaDestroy(&nah);
1892  numaDestroy(&nan);
1893  return 0;
1894 }
1895 
1896 
1897 /*----------------------------------------------------------------------*
1898  * Splitting a distribution *
1899  *----------------------------------------------------------------------*/
1949 l_ok
1951  l_float32 scorefract,
1952  l_int32 *psplitindex,
1953  l_float32 *pave1,
1954  l_float32 *pave2,
1955  l_float32 *pnum1,
1956  l_float32 *pnum2,
1957  NUMA **pnascore)
1958 {
1959 l_int32 i, n, bestsplit, minrange, maxrange, maxindex;
1960 l_float32 ave1, ave2, ave1prev, ave2prev;
1961 l_float32 num1, num2, num1prev, num2prev;
1962 l_float32 val, minval, sum, fract1;
1963 l_float32 norm, score, minscore, maxscore;
1964 NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2;
1965 
1966  PROCNAME("numaSplitDistribution");
1967 
1968  if (psplitindex) *psplitindex = 0;
1969  if (pave1) *pave1 = 0.0;
1970  if (pave2) *pave2 = 0.0;
1971  if (pnum1) *pnum1 = 0.0;
1972  if (pnum2) *pnum2 = 0.0;
1973  if (pnascore) *pnascore = NULL;
1974  if (!na)
1975  return ERROR_INT("na not defined", procName, 1);
1976 
1977  n = numaGetCount(na);
1978  if (n <= 1)
1979  return ERROR_INT("n = 1 in histogram", procName, 1);
1980  numaGetSum(na, &sum);
1981  if (sum <= 0.0)
1982  return ERROR_INT("sum <= 0.0", procName, 1);
1983  norm = 4.0 / ((l_float32)(n - 1) * (n - 1));
1984  ave1prev = 0.0;
1985  numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
1986  num1prev = 0.0;
1987  num2prev = sum;
1988  maxindex = n / 2; /* initialize with something */
1989 
1990  /* Split the histogram with [0 ... i] in the lower part
1991  * and [i+1 ... n-1] in upper part. First, compute an otsu
1992  * score for each possible splitting. */
1993  if ((nascore = numaCreate(n)) == NULL)
1994  return ERROR_INT("nascore not made", procName, 1);
1995  naave1 = (pave1) ? numaCreate(n) : NULL;
1996  naave2 = (pave2) ? numaCreate(n) : NULL;
1997  nanum1 = (pnum1) ? numaCreate(n) : NULL;
1998  nanum2 = (pnum2) ? numaCreate(n) : NULL;
1999  maxscore = 0.0;
2000  for (i = 0; i < n; i++) {
2001  numaGetFValue(na, i, &val);
2002  num1 = num1prev + val;
2003  if (num1 == 0)
2004  ave1 = ave1prev;
2005  else
2006  ave1 = (num1prev * ave1prev + i * val) / num1;
2007  num2 = num2prev - val;
2008  if (num2 == 0)
2009  ave2 = ave2prev;
2010  else
2011  ave2 = (num2prev * ave2prev - i * val) / num2;
2012  fract1 = num1 / sum;
2013  score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
2014  numaAddNumber(nascore, score);
2015  if (pave1) numaAddNumber(naave1, ave1);
2016  if (pave2) numaAddNumber(naave2, ave2);
2017  if (pnum1) numaAddNumber(nanum1, num1);
2018  if (pnum2) numaAddNumber(nanum2, num2);
2019  if (score > maxscore) {
2020  maxscore = score;
2021  maxindex = i;
2022  }
2023  num1prev = num1;
2024  num2prev = num2;
2025  ave1prev = ave1;
2026  ave2prev = ave2;
2027  }
2028 
2029  /* Next, for all contiguous scores within a specified fraction
2030  * of the max, choose the split point as the value with the
2031  * minimum in the histogram. */
2032  minscore = (1. - scorefract) * maxscore;
2033  for (i = maxindex - 1; i >= 0; i--) {
2034  numaGetFValue(nascore, i, &val);
2035  if (val < minscore)
2036  break;
2037  }
2038  minrange = i + 1;
2039  for (i = maxindex + 1; i < n; i++) {
2040  numaGetFValue(nascore, i, &val);
2041  if (val < minscore)
2042  break;
2043  }
2044  maxrange = i - 1;
2045  numaGetFValue(na, minrange, &minval);
2046  bestsplit = minrange;
2047  for (i = minrange + 1; i <= maxrange; i++) {
2048  numaGetFValue(na, i, &val);
2049  if (val < minval) {
2050  minval = val;
2051  bestsplit = i;
2052  }
2053  }
2054 
2055  /* Add one to the bestsplit value to get the threshold value,
2056  * because when we take a threshold, as in pixThresholdToBinary(),
2057  * we always choose the set with values below the threshold. */
2058  bestsplit = L_MIN(255, bestsplit + 1);
2059 
2060  if (psplitindex) *psplitindex = bestsplit;
2061  if (pave1) numaGetFValue(naave1, bestsplit, pave1);
2062  if (pave2) numaGetFValue(naave2, bestsplit, pave2);
2063  if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
2064  if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
2065 
2066  if (pnascore) { /* debug mode */
2067  fprintf(stderr, "minrange = %d, maxrange = %d\n", minrange, maxrange);
2068  fprintf(stderr, "minval = %10.0f\n", minval);
2069  gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore",
2070  "Score for split distribution");
2071  *pnascore = nascore;
2072  } else {
2073  numaDestroy(&nascore);
2074  }
2075 
2076  if (pave1) numaDestroy(&naave1);
2077  if (pave2) numaDestroy(&naave2);
2078  if (pnum1) numaDestroy(&nanum1);
2079  if (pnum2) numaDestroy(&nanum2);
2080  return 0;
2081 }
2082 
2083 
2084 /*----------------------------------------------------------------------*
2085  * Comparing histograms *
2086  *----------------------------------------------------------------------*/
2111 l_ok
2113  NUMAA *naa2,
2114  NUMA **pnad)
2115 {
2116 l_int32 i, n, nt;
2117 l_float32 dist;
2118 NUMA *na1, *na2, *nad;
2119 
2120  PROCNAME("grayHistogramsToEMD");
2121 
2122  if (!pnad)
2123  return ERROR_INT("&nad not defined", procName, 1);
2124  *pnad = NULL;
2125  if (!naa1 || !naa2)
2126  return ERROR_INT("na1 and na2 not both defined", procName, 1);
2127  n = numaaGetCount(naa1);
2128  if (n != numaaGetCount(naa2))
2129  return ERROR_INT("naa1 and naa2 numa counts differ", procName, 1);
2130  nt = numaaGetNumberCount(naa1);
2131  if (nt != numaaGetNumberCount(naa2))
2132  return ERROR_INT("naa1 and naa2 number counts differ", procName, 1);
2133  if (256 * n != nt) /* good enough check */
2134  return ERROR_INT("na sizes must be 256", procName, 1);
2135 
2136  nad = numaCreate(n);
2137  *pnad = nad;
2138  for (i = 0; i < n; i++) {
2139  na1 = numaaGetNuma(naa1, i, L_CLONE);
2140  na2 = numaaGetNuma(naa2, i, L_CLONE);
2141  numaEarthMoverDistance(na1, na2, &dist);
2142  numaAddNumber(nad, dist / 255.); /* normalize to [0.0 - 1.0] */
2143  numaDestroy(&na1);
2144  numaDestroy(&na2);
2145  }
2146  return 0;
2147 }
2148 
2149 
2177 l_ok
2179  NUMA *na2,
2180  l_float32 *pdist)
2181 {
2182 l_int32 n, norm, i;
2183 l_float32 sum1, sum2, diff, total;
2184 l_float32 *array1, *array3;
2185 NUMA *na3;
2186 
2187  PROCNAME("numaEarthMoverDistance");
2188 
2189  if (!pdist)
2190  return ERROR_INT("&dist not defined", procName, 1);
2191  *pdist = 0.0;
2192  if (!na1 || !na2)
2193  return ERROR_INT("na1 and na2 not both defined", procName, 1);
2194  n = numaGetCount(na1);
2195  if (n != numaGetCount(na2))
2196  return ERROR_INT("na1 and na2 have different size", procName, 1);
2197 
2198  /* Generate na3; normalize to na1 if necessary */
2199  numaGetSum(na1, &sum1);
2200  numaGetSum(na2, &sum2);
2201  norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0;
2202  if (!norm)
2203  na3 = numaTransform(na2, 0, sum1 / sum2);
2204  else
2205  na3 = numaCopy(na2);
2206  array1 = numaGetFArray(na1, L_NOCOPY);
2207  array3 = numaGetFArray(na3, L_NOCOPY);
2208 
2209  /* Move earth in n3 from array elements, to match n1 */
2210  total = 0;
2211  for (i = 1; i < n; i++) {
2212  diff = array1[i - 1] - array3[i - 1];
2213  array3[i] -= diff;
2214  total += L_ABS(diff);
2215  }
2216  *pdist = total / sum1;
2217 
2218  numaDestroy(&na3);
2219  return 0;
2220 }
2221 
2222 
2268 l_ok
2270  l_int32 wc,
2271  NUMA **pnam,
2272  NUMA **pnams,
2273  NUMA **pnav,
2274  NUMA **pnarv)
2275 {
2276 l_int32 i, j, n, nn;
2277 l_float32 **arrays;
2278 l_float32 mean, var, rvar;
2279 NUMA *na1, *na2, *na3, *na4;
2280 
2281  PROCNAME("grayInterHistogramStats");
2282 
2283  if (pnam) *pnam = NULL;
2284  if (pnams) *pnams = NULL;
2285  if (pnav) *pnav = NULL;
2286  if (pnarv) *pnarv = NULL;
2287  if (!pnam && !pnams && !pnav && !pnarv)
2288  return ERROR_INT("nothing requested", procName, 1);
2289  if (!naa)
2290  return ERROR_INT("naa not defined", procName, 1);
2291  n = numaaGetCount(naa);
2292  for (i = 0; i < n; i++) {
2293  nn = numaaGetNumaCount(naa, i);
2294  if (nn != 256) {
2295  L_ERROR("%d numbers in numa[%d]\n", procName, nn, i);
2296  return 1;
2297  }
2298  }
2299 
2300  if (pnam) *pnam = numaCreate(256);
2301  if (pnams) *pnams = numaCreate(256);
2302  if (pnav) *pnav = numaCreate(256);
2303  if (pnarv) *pnarv = numaCreate(256);
2304 
2305  /* First, use mean smoothing, normalize each histogram,
2306  * and save all results in a 2D matrix. */
2307  arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *));
2308  for (i = 0; i < n; i++) {
2309  na1 = numaaGetNuma(naa, i, L_CLONE);
2310  na2 = numaWindowedMean(na1, wc);
2311  na3 = numaNormalizeHistogram(na2, 10000.);
2312  arrays[i] = numaGetFArray(na3, L_COPY);
2313  numaDestroy(&na1);
2314  numaDestroy(&na2);
2315  numaDestroy(&na3);
2316  }
2317 
2318  /* Get stats between histograms */
2319  for (j = 0; j < 256; j++) {
2320  na4 = numaCreate(n);
2321  for (i = 0; i < n; i++) {
2322  numaAddNumber(na4, arrays[i][j]);
2323  }
2324  numaSimpleStats(na4, 0, -1, &mean, &var, &rvar);
2325  if (pnam) numaAddNumber(*pnam, mean);
2326  if (pnams) numaAddNumber(*pnams, mean * mean);
2327  if (pnav) numaAddNumber(*pnav, var);
2328  if (pnarv) numaAddNumber(*pnarv, rvar);
2329  numaDestroy(&na4);
2330  }
2331 
2332  for (i = 0; i < n; i++)
2333  LEPT_FREE(arrays[i]);
2334  LEPT_FREE(arrays);
2335  return 0;
2336 }
2337 
2338 
2339 /*----------------------------------------------------------------------*
2340  * Extrema finding *
2341  *----------------------------------------------------------------------*/
2358 NUMA *
2360  l_int32 nmax,
2361  l_float32 fract1,
2362  l_float32 fract2)
2363 {
2364 l_int32 i, k, n, maxloc, lloc, rloc;
2365 l_float32 fmaxval, sum, total, newtotal, val, lastval;
2366 l_float32 peakfract;
2367 NUMA *na, *napeak;
2368 
2369  PROCNAME("numaFindPeaks");
2370 
2371  if (!nas)
2372  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
2373  n = numaGetCount(nas);
2374  numaGetSum(nas, &total);
2375 
2376  /* We munge this copy */
2377  if ((na = numaCopy(nas)) == NULL)
2378  return (NUMA *)ERROR_PTR("na not made", procName, NULL);
2379  if ((napeak = numaCreate(4 * nmax)) == NULL) {
2380  numaDestroy(&na);
2381  return (NUMA *)ERROR_PTR("napeak not made", procName, NULL);
2382  }
2383 
2384  for (k = 0; k < nmax; k++) {
2385  numaGetSum(na, &newtotal);
2386  if (newtotal == 0.0) /* sanity check */
2387  break;
2388  numaGetMax(na, &fmaxval, &maxloc);
2389  sum = fmaxval;
2390  lastval = fmaxval;
2391  lloc = 0;
2392  for (i = maxloc - 1; i >= 0; --i) {
2393  numaGetFValue(na, i, &val);
2394  if (val == 0.0) {
2395  lloc = i + 1;
2396  break;
2397  }
2398  if (val > fract1 * fmaxval) {
2399  sum += val;
2400  lastval = val;
2401  continue;
2402  }
2403  if (lastval - val > fract2 * lastval) {
2404  sum += val;
2405  lastval = val;
2406  continue;
2407  }
2408  lloc = i;
2409  break;
2410  }
2411  lastval = fmaxval;
2412  rloc = n - 1;
2413  for (i = maxloc + 1; i < n; ++i) {
2414  numaGetFValue(na, i, &val);
2415  if (val == 0.0) {
2416  rloc = i - 1;
2417  break;
2418  }
2419  if (val > fract1 * fmaxval) {
2420  sum += val;
2421  lastval = val;
2422  continue;
2423  }
2424  if (lastval - val > fract2 * lastval) {
2425  sum += val;
2426  lastval = val;
2427  continue;
2428  }
2429  rloc = i;
2430  break;
2431  }
2432  peakfract = sum / total;
2433  numaAddNumber(napeak, lloc);
2434  numaAddNumber(napeak, maxloc);
2435  numaAddNumber(napeak, rloc);
2436  numaAddNumber(napeak, peakfract);
2437 
2438  for (i = lloc; i <= rloc; i++)
2439  numaSetValue(na, i, 0.0);
2440  }
2441 
2442  numaDestroy(&na);
2443  return napeak;
2444 }
2445 
2446 
2473 NUMA *
2475  l_float32 delta,
2476  NUMA **pnav)
2477 {
2478 l_int32 i, n, found, loc, direction;
2479 l_float32 startval, val, maxval, minval;
2480 NUMA *nav, *nad;
2481 
2482  PROCNAME("numaFindExtrema");
2483 
2484  if (pnav) *pnav = NULL;
2485  if (!nas)
2486  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
2487  if (delta < 0.0)
2488  return (NUMA *)ERROR_PTR("delta < 0", procName, NULL);
2489 
2490  n = numaGetCount(nas);
2491  nad = numaCreate(0);
2492  nav = NULL;
2493  if (pnav) {
2494  nav = numaCreate(0);
2495  *pnav = nav;
2496  }
2497 
2498  /* We don't know if we'll find a peak or valley first,
2499  * but use the first element of nas as the reference point.
2500  * Break when we deviate by 'delta' from the first point. */
2501  numaGetFValue(nas, 0, &startval);
2502  found = FALSE;
2503  for (i = 1; i < n; i++) {
2504  numaGetFValue(nas, i, &val);
2505  if (L_ABS(val - startval) >= delta) {
2506  found = TRUE;
2507  break;
2508  }
2509  }
2510 
2511  if (!found)
2512  return nad; /* it's empty */
2513 
2514  /* Are we looking for a peak or a valley? */
2515  if (val > startval) { /* peak */
2516  direction = 1;
2517  maxval = val;
2518  } else {
2519  direction = -1;
2520  minval = val;
2521  }
2522  loc = i;
2523 
2524  /* Sweep through the rest of the array, recording alternating
2525  * peak/valley extrema. */
2526  for (i = i + 1; i < n; i++) {
2527  numaGetFValue(nas, i, &val);
2528  if (direction == 1 && val > maxval ) { /* new local max */
2529  maxval = val;
2530  loc = i;
2531  } else if (direction == -1 && val < minval ) { /* new local min */
2532  minval = val;
2533  loc = i;
2534  } else if (direction == 1 && (maxval - val >= delta)) {
2535  numaAddNumber(nad, loc); /* save the current max location */
2536  if (nav) numaAddNumber(nav, maxval);
2537  direction = -1; /* reverse: start looking for a min */
2538  minval = val;
2539  loc = i; /* current min location */
2540  } else if (direction == -1 && (val - minval >= delta)) {
2541  numaAddNumber(nad, loc); /* save the current min location */
2542  if (nav) numaAddNumber(nav, minval);
2543  direction = 1; /* reverse: start looking for a max */
2544  maxval = val;
2545  loc = i; /* current max location */
2546  }
2547  }
2548 
2549  /* Save the final extremum */
2550 /* numaAddNumber(nad, loc); */
2551  return nad;
2552 }
2553 
2554 
2576 l_ok
2578  l_float32 minreversal,
2579  l_int32 *pnr,
2580  l_float32 *prd)
2581 {
2582 l_int32 i, n, nr, ival, binvals;
2583 l_int32 *ia;
2584 l_float32 fval, delx, len;
2585 NUMA *nat;
2586 
2587  PROCNAME("numaCountReversals");
2588 
2589  if (pnr) *pnr = 0;
2590  if (prd) *prd = 0.0;
2591  if (!pnr && !prd)
2592  return ERROR_INT("neither &nr nor &rd are defined", procName, 1);
2593  if (!nas)
2594  return ERROR_INT("nas not defined", procName, 1);
2595  if ((n = numaGetCount(nas)) == 0) {
2596  L_INFO("nas is empty\n", procName);
2597  return 0;
2598  }
2599  if (minreversal < 0.0)
2600  return ERROR_INT("minreversal < 0", procName, 1);
2601 
2602  /* Decide if the only values are 0 and 1 */
2603  binvals = TRUE;
2604  for (i = 0; i < n; i++) {
2605  numaGetFValue(nas, i, &fval);
2606  if (fval != 0.0 && fval != 1.0) {
2607  binvals = FALSE;
2608  break;
2609  }
2610  }
2611 
2612  nr = 0;
2613  if (binvals) {
2614  if (minreversal > 1.0) {
2615  L_WARNING("binary values but minreversal > 1\n", procName);
2616  } else {
2617  ia = numaGetIArray(nas);
2618  ival = ia[0];
2619  for (i = 1; i < n; i++) {
2620  if (ia[i] != ival) {
2621  nr++;
2622  ival = ia[i];
2623  }
2624  }
2625  LEPT_FREE(ia);
2626  }
2627  } else {
2628  nat = numaFindExtrema(nas, minreversal, NULL);
2629  nr = numaGetCount(nat);
2630  numaDestroy(&nat);
2631  }
2632  if (pnr) *pnr = nr;
2633  if (prd) {
2634  numaGetParameters(nas, NULL, &delx);
2635  len = delx * n;
2636  *prd = (l_float32)nr / len;
2637  }
2638 
2639  return 0;
2640 }
2641 
2642 
2643 /*----------------------------------------------------------------------*
2644  * Threshold crossings and frequency analysis *
2645  *----------------------------------------------------------------------*/
2671 l_ok
2673  NUMA *nay,
2674  l_float32 estthresh,
2675  l_float32 *pbestthresh)
2676 {
2677 l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
2678 l_int32 val, maxval, nmax, count;
2679 l_float32 thresh, fmaxval, fmodeval;
2680 NUMA *nat, *nac;
2681 
2682  PROCNAME("numaSelectCrossingThreshold");
2683 
2684  if (!pbestthresh)
2685  return ERROR_INT("&bestthresh not defined", procName, 1);
2686  *pbestthresh = 0.0;
2687  if (!nay)
2688  return ERROR_INT("nay not defined", procName, 1);
2689 
2690  /* Compute the number of crossings for different thresholds */
2691  nat = numaCreate(41);
2692  for (i = 0; i < 41; i++) {
2693  thresh = estthresh - 80.0 + 4.0 * i;
2694  nac = numaCrossingsByThreshold(nax, nay, thresh);
2695  numaAddNumber(nat, numaGetCount(nac));
2696  numaDestroy(&nac);
2697  }
2698 
2699  /* Find the center of the plateau of max crossings, which
2700  * extends from thresh[istart] to thresh[iend]. */
2701  numaGetMax(nat, &fmaxval, NULL);
2702  maxval = (l_int32)fmaxval;
2703  nmax = 0;
2704  for (i = 0; i < 41; i++) {
2705  numaGetIValue(nat, i, &val);
2706  if (val == maxval)
2707  nmax++;
2708  }
2709  if (nmax < 3) { /* likely accidental max; try the mode */
2710  numaGetMode(nat, &fmodeval, &count);
2711  if (count > nmax && fmodeval > 0.5 * fmaxval)
2712  maxval = (l_int32)fmodeval; /* use the mode */
2713  }
2714 
2715  inrun = FALSE;
2716  iend = 40;
2717  maxrunlen = 0, maxstart = 0, maxend = 0;
2718  for (i = 0; i < 41; i++) {
2719  numaGetIValue(nat, i, &val);
2720  if (val == maxval) {
2721  if (!inrun) {
2722  istart = i;
2723  inrun = TRUE;
2724  }
2725  continue;
2726  }
2727  if (inrun && (val != maxval)) {
2728  iend = i - 1;
2729  runlen = iend - istart + 1;
2730  inrun = FALSE;
2731  if (runlen > maxrunlen) {
2732  maxstart = istart;
2733  maxend = iend;
2734  maxrunlen = runlen;
2735  }
2736  }
2737  }
2738  if (inrun) {
2739  runlen = i - istart;
2740  if (runlen > maxrunlen) {
2741  maxstart = istart;
2742  maxend = i - 1;
2743  maxrunlen = runlen;
2744  }
2745  }
2746 
2747  *pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
2748 
2749 #if DEBUG_CROSSINGS
2750  fprintf(stderr, "\nCrossings attain a maximum at %d thresholds, between:\n"
2751  " thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
2752  nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
2753  maxend, estthresh - 80.0 + 4.0 * maxend);
2754  fprintf(stderr, "The best choice: %5.1f\n", *pbestthresh);
2755  fprintf(stderr, "Number of crossings at the 41 thresholds:");
2756  numaWriteStream(stderr, nat);
2757 #endif /* DEBUG_CROSSINGS */
2758 
2759  numaDestroy(&nat);
2760  return 0;
2761 }
2762 
2763 
2778 NUMA *
2780  NUMA *nay,
2781  l_float32 thresh)
2782 {
2783 l_int32 i, n;
2784 l_float32 startx, delx;
2785 l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
2786 NUMA *nad;
2787 
2788  PROCNAME("numaCrossingsByThreshold");
2789 
2790  if (!nay)
2791  return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
2792  n = numaGetCount(nay);
2793 
2794  if (nax && (numaGetCount(nax) != n))
2795  return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
2796 
2797  nad = numaCreate(0);
2798  numaGetFValue(nay, 0, &yval1);
2799  numaGetParameters(nay, &startx, &delx);
2800  if (nax)
2801  numaGetFValue(nax, 0, &xval1);
2802  else
2803  xval1 = startx;
2804  for (i = 1; i < n; i++) {
2805  numaGetFValue(nay, i, &yval2);
2806  if (nax)
2807  numaGetFValue(nax, i, &xval2);
2808  else
2809  xval2 = startx + i * delx;
2810  delta1 = yval1 - thresh;
2811  delta2 = yval2 - thresh;
2812  if (delta1 == 0.0) {
2813  numaAddNumber(nad, xval1);
2814  } else if (delta2 == 0.0) {
2815  numaAddNumber(nad, xval2);
2816  } else if (delta1 * delta2 < 0.0) { /* crossing */
2817  fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
2818  crossval = xval1 + fract * (xval2 - xval1);
2819  numaAddNumber(nad, crossval);
2820  }
2821  xval1 = xval2;
2822  yval1 = yval2;
2823  }
2824 
2825  return nad;
2826 }
2827 
2828 
2843 NUMA *
2845  NUMA *nay,
2846  l_float32 delta)
2847 {
2848 l_int32 i, j, n, np, previndex, curindex;
2849 l_float32 startx, delx;
2850 l_float32 xval1, xval2, yval1, yval2, delta1, delta2;
2851 l_float32 prevval, curval, thresh, crossval, fract;
2852 NUMA *nap, *nad;
2853 
2854  PROCNAME("numaCrossingsByPeaks");
2855 
2856  if (!nay)
2857  return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
2858 
2859  n = numaGetCount(nay);
2860  if (nax && (numaGetCount(nax) != n))
2861  return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
2862 
2863  /* Find the extrema. Also add last point in nay to get
2864  * the last transition (from the last peak to the end).
2865  * The number of crossings is 1 more than the number of extrema. */
2866  nap = numaFindExtrema(nay, delta, NULL);
2867  numaAddNumber(nap, n - 1);
2868  np = numaGetCount(nap);
2869  L_INFO("Number of crossings: %d\n", procName, np);
2870 
2871  /* Do all computation in index units of nax or the delx of nay */
2872  nad = numaCreate(np); /* output crossing locations, in nax units */
2873  previndex = 0; /* prime the search with 1st point */
2874  numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */
2875  numaGetParameters(nay, &startx, &delx);
2876  for (i = 0; i < np; i++) {
2877  numaGetIValue(nap, i, &curindex);
2878  numaGetFValue(nay, curindex, &curval);
2879  thresh = (prevval + curval) / 2.0;
2880  if (nax)
2881  numaGetFValue(nax, previndex, &xval1);
2882  else
2883  xval1 = startx + previndex * delx;
2884  numaGetFValue(nay, previndex, &yval1);
2885  for (j = previndex + 1; j <= curindex; j++) {
2886  if (nax)
2887  numaGetFValue(nax, j, &xval2);
2888  else
2889  xval2 = startx + j * delx;
2890  numaGetFValue(nay, j, &yval2);
2891  delta1 = yval1 - thresh;
2892  delta2 = yval2 - thresh;
2893  if (delta1 == 0.0) {
2894  numaAddNumber(nad, xval1);
2895  break;
2896  } else if (delta2 == 0.0) {
2897  numaAddNumber(nad, xval2);
2898  break;
2899  } else if (delta1 * delta2 < 0.0) { /* crossing */
2900  fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
2901  crossval = xval1 + fract * (xval2 - xval1);
2902  numaAddNumber(nad, crossval);
2903  break;
2904  }
2905  xval1 = xval2;
2906  yval1 = yval2;
2907  }
2908  previndex = curindex;
2909  prevval = curval;
2910  }
2911 
2912  numaDestroy(&nap);
2913  return nad;
2914 }
2915 
2916 
2954 l_ok
2956  l_float32 relweight,
2957  l_int32 nwidth,
2958  l_int32 nshift,
2959  l_float32 minwidth,
2960  l_float32 maxwidth,
2961  l_float32 *pbestwidth,
2962  l_float32 *pbestshift,
2963  l_float32 *pbestscore)
2964 {
2965 l_int32 i, j;
2966 l_float32 delwidth, delshift, width, shift, score;
2967 l_float32 bestwidth, bestshift, bestscore;
2968 
2969  PROCNAME("numaEvalBestHaarParameters");
2970 
2971  if (pbestscore) *pbestscore = 0.0;
2972  if (pbestwidth) *pbestwidth = 0.0;
2973  if (pbestshift) *pbestshift = 0.0;
2974  if (!pbestwidth || !pbestshift)
2975  return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1);
2976  if (!nas)
2977  return ERROR_INT("nas not defined", procName, 1);
2978 
2979  bestscore = bestwidth = bestshift = 0.0;
2980  delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
2981  for (i = 0; i < nwidth; i++) {
2982  width = minwidth + delwidth * i;
2983  delshift = width / (l_float32)(nshift);
2984  for (j = 0; j < nshift; j++) {
2985  shift = j * delshift;
2986  numaEvalHaarSum(nas, width, shift, relweight, &score);
2987  if (score > bestscore) {
2988  bestscore = score;
2989  bestwidth = width;
2990  bestshift = shift;
2991 #if DEBUG_FREQUENCY
2992  fprintf(stderr, "width = %7.3f, shift = %7.3f, score = %7.3f\n",
2993  width, shift, score);
2994 #endif /* DEBUG_FREQUENCY */
2995  }
2996  }
2997  }
2998 
2999  *pbestwidth = bestwidth;
3000  *pbestshift = bestshift;
3001  if (pbestscore)
3002  *pbestscore = bestscore;
3003  return 0;
3004 }
3005 
3006 
3039 l_ok
3041  l_float32 width,
3042  l_float32 shift,
3043  l_float32 relweight,
3044  l_float32 *pscore)
3045 {
3046 l_int32 i, n, nsamp, index;
3047 l_float32 score, weight, val;
3048 
3049  PROCNAME("numaEvalHaarSum");
3050 
3051  if (!pscore)
3052  return ERROR_INT("&score not defined", procName, 1);
3053  *pscore = 0.0;
3054  if (!nas)
3055  return ERROR_INT("nas not defined", procName, 1);
3056  if ((n = numaGetCount(nas)) < 2 * width)
3057  return ERROR_INT("nas size too small", procName, 1);
3058 
3059  score = 0.0;
3060  nsamp = (l_int32)((n - shift) / width);
3061  for (i = 0; i < nsamp; i++) {
3062  index = (l_int32)(shift + i * width);
3063  weight = (i % 2) ? 1.0 : -1.0 * relweight;
3064  numaGetFValue(nas, index, &val);
3065  score += weight * val;
3066  }
3067 
3068  *pscore = 2.0 * width * score / (l_float32)n;
3069  return 0;
3070 }
3071 
3072 
3073 /*----------------------------------------------------------------------*
3074  * Generating numbers in a range under constraints *
3075  *----------------------------------------------------------------------*/
3096 NUMA *
3098  l_int32 last,
3099  l_int32 nmax,
3100  l_int32 use_pairs)
3101 {
3102 l_int32 i, nsets, val;
3103 l_float32 delta;
3104 NUMA *na;
3105 
3106  PROCNAME("genConstrainedNumaInRange");
3107 
3108  first = L_MAX(0, first);
3109  if (last < first)
3110  return (NUMA *)ERROR_PTR("last < first!", procName, NULL);
3111  if (nmax < 1)
3112  return (NUMA *)ERROR_PTR("nmax < 1!", procName, NULL);
3113 
3114  nsets = L_MIN(nmax, last - first + 1);
3115  if (use_pairs == 1)
3116  nsets = nsets / 2;
3117  if (nsets == 0)
3118  return (NUMA *)ERROR_PTR("nsets == 0", procName, NULL);
3119 
3120  /* Select delta so that selection covers the full range if possible */
3121  if (nsets == 1) {
3122  delta = 0.0;
3123  } else {
3124  if (use_pairs == 0)
3125  delta = (l_float32)(last - first) / (nsets - 1);
3126  else
3127  delta = (l_float32)(last - first - 1) / (nsets - 1);
3128  }
3129 
3130  na = numaCreate(nsets);
3131  for (i = 0; i < nsets; i++) {
3132  val = (l_int32)(first + i * delta + 0.5);
3133  numaAddNumber(na, val);
3134  if (use_pairs == 1)
3135  numaAddNumber(na, val + 1);
3136  }
3137 
3138  return na;
3139 }
l_ok numaGetSum(NUMA *na, l_float32 *psum)
numaGetSum()
Definition: numafunc1.c:527
l_ok numaGetFValue(NUMA *na, l_int32 index, l_float32 *pval)
numaGetFValue()
Definition: numabasic.c:692
NUMA * numaFindExtrema(NUMA *nas, l_float32 delta, NUMA **pnav)
numaFindExtrema()
Definition: numafunc2.c:2474
NUMA * numaDilate(NUMA *nas, l_int32 size)
numaDilate()
Definition: numafunc2.c:246
NUMA * numaCrossingsByPeaks(NUMA *nax, NUMA *nay, l_float32 delta)
numaCrossingsByPeaks()
Definition: numafunc2.c:2844
l_ok numaHasOnlyIntegers(NUMA *na, l_int32 maxsamples, l_int32 *pallints)
numaHasOnlyIntegers()
Definition: numafunc1.c:645
NUMA * numaConvertToInt(NUMA *nas)
numaConvertToInt()
Definition: numafunc2.c:827
NUMA * numaMakeHistogram(NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart)
numaMakeHistogram()
Definition: numafunc2.c:879
l_ok numaSimpleStats(NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar)
numaSimpleStats()
Definition: numafunc2.c:446
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
Definition: numabasic.c:473
NUMA * numaFindPeaks(NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2)
numaFindPeaks()
Definition: numafunc2.c:2359
l_int32 numaaGetNumberCount(NUMAA *naa)
numaaGetNumberCount()
Definition: numabasic.c:1589
Definition: pix.h:716
NUMA * numaRebinHistogram(NUMA *nas, l_int32 newsize)
numaRebinHistogram()
Definition: numafunc2.c:1124
l_ok numaEarthMoverDistance(NUMA *na1, NUMA *na2, l_float32 *pdist)
numaEarthMoverDistance()
Definition: numafunc2.c:2178
l_ok numaCopyParameters(NUMA *nad, NUMA *nas)
numaCopyParameters()
Definition: numabasic.c:989
l_ok numaGetMedian(NUMA *na, l_float32 *pval)
numaGetMedian()
Definition: numafunc1.c:3135
NUMA * numaMakeConstant(l_float32 val, l_int32 size)
numaMakeConstant()
Definition: numafunc1.c:781
NUMA * numaErode(NUMA *nas, l_int32 size)
numaErode()
Definition: numafunc2.c:177
NUMA * numaMakeHistogramClipped(NUMA *na, l_float32 binsize, l_float32 maxsize)
numaMakeHistogramClipped()
Definition: numafunc2.c:1075
l_ok numaSplitDistribution(NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore)
numaSplitDistribution()
Definition: numafunc2.c:1950
l_ok numaWriteStream(FILE *fp, NUMA *na)
numaWriteStream()
Definition: numabasic.c:1246
NUMA * numaCreate(l_int32 n)
numaCreate()
Definition: numabasic.c:187
l_ok numaSetCount(NUMA *na, l_int32 newcount)
numaSetCount()
Definition: numabasic.c:658
NUMA * numaWindowedMedian(NUMA *nas, l_int32 halfwin)
numaWindowedMedian()
Definition: numafunc2.c:778
NUMA * numaClipToInterval(NUMA *nas, l_int32 first, l_int32 last)
numaClipToInterval()
Definition: numafunc1.c:1105
l_ok numaSetValue(NUMA *na, l_int32 index, l_float32 val)
numaSetValue()
Definition: numabasic.c:759
NUMA * numaRemoveBorder(NUMA *nas, l_int32 left, l_int32 right)
numaRemoveBorder()
Definition: numafunc1.c:926
l_ok numaSelectCrossingThreshold(NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh)
numaSelectCrossingThreshold()
Definition: numafunc2.c:2672
l_ok numaGetHistogramStats(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStats()
Definition: numafunc2.c:1345
l_int32 * numaGetIArray(NUMA *na)
numaGetIArray()
Definition: numabasic.c:820
l_ok numaCountReversals(NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd)
numaCountReversals()
Definition: numafunc2.c:2577
NUMA * genConstrainedNumaInRange(l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs)
genConstrainedNumaInRange()
Definition: numafunc2.c:3097
NUMA * numaaGetNuma(NUMAA *naa, l_int32 index, l_int32 accessflag)
numaaGetNuma()
Definition: numabasic.c:1659
NUMA * numaClose(NUMA *nas, l_int32 size)
numaClose()
Definition: numafunc2.c:362
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:727
Definition: array.h:59
l_int32 numaGetCount(NUMA *na)
numaGetCount()
Definition: numabasic.c:631
l_ok numaDiscretizeRankAndIntensity(NUMA *na, l_int32 nbins, NUMA **pnarbin, NUMA **pnam, NUMA **pnar, NUMA **pnabb)
numaDiscretizeRankAndIntensity()
Definition: numafunc2.c:1707
l_ok numaGetMode(NUMA *na, l_float32 *pval, l_int32 *pcount)
numaGetMode()
Definition: numafunc1.c:3290
l_ok numaInterpolateEqxInterval(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 type, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaInterpolateEqxInterval()
Definition: numafunc1.c:1828
l_ok numaSetParameters(NUMA *na, l_float32 startx, l_float32 delx)
numaSetParameters()
Definition: numabasic.c:966
NUMA * numaAddSpecifiedBorder(NUMA *nas, l_int32 left, l_int32 right, l_int32 type)
numaAddSpecifiedBorder()
Definition: numafunc1.c:875
NUMA * numaWindowedMeanSquare(NUMA *nas, l_int32 wc)
numaWindowedMeanSquare()
Definition: numafunc2.c:642
NUMA * numaOpen(NUMA *nas, l_int32 size)
numaOpen()
Definition: numafunc2.c:315
l_ok numaWindowedStats(NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
numaWindowedStats()
Definition: numafunc2.c:531
l_ok numaGetParameters(NUMA *na, l_float32 *pstartx, l_float32 *pdelx)
numaGetParameters()
Definition: numabasic.c:936
NUMA * numaMakeHistogramAuto(NUMA *na, l_int32 maxbins)
numaMakeHistogramAuto()
Definition: numafunc2.c:991
l_ok numaHistogramGetValFromRank(NUMA *na, l_float32 rank, l_float32 *prval)
numaHistogramGetValFromRank()
Definition: numafunc2.c:1627
Definition: array.h:71
NUMA * numaCopy(NUMA *na)
numaCopy()
Definition: numabasic.c:394
l_ok numaEvalBestHaarParameters(NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore)
numaEvalBestHaarParameters()
Definition: numafunc2.c:2955
l_ok numaGetHistogramStatsOnInterval(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStatsOnInterval()
Definition: numafunc2.c:1394
NUMA * numaAddBorder(NUMA *nas, l_int32 left, l_int32 right, l_float32 val)
numaAddBorder()
Definition: numafunc1.c:832
l_ok numaMakeRankFromHistogram(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaMakeRankFromHistogram()
Definition: numafunc2.c:1488
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:360
NUMA * numaCrossingsByThreshold(NUMA *nax, NUMA *nay, l_float32 thresh)
numaCrossingsByThreshold()
Definition: numafunc2.c:2779
l_ok numaGetMin(NUMA *na, l_float32 *pminval, l_int32 *piminloc)
numaGetMin()
Definition: numafunc1.c:444
NUMA * numaTransform(NUMA *nas, l_float32 shift, l_float32 scale)
numaTransform()
Definition: numafunc2.c:409
l_ok numaGetRankBinValues(NUMA *na, l_int32 nbins, NUMA **pnarbin, NUMA **pnam)
numaGetRankBinValues()
Definition: numafunc2.c:1855
l_ok numaHistogramGetRankFromVal(NUMA *na, l_float32 rval, l_float32 *prank)
numaHistogramGetRankFromVal()
Definition: numafunc2.c:1556
NUMA * numaWindowedMean(NUMA *nas, l_int32 wc)
numaWindowedMean()
Definition: numafunc2.c:582
l_float32 * numaGetFArray(NUMA *na, l_int32 copyflag)
numaGetFArray()
Definition: numabasic.c:865
l_int32 numaaGetCount(NUMAA *naa)
numaaGetCount()
Definition: numabasic.c:1550
l_int32 numaaGetNumaCount(NUMAA *naa, l_int32 index)
numaaGetNumaCount()
Definition: numabasic.c:1568
Definition: pix.h:718
Definition: pix.h:719
l_ok grayHistogramsToEMD(NUMAA *naa1, NUMAA *naa2, NUMA **pnad)
grayHistogramsToEMD()
Definition: numafunc2.c:2112
l_ok numaWindowedVariance(NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv)
numaWindowedVariance()
Definition: numafunc2.c:710
NUMA * numaNormalizeHistogram(NUMA *nas, l_float32 tsum)
numaNormalizeHistogram()
Definition: numafunc2.c:1172
l_ok numaGetMax(NUMA *na, l_float32 *pmaxval, l_int32 *pimaxloc)
numaGetMax()
Definition: numafunc1.c:486
l_ok grayInterHistogramStats(NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
grayInterHistogramStats()
Definition: numafunc2.c:2269
l_ok numaEvalHaarSum(NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore)
numaEvalHaarSum()
Definition: numafunc2.c:3040
l_ok gplotSimple1(NUMA *na, l_int32 outformat, const char *outroot, const char *title)
gplotSimple1()
Definition: gplot.c:575
l_ok numaGetStatsUsingHistogram(NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto)
numaGetStatsUsingHistogram()
Definition: numafunc2.c:1254