Leptonica  1.77.0
Image processing and image analysis suite
dnahash.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
103 #include "allheaders.h"
104 
105 /*--------------------------------------------------------------------------*
106  * Dna hash: Creation and destruction *
107  *--------------------------------------------------------------------------*/
121 L_DNAHASH *
122 l_dnaHashCreate(l_int32 nbuckets,
123  l_int32 initsize)
124 {
125 L_DNAHASH *dahash;
126 
127  PROCNAME("l_dnaHashCreate");
128 
129  if (nbuckets <= 0)
130  return (L_DNAHASH *)ERROR_PTR("negative hash size", procName, NULL);
131  if ((dahash = (L_DNAHASH *)LEPT_CALLOC(1, sizeof(L_DNAHASH))) == NULL)
132  return (L_DNAHASH *)ERROR_PTR("dahash not made", procName, NULL);
133  if ((dahash->dna = (L_DNA **)LEPT_CALLOC(nbuckets, sizeof(L_DNA *)))
134  == NULL) {
135  LEPT_FREE(dahash);
136  return (L_DNAHASH *)ERROR_PTR("dna ptr array not made", procName, NULL);
137  }
138 
139  dahash->nbuckets = nbuckets;
140  dahash->initsize = initsize;
141  return dahash;
142 }
143 
144 
151 void
153 {
154 L_DNAHASH *dahash;
155 l_int32 i;
156 
157  PROCNAME("l_dnaHashDestroy");
158 
159  if (pdahash == NULL) {
160  L_WARNING("ptr address is NULL!\n", procName);
161  return;
162  }
163 
164  if ((dahash = *pdahash) == NULL)
165  return;
166 
167  for (i = 0; i < dahash->nbuckets; i++)
168  l_dnaDestroy(&dahash->dna[i]);
169  LEPT_FREE(dahash->dna);
170  LEPT_FREE(dahash);
171  *pdahash = NULL;
172 }
173 
174 
175 /*--------------------------------------------------------------------------*
176  * Dna hash: Accessors and modifiers *
177  *--------------------------------------------------------------------------*/
184 l_int32
186 {
187 
188  PROCNAME("l_dnaHashGetCount");
189 
190  if (!dahash)
191  return ERROR_INT("dahash not defined", procName, 0);
192  return dahash->nbuckets;
193 }
194 
195 
202 l_int32
204 {
205 l_int32 i, n;
206 L_DNA *da;
207 
208  PROCNAME("l_dnaHashGetTotalCount");
209 
210  if (!dahash)
211  return ERROR_INT("dahash not defined", procName, 0);
212 
213  for (i = 0, n = 0; i < dahash->nbuckets; i++) {
214  da = l_dnaHashGetDna(dahash, i, L_NOCOPY);
215  if (da)
216  n += l_dnaGetCount(da);
217  }
218 
219  return n;
220 }
221 
222 
231 L_DNA *
233  l_uint64 key,
234  l_int32 copyflag)
235 {
236 l_int32 bucket;
237 L_DNA *da;
238 
239  PROCNAME("l_dnaHashGetDna");
240 
241  if (!dahash)
242  return (L_DNA *)ERROR_PTR("dahash not defined", procName, NULL);
243  bucket = key % dahash->nbuckets;
244  da = dahash->dna[bucket];
245  if (da) {
246  if (copyflag == L_NOCOPY)
247  return da;
248  else if (copyflag == L_COPY)
249  return l_dnaCopy(da);
250  else
251  return l_dnaClone(da);
252  }
253  else
254  return NULL;
255 }
256 
257 
266 l_ok
268  l_uint64 key,
269  l_float64 value)
270 {
271 l_int32 bucket;
272 L_DNA *da;
273 
274  PROCNAME("l_dnaHashAdd");
275 
276  if (!dahash)
277  return ERROR_INT("dahash not defined", procName, 1);
278  bucket = key % dahash->nbuckets;
279  da = dahash->dna[bucket];
280  if (!da) {
281  if ((da = l_dnaCreate(dahash->initsize)) == NULL)
282  return ERROR_INT("da not made", procName, 1);
283  dahash->dna[bucket] = da;
284  }
285  l_dnaAddNumber(da, value);
286  return 0;
287 }
288 
289 
290 /*--------------------------------------------------------------------------*
291  * DnaHash: Operations on Dna *
292  *--------------------------------------------------------------------------*/
305 L_DNAHASH *
307 {
308 l_int32 i, n;
309 l_uint32 nsize;
310 l_uint64 key;
311 l_float64 val;
312 L_DNAHASH *dahash;
313 
314  PROCNAME("l_dnaHashCreateFromDna");
315 
316  if (!da)
317  return (L_DNAHASH *)ERROR_PTR("da not defined", procName, NULL);
318 
319  n = l_dnaGetCount(da);
320  findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */
321 
322  dahash = l_dnaHashCreate(nsize, 8);
323  for (i = 0; i < n; i++) {
324  l_dnaGetDValue(da, i, &val);
325  l_hashFloat64ToUint64(nsize, val, &key);
326  l_dnaHashAdd(dahash, key, (l_float64)i);
327  }
328 
329  return dahash;
330 }
331 
332 
349 l_ok
351  L_DNA **pdad,
352  L_DNAHASH **pdahash)
353 {
354 l_int32 i, n, index, items;
355 l_uint32 nsize;
356 l_uint64 key;
357 l_float64 val;
358 L_DNA *dad;
359 L_DNAHASH *dahash;
360 
361  PROCNAME("l_dnaRemoveDupsByHash");
362 
363  if (pdahash) *pdahash = NULL;
364  if (!pdad)
365  return ERROR_INT("&dad not defined", procName, 1);
366  *pdad = NULL;
367  if (!das)
368  return ERROR_INT("das not defined", procName, 1);
369 
370  n = l_dnaGetCount(das);
371  findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */
372  dahash = l_dnaHashCreate(nsize, 8);
373  dad = l_dnaCreate(n);
374  *pdad = dad;
375  for (i = 0, items = 0; i < n; i++) {
376  l_dnaGetDValue(das, i, &val);
377  l_dnaFindValByHash(dad, dahash, val, &index);
378  if (index < 0) { /* not found */
379  l_hashFloat64ToUint64(nsize, val, &key);
380  l_dnaHashAdd(dahash, key, (l_float64)items);
381  l_dnaAddNumber(dad, val);
382  items++;
383  }
384  }
385 
386  if (pdahash)
387  *pdahash = dahash;
388  else
389  l_dnaHashDestroy(&dahash);
390  return 0;
391 }
392 
393 
420 l_ok
422  L_DNAHASH **pdahash,
423  L_DNA **pdav,
424  L_DNA **pdac)
425 {
426 l_int32 i, n, nitems, index, count;
427 l_uint32 nsize;
428 l_uint64 key;
429 l_float64 val;
430 L_DNA *dac, *dav;
431 L_DNAHASH *dahash;
432 
433  PROCNAME("l_dnaMakeHistoByHash");
434 
435  if (pdahash) *pdahash = NULL;
436  if (pdac) *pdac = NULL;
437  if (pdav) *pdav = NULL;
438  if (!pdahash || !pdac || !pdav)
439  return ERROR_INT("&dahash, &dac, &dav not all defined", procName, 1);
440  if (!das)
441  return ERROR_INT("das not defined", procName, 1);
442  if ((n = l_dnaGetCount(das)) == 0)
443  return ERROR_INT("no data in das", procName, 1);
444 
445  findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */
446  dahash = l_dnaHashCreate(nsize, 8);
447  dac = l_dnaCreate(n); /* histogram */
448  dav = l_dnaCreate(n); /* the values */
449  for (i = 0, nitems = 0; i < n; i++) {
450  l_dnaGetDValue(das, i, &val);
451  /* Is this value already stored in dav? */
452  l_dnaFindValByHash(dav, dahash, val, &index);
453  if (index >= 0) { /* found */
454  l_dnaGetIValue(dac, (l_float64)index, &count);
455  l_dnaSetValue(dac, (l_float64)index, count + 1);
456  } else { /* not found */
457  l_hashFloat64ToUint64(nsize, val, &key);
458  l_dnaHashAdd(dahash, key, (l_float64)nitems);
459  l_dnaAddNumber(dav, val);
460  l_dnaAddNumber(dac, 1);
461  nitems++;
462  }
463  }
464 
465  *pdahash = dahash;
466  *pdac = dac;
467  *pdav = dav;
468  return 0;
469 }
470 
471 
484 L_DNA *
486  L_DNA *da2)
487 {
488 l_int32 n1, n2, nsmall, nbuckets, i, index1, index2;
489 l_uint32 nsize2;
490 l_uint64 key;
491 l_float64 val;
492 L_DNAHASH *dahash1, *dahash2;
493 L_DNA *da_small, *da_big, *dad;
494 
495  PROCNAME("l_dnaIntersectionByHash");
496 
497  if (!da1)
498  return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
499  if (!da2)
500  return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);
501 
502  /* Put the elements of the biggest array into a dnahash */
503  n1 = l_dnaGetCount(da1);
504  n2 = l_dnaGetCount(da2);
505  da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */
506  da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */
507  dahash1 = l_dnaHashCreateFromDna(da_big);
508 
509  /* Build up the intersection of numbers. Add to %dad
510  * if the number is in da_big (using dahash1) but hasn't
511  * yet been seen in the traversal of da_small (using dahash2). */
512  dad = l_dnaCreate(0);
513  nsmall = l_dnaGetCount(da_small);
514  findNextLargerPrime(nsmall / 20, &nsize2); /* buckets in hash table */
515  dahash2 = l_dnaHashCreate(nsize2, 0);
516  nbuckets = l_dnaHashGetCount(dahash2);
517  for (i = 0; i < nsmall; i++) {
518  l_dnaGetDValue(da_small, i, &val);
519  l_dnaFindValByHash(da_big, dahash1, val, &index1);
520  if (index1 >= 0) { /* found */
521  l_dnaFindValByHash(da_small, dahash2, val, &index2);
522  if (index2 == -1) { /* not found */
523  l_dnaAddNumber(dad, val);
524  l_hashFloat64ToUint64(nbuckets, val, &key);
525  l_dnaHashAdd(dahash2, key, (l_float64)i);
526  }
527  }
528  }
529 
530  l_dnaHashDestroy(&dahash1);
531  l_dnaHashDestroy(&dahash2);
532  return dad;
533 }
534 
535 
552 l_ok
554  L_DNAHASH *dahash,
555  l_float64 val,
556  l_int32 *pindex)
557 {
558 l_int32 i, nbuckets, nvals, indexval;
559 l_float64 vali;
560 l_uint64 key;
561 L_DNA *da1;
562 
563  PROCNAME("l_dnaFindValByHash");
564 
565  if (!pindex)
566  return ERROR_INT("&index not defined", procName, 1);
567  *pindex = -1;
568  if (!da)
569  return ERROR_INT("da not defined", procName, 1);
570  if (!dahash)
571  return ERROR_INT("dahash not defined", procName, 1);
572 
573  nbuckets = l_dnaHashGetCount(dahash);
574  l_hashFloat64ToUint64(nbuckets, val, &key);
575  da1 = l_dnaHashGetDna(dahash, key, L_NOCOPY);
576  if (!da1) return 0;
577 
578  /* Run through da1, looking for this %val */
579  nvals = l_dnaGetCount(da1);
580  for (i = 0; i < nvals; i++) {
581  l_dnaGetIValue(da1, i, &indexval);
582  l_dnaGetDValue(da, indexval, &vali);
583  if (val == vali) {
584  *pindex = indexval;
585  return 0;
586  }
587  }
588 
589  return 0;
590 }
l_ok l_dnaRemoveDupsByHash(L_DNA *das, L_DNA **pdad, L_DNAHASH **pdahash)
l_dnaRemoveDupsByHash()
Definition: dnahash.c:350
L_DNA * l_dnaHashGetDna(L_DNAHASH *dahash, l_uint64 key, l_int32 copyflag)
l_dnaHashGetDna()
Definition: dnahash.c:232
l_int32 l_dnaGetCount(L_DNA *da)
l_dnaGetCount()
Definition: dnabasic.c:597
void l_dnaDestroy(L_DNA **pda)
l_dnaDestroy()
Definition: dnabasic.c:321
Definition: pix.h:716
void l_dnaHashDestroy(L_DNAHASH **pdahash)
l_dnaHashDestroy()
Definition: dnahash.c:152
struct L_Dna ** dna
Definition: array.h:108
l_int32 l_dnaHashGetTotalCount(L_DNAHASH *dahash)
l_dnaHashGetTotalCount()
Definition: dnahash.c:203
l_ok l_hashFloat64ToUint64(l_int32 nbuckets, l_float64 val, l_uint64 *phash)
l_hashFloat64ToUint64()
Definition: utils1.c:665
l_ok l_dnaGetIValue(L_DNA *da, l_int32 index, l_int32 *pival)
l_dnaGetIValue()
Definition: dnabasic.c:693
Definition: array.h:83
l_ok findNextLargerPrime(l_int32 start, l_uint32 *pprime)
findNextLargerPrime()
Definition: utils1.c:689
L_DNA * l_dnaClone(L_DNA *da)
l_dnaClone()
Definition: dnabasic.c:389
l_ok l_dnaAddNumber(L_DNA *da, l_float64 val)
l_dnaAddNumber()
Definition: dnabasic.c:439
l_int32 l_dnaHashGetCount(L_DNAHASH *dahash)
l_dnaHashGetCount()
Definition: dnahash.c:185
l_ok l_dnaMakeHistoByHash(L_DNA *das, L_DNAHASH **pdahash, L_DNA **pdav, L_DNA **pdac)
l_dnaMakeHistoByHash()
Definition: dnahash.c:421
l_ok l_dnaHashAdd(L_DNAHASH *dahash, l_uint64 key, l_float64 value)
l_dnaHashAdd()
Definition: dnahash.c:267
l_ok l_dnaFindValByHash(L_DNA *da, L_DNAHASH *dahash, l_float64 val, l_int32 *pindex)
l_dnaFindValByHash()
Definition: dnahash.c:553
L_DNAHASH * l_dnaHashCreateFromDna(L_DNA *da)
l_dnaHashCreateFromDna()
Definition: dnahash.c:306
l_ok l_dnaGetDValue(L_DNA *da, l_int32 index, l_float64 *pval)
l_dnaGetDValue()
Definition: dnabasic.c:658
l_ok l_dnaSetValue(L_DNA *da, l_int32 index, l_float64 val)
l_dnaSetValue()
Definition: dnabasic.c:725
L_DNA * l_dnaCreate(l_int32 n)
l_dnaCreate()
Definition: dnabasic.c:169
Definition: pix.h:718
l_int32 initsize
Definition: array.h:107
L_DNA * l_dnaIntersectionByHash(L_DNA *da1, L_DNA *da2)
l_dnaIntersectionByHash()
Definition: dnahash.c:485
L_DNA * l_dnaCopy(L_DNA *da)
l_dnaCopy()
Definition: dnabasic.c:360
L_DNAHASH * l_dnaHashCreate(l_int32 nbuckets, l_int32 initsize)
l_dnaHashCreate()
Definition: dnahash.c:122