::: 강좌/소스/문서 :::

강좌/소스/문서 성격에 맞지 않는 광고,비방,질문의 글은 즉시 삭제하며
내용을 복사하여 사용할 경우 반드시 이곳(http://www.howto.pe.kr)을 출처로 명시하여 주세요


Category

  김영대(2003-07-29 13:38:59, Hit : 6202, Vote : 1383
 [소스] String 해쉬(hash) 함수

/*
* hash_str.c,v 2.1 1992/08/11 00:29:20 pete Exp
* hash_str.c,v
* Revision 2.1  1992/08/11  00:29:20  pete
* Added proper RCS log and id entries.
*
*/

#include        <stdlib.h>
#if defined(DBUG_NEW) && !defined(_DBUG_NEW_H_)
#include "dbug_new.h"
#endif


/*
* From "Fast Hashing of Variable Length Text Strings" by Peter K. Pearson,
* June 1990, Communications of the ACM, pp 677-680.  This implementation
* uses the variation to achieve a 16 bit hash index instead of the 8 bit as
* specified in the algorithm.
*
* The advantages of the algorithm are:
*        - No restriction is placed on the length of the text string.
*        - The length of the string need not be known before hand.
*        - Very little arithmetic is performed on each character being
*          hashed.
*        - Similar strings are not likely to collide
*        - Minimal, perfect hashing functions can be built in this form.
*
* Applying the test case to /usr/lib/dict/words produced 6871 collisions
* over 23739 entries vs. the more typical hash function (hash_simple) which
* produced 14665 collions over 23739 entries.  On the other hand, smaller
* lists of words (such as the passwd file with 136 entries), had similiar
* number of collisions (~90 with a table size of 50).
*/

/*
* The psuedo random permutation of integers from 0-255 (Table 1 in the
* article.
*/

#define ARRAY_SIZE 256
static int table[ARRAY_SIZE] =
{
1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20,214, 244, 227, 149, 235,
97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209
};

static int
calc_hash (const char *str, int offset)
{
        int        value;

        if (!str)
                return 0;
        /*
         * Determine the initial value (i.e. use the offset).  This is no
         * different if the offset == 0.
         */
        value = table[*str++ + offset];

        /*
         * Now calculate the hash value
         */
        while (*str) {
                value = table[value ^ *str++];
        }
        return value;
}

int
hash_str (const char *str)
{
        if (str && *str)
                return (calc_hash (str, 0) << 8) | calc_hash (str, 1);
        else if (str)
                return *str;
        else
                return 0;
}


#ifdef TEST
static int
hash_simple (const char *str)
{
        int        value = 0;
        int        i;

        if (!str)
                return 0;
        for (i = 0; *str; i++) {
                value ^= (*str++ << ((i & 3)*8));
        }
        return value;
}

/*
* Let's use /usr/lib/dict/words to verify how good this hash function
* really is.
*/

main (int argc, char **argv)
{
        FILE                *fptr;
        char                *file = "/usr/lib/dict/words";
        int                index;                /* index of hash values */
        int                count;                /* number of words */
        int                use_trivial = 0; /* use simple hash function flag */
        int                collisions;        /* how many collisions */
        int                *results;        /* where they are distributed */
        char                buf[BUFSIZ];        /* buffer to read word into */
        int                i;
        int                table_size = 1 << 15;
        int                opt;
        extern char        *optarg;

        while ((opt = getopt (argc, argv, "f:s:t")) != EOF) {
                switch (opt) {
                case 'f':
                        file = optarg;
                        break;
                case 's':
                        table_size = atoi (optarg);
                        break;
                case 't':
                        use_trivial = 1;
                        break;
                }
        }

        if ((fptr = fopen (file, "r")) == NULL) {
                perror (file);
                exit (1);
        }

        results = (int *) malloc (sizeof (int) * table_size);
        for (i = 0; i < table_size; i++) {
                results[i] = 0;
        }

        count = 0;
        collisions = 0;
        while (fgets (buf, sizeof (buf), fptr) != NULL) {
                if ((count % 500 ) == 0) {
                        printf (".");
                        fflush (stdout);
                }
                ++count;
                if (use_trivial)
                        index = hash_simple (buf) % table_size;
                else
                        index = hash_str (buf) % table_size;
                if (results[index]) {
                        ++collisions;
                }
                ++results[index];
        }

        printf ("n%d entries, %d collisionsn", count, collisions);
}

/*
* A simple function to check that the above array was typed in (yuch)
* correctly.  The array is checked to make sure are values are between
* 0, 255; there are no duplicates; there are only 256 entries; all 256
* values are used.  If there are any errors, the table is printed out (in
* the same format as the one on page 678.
*/
check_table ()
{
        int        i;
        int        array[ARRAY_SIZE];
        int        err = 0;
        
        /* initialize the array */

        for (i = 0; i < ARRAY_SIZE; i++)
                array[i] = -1;

        for (i = 0; i < ARRAY_SIZE; i++) {
                if (table[i] < 0 || table[i] >= ARRAY_SIZE) {
                        ++err;
                        printf ("table[%d] = %d (out of bounds)n", i, table[i]);
                        continue;
                }
                if (array[table[i]] != -1) {
                        ++err;
                        printf ("table[%d] == table[%d] == %dn", i, array[i],
                                table[i]);
                }
                array[table[i]] = i;
        }

        /*
         * Make sure everything is used
         */
        if (err == 0) {
                for (i = 0; i < ARRAY_SIZE; i++)
                        if (array[i] == -1) {
                                printf ("%d not in tablen", i);
                                ++err;
                        }
        }

        /*
         * Print out the table if there are any errors
         */

        if (err) {
                for (i = 0; i < ARRAY_SIZE; i++) {
                        printf ("%3d ", table[i]);
                        if ((i + 1) % 16 == 0)
                                printf ("n");
                }
        }
        fprintf (stderr, "%d errorsn", err);
        exit (err == 0?0:1);
}
#endif /* TEST */





62   [Unix/Linux] [강좌] 인텔 센트리노 무선랜 카드: ndiswrapper  김영대 2004/06/27 10841 1932
61   [Unix/Linux] [소스] top for System V Release 4, Intel or Sparc CPU  김영대 2004/02/20 6483 1121
60   [Unix/Linux] [소스] top for SunOS 5.x (Solaris 2.x)  김영대 2004/02/20 8737 1166
  [Unix/Linux] [소스] String 해쉬(hash) 함수  김영대 2003/07/29 6202 1383
58   [Unix/Linux] [강좌] 웹로그분석을 위한 Webalizer + GDlib + PNGlib + Zlib 설치  김영대 2003/05/04 7027 1344
57   [Unix/Linux] [강좌] Apache + MySQL + PHP4 + Zend Optimizer 설치  김영대 2003/04/15 6083 1320
56   [Unix/Linux] [System V IPC] shmop() function - 공유메모리  김영대 2003/03/17 5640 1376
55   [Unix/Linux] [System V IPC] shmctl() function - 공유메모리  김영대 2003/03/17 5659 1526
54   [Unix/Linux] [System V IPC] shmget() function - 공유메모리  김영대 2003/03/17 5997 1504
53   [Unix/Linux] [System V IPC] semop() function - 세마포어  김영대 2003/03/17 4873 1286
52   [Unix/Linux] [System V IPC] semctl() function - 세마포어  김영대 2003/03/17 4545 1253
51   [Unix/Linux] [System V IPC] semget() function - 세마포어  김영대 2003/03/17 5636 1508
50   [Unix/Linux] [System V IPC] msgget() function - 메세지큐  김영대 2003/03/17 5550 1423
49   [Unix/Linux] [System V IPC] msgctl() function - 메세지큐  김영대 2003/03/17 5636 1319
48   [Unix/Linux] [System V IPC] msgrcv() function - 메세지큐  김영대 2003/03/17 6052 1550
47   [Unix/Linux] [System V IPC] msgsnd() function - 메세지큐  김영대 2003/03/17 7395 1489
46   [Unix/Linux] [POSIX IPC] 생산자/소비자 - 세마포어  김영대 2003/03/17 7017 1452
45   [Unix/Linux] [POSIX IPC] mq_receive() function - 메세지큐  김영대 2003/03/17 6499 1589
44   [Unix/Linux] [POSIX IPC] mq_send() function - 메세지큐  김영대 2003/03/17 7626 1507
43   [Unix/Linux] [Thread] pthread_cond() function  김영대 2003/03/17 5783 1544

1 [2][3][4]
 

Copyright 1999-2017 Zeroboard / skin by zero