diff --git a/README b/README deleted file mode 100644 index a4a1f69..0000000 --- a/README +++ /dev/null @@ -1,10 +0,0 @@ -This is a simple C hashmap, using strings for the keys. - -Originally based on code by Eliot Back at http://elliottback.com/wp/hashmap-implementation-in-c/ -Reworked by Pete Warden - http://petewarden.typepad.com/searchbrowser/2010/01/c-hashmap.html - -main.c contains an example that tests the functionality of the hashmap module. -To compile it, run something like this on your system: -gcc main.c hashmap.c -o hashmaptest - -There are no restrictions on how you reuse this code. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..677b847 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +This is a simple C hashmap, using strings for the keys. + +Originally based on code by Elliott Back at http://elliottback.com/wp/hashmap-implementation-in-c/ + +Reworked by Pete Warden - http://petewarden.typepad.com/searchbrowser/2010/01/c-hashmap.html + +Zaks Wang + + 1. Fixed the bug where putting an already-present key still increased the map size. + + 2. Added the ability to change the hash function. + + You can choose SIMPLE_HASH RS_HASH JS_HASH PJW_HASH ELF_HASH BKDR_HASH DJB_HASH AP_HASH + CRC_HASH + +main.c contains an example that tests the functionality of the hashmap module. + +To compile it, run something like this on your system: + +gcc main.c hashmap.c hash.c -o hashmaptest + +There are no restrictions on how you reuse this code. + +hash_func_test +############## + + 一个字符串hash函数的评测,原文http://blog.csdn.net/liuben/article/details/5050697 + 实际语料测试结果,BKDR_HASH远远高于其他HASH函数,其次是AP_HASH + 如果冲突,建议将MAX_CHAIN_LENGTH设置稍大 + +hashMap +####### + + cheungmine修改版hashmap http://blog.csdn.net/cheungmine/article/details/7704686 + + +待解决的问题: + 仅仅一个数组保存pair的指针,当分配8亿多长度的数组时候,内存会不够,可以分为多段数组, + 再用一个hash解决在多个数组间跳跃问题。有时间再改! 
diff --git a/hash.c b/hash.c new file mode 100644 index 0000000..825439b --- /dev/null +++ b/hash.c @@ -0,0 +1,183 @@ +#include +#include "hash.h" + +/* A Simple Hash Function */ +unsigned int simple_hash(char *str) +{ + register unsigned int hash; + register unsigned char *p; + + for(hash = 0, p = (unsigned char *)str; *p ; p++) + hash = 31 * hash + *p; + + return (hash & 0x7FFFFFFF); +} + +/* RS Hash Function */ +unsigned int RS_hash(char *str) +{ + unsigned int b = 378551; + unsigned int a = 63689; + unsigned int hash = 0; + + while (*str) + { + hash = hash * a + (*str++); + a *= b; + } + + return (hash & 0x7FFFFFFF); +} + +/* JS Hash Function */ +unsigned int JS_hash(char *str) +{ + unsigned int hash = 1315423911; + + while (*str) + { + hash ^= ((hash << 5) + (*str++) + (hash >> 2)); + } + + return (hash & 0x7FFFFFFF); +} + +/* P. J. Weinberger Hash Function */ +unsigned int PJW_hash(char *str) +{ + unsigned int BitsInUnignedInt = (unsigned int)(sizeof(unsigned int) * 8); + unsigned int ThreeQuarters = (unsigned int)((BitsInUnignedInt * 3) / 4); + unsigned int OneEighth = (unsigned int)(BitsInUnignedInt / 8); + + unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnignedInt - OneEighth); + unsigned int hash = 0; + unsigned int test = 0; + + while (*str) + { + hash = (hash << OneEighth) + (*str++); + if ((test = hash & HighBits) != 0) + { + hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits)); + } + } + + return (hash & 0x7FFFFFFF); +} + +/* ELF Hash Function */ +unsigned int ELF_hash(char *str) +{ + unsigned int hash = 0; + unsigned int x = 0; + + while (*str) + { + hash = (hash << 4) + (*str++); + if ((x = hash & 0xF0000000L) != 0) + { + hash ^= (x >> 24); + hash &= ~x; + } + } + + return (hash & 0x7FFFFFFF); +} + +/* BKDR Hash Function */ +unsigned int BKDR_hash(char *str) +{ + unsigned int seed = 131; // 31 131 1313 13131 131313 etc.. 
+ unsigned int hash = 0; + + while (*str) + { + hash = hash * seed + (*str++); + } + + return (hash & 0x7FFFFFFF); +} + +/* SDBM Hash Function */ +unsigned int SDBM_hash(char *str) +{ + unsigned int hash = 0; + + while (*str) + { + hash = (*str++) + (hash << 6) + (hash << 16) - hash; + } + + return (hash & 0x7FFFFFFF); +} + +/* DJB Hash Function */ +unsigned int DJB_hash(char *str) +{ + unsigned int hash = 5381; + + while (*str) + { + hash += (hash << 5) + (*str++); + } + + return (hash & 0x7FFFFFFF); +} + +/* AP Hash Function */ +unsigned int AP_hash(char *str) +{ + unsigned int hash = 0; + int i; + for (i=0; *str; i++) + { + if ((i & 1) == 0) + { + hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3)); + } + else + { + hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5))); + } + } + + return (hash & 0x7FFFFFFF); +} + +/* CRC Hash Function */ +unsigned int CRC_hash(char *str) +{ + unsigned int nleft = strlen(str); + unsigned long long sum = 0; + unsigned short int *w = (unsigned short int *)str; + unsigned short int answer = 0; + + /* + * Our algorithm is simple, using a 32 bit accumulator (sum), we add + * sequential 16 bit words to it, and at the end, fold back all the + * carry bits from the top 16 bits into the lower 16 bits. 
+ */ + while ( nleft > 1 ) { + sum += *w++; + nleft -= 2; + } + /* + * mop up an odd byte, if necessary + */ + if ( 1 == nleft ) { + *( unsigned char * )( &answer ) = *( unsigned char * )w ; + sum += answer; + } + /* + * add back carry outs from top 16 bits to low 16 bits + * add hi 16 to low 16 + */ + sum = ( sum >> 16 ) + ( sum & 0xFFFF ); + /* add carry */ + sum += ( sum >> 16 ); + /* truncate to 16 bits */ + answer = ~sum; + + return (answer & 0xFFFFFFFF); +} + diff --git a/hash.h b/hash.h new file mode 100644 index 0000000..3ba95b6 --- /dev/null +++ b/hash.h @@ -0,0 +1,42 @@ +#ifndef _HASH_H +#define _HASH_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* A Simple Hash Function */ +unsigned int simple_hash(char *str); + +/* RS Hash Function */ +unsigned int RS_hash(char *str); + +/* JS Hash Function */ +unsigned int JS_hash(char *str); + +/* P. J. Weinberger Hash Function */ +unsigned int PJW_hash(char *str); + +/* ELF Hash Function */ +unsigned int ELF_hash(char *str); + +/* BKDR Hash Function */ +unsigned int BKDR_hash(char *str); + +/* SDBM Hash Function */ +unsigned int SDBM_hash(char *str); + +/* DJB Hash Function */ +unsigned int DJB_hash(char *str); + +/* AP Hash Function */ +unsigned int AP_hash(char *str); + +/* CRC Hash Function */ +unsigned int CRC_hash(char *str); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/hashMap/README b/hashMap/README new file mode 100644 index 0000000..47fe459 --- /dev/null +++ b/hashMap/README @@ -0,0 +1,2 @@ +作者:cheungmine +网址:http://blog.csdn.net/cheungmine/article/details/7704686 diff --git a/hashMap/hashmap.c b/hashMap/hashmap.c new file mode 100644 index 0000000..9445e2f --- /dev/null +++ b/hashMap/hashmap.c @@ -0,0 +1,397 @@ +/** + * hashmap.c + * Generic hash map implementation. 
+ */ +#include "hashmap.h" + +#include +#include +#include + +#define HMAP_INITIAL_SIZE (256) +#define HMAP_CHAIN_LENGTH (8) + +typedef enum _use_state { + unused_0 = 0, + used_1 = 1 +} use_state; + +/* A element to keep keys and values */ +typedef struct _hashmap_elem_t{ + char *key; /* pointer to actual key storage */ + use_state used; /* unused_0, used_1 */ + void_ptr data; /* pointer to value memory allocated by callee */ +} hashmap_elem_t; + +/* A hashmap has maximum size and current size, as well as the elems to hold */ +typedef struct _hashmap_map_t{ + int table_size; + int size; + hashmap_elem_t *elems; +} hashmap_map_t; + +/** + * The implementation here was originally done by Gary S. Brown. + * I have borrowed the tables directly, and made some minor changes. + * + * COPYRIGHT (C) 1986 Gary S. Brown. + * You may use this program, or code or tables extracted from it, + * as desired without restriction. + */ +static unsigned long crc32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 
0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +/** + * Return a 32-bit CRC of the contents of the buffer. + */ +static unsigned long crc32(const unsigned char *s, unsigned int len) +{ + unsigned int i; + unsigned long crc32val = 0; + for (i = 0; i < len; i ++) { + crc32val = crc32_tab[(crc32val ^ s[i]) & 0xff] ^ (crc32val >> 8); + } + return crc32val; +} + +/** + * Hashing function for a string + */ +static unsigned int _find_hash_index(hashmap_map_t * m, const char* keystring){ + unsigned long key = crc32(keystring, strlen(keystring)); + + /* Robert Jenkins' 32 bit Mix Function */ + key += (key << 12); + key ^= (key >> 22); + key += (key << 4); + key ^= (key >> 9); + key += (key << 10); + key ^= (key >> 2); + key += (key << 7); + key ^= (key >> 12); + + /* Knuth's Multiplicative Method */ + key = (key >> 3) * 0x9E3779B1; + + return key % m->table_size; +} + +/** + * Return the integer of the location in data to store the point to the item, + * or HMAP_E_OVERFLOW. 
+ */ +static int _hashmap_hash(hmap_t in, char* key){ + int curr; + int i; + hashmap_elem_t *elem; + hashmap_map_t *m = (hashmap_map_t *) in; + + /* If full, return immediately */ + if (m->size >= (m->table_size/2)) { + return HMAP_E_OVERFLOW; + } + + /* Find the best index */ + curr = _find_hash_index(m, key); + + /* Linear probing */ + for (i = 0; i< HMAP_CHAIN_LENGTH; i++) { + elem = m->elems + curr; + if(elem->used == unused_0) { + return curr; + } + + if(elem->used == used_1 && (!strcmp(elem->key, key))) { + return curr; + } + + curr = (curr + 1) % m->table_size; + } + return HMAP_E_OVERFLOW; +} + +/** + * Doubles the size of the hashmap, and rehashes all the elements + */ +static int _hashmap_rehash(hmap_t in){ + int i; + int old_size; + hashmap_elem_t *curr; + hashmap_elem_t *elem; + + /* Setup the new elements */ + hashmap_map_t *m = (hashmap_map_t *) in; + hashmap_elem_t *temp = (hashmap_elem_t *) calloc(2 * m->table_size, sizeof(hashmap_elem_t)); + if (!temp) { + return HMAP_E_OUTMEM; + } + + /* Update the array */ + curr = m->elems; + m->elems = temp; + + /* Update the size */ + old_size = m->table_size; + m->table_size = 2 * m->table_size; + m->size = 0; + + /* Rehash the elements */ + for (i = 0; i < old_size; i++){ + int status; + elem = curr + i; + if (elem->used == unused_0) { + continue; + } + status = hashmap_put(m, elem->key, elem->data); + if (status != HMAP_S_OK) { + return status; + } + } + + free(curr); + return HMAP_S_OK; +} + +/** + * Create an empty hashmap + */ +hmap_t hashmap_create() { + hashmap_map_t* m = (hashmap_map_t*) malloc(sizeof(hashmap_map_t)); + if (!m) { + exit(HMAP_E_OUTMEM); + } + + m->elems = (hashmap_elem_t*) calloc(HMAP_INITIAL_SIZE, sizeof(hashmap_elem_t)); + if (!m->elems) { + free(m); + exit(HMAP_E_OUTMEM); + } + + m->table_size = HMAP_INITIAL_SIZE; + m->size = 0; + + return m; +} + +/** + * Add a pair of key-value to the hashmap + */ +int hashmap_put(hmap_t in, char* key, void_ptr value){ + int index; + hashmap_map_t 
*m; + hashmap_elem_t *elem; + + m = (hashmap_map_t *) in; + + /* Find a place to put our value */ + index = _hashmap_hash(in, key); + while (index == HMAP_E_OVERFLOW) { + if (_hashmap_rehash(in) == HMAP_E_OUTMEM) { + return HMAP_E_OUTMEM; + } + index = _hashmap_hash(in, key); + } + + /* Set the elems */ + elem = m->elems + index; + if (elem->used == used_1) { + /* Find a repeated key */ + return HMAP_E_KEYUSED; + } + elem->data = value; + elem->key = key; /* only set to a reference */ + elem->used = used_1; + m->size++; + + return HMAP_S_OK; +} + +/** + * Get your pointer out of the hashmap with a key + */ +int hashmap_get(hmap_t in, const char* key, void_ptr *value){ + int curr; + int i; + hashmap_map_t *m; + hashmap_elem_t *elem; + + m = (hashmap_map_t *) in; + + /* Find element location */ + curr = _find_hash_index(m, key); + + /* Linear probing, if necessary */ + for (i = 0; ielems + curr; + if (elem->used == used_1) { + if (!strcmp(elem->key, key)) { + *value = (elem->data); + return HMAP_S_OK; + } + } + curr = (curr + 1) % m->table_size; + } + + *value = NULL; + return HMAP_E_NOTFOUND; +} + +/** + * Iterate the function parameter over each element in the hashmap. The + * additional void_ptr argument is passed to the function as its first + * argument and the hashmap element is the second. 
+ */ +int hashmap_iterate(hmap_t in, hmap_callback_func fnIterValue, void_ptr arg) { + int i; + hashmap_elem_t *elem; + hashmap_map_t *m = (hashmap_map_t*) in; + + if (hashmap_size(m) <= 0) { + return HMAP_E_NOTFOUND; + } + + for (i = 0; i< m->table_size; i++) { + elem = m->elems+i; + if(elem->used == used_1) { + int status = fnIterValue(elem->data, arg); + if (status != HMAP_S_OK) { + return status; + } + } + } + return HMAP_S_OK; +} + +/** + * Remove an element with that key from the map + */ +int hashmap_remove(hmap_t in, char* key, void_ptr *outValue){ + int i, curr; + hashmap_map_t* m; + hashmap_elem_t *elem; + + /* Cast the hashmap */ + m = (hashmap_map_t *) in; + + if (outValue) { + *outValue = NULL; + } + + /* Find key */ + curr = _find_hash_index(m, key); + + /* Linear probing, if necessary */ + for (i = 0; ielems + curr; + if (elem->used == used_1){ + if (!strcmp(elem->key, key)){ + /* Blank out the fields */ + elem->used = unused_0; + elem->key = NULL; + if (outValue) { + *outValue = elem->data; + } + elem->data = NULL; + + /* Reduce the size */ + m->size--; + return HMAP_S_OK; + } + } + curr = (curr + 1) % m->table_size; + } + /* Data not found */ + return HMAP_E_NOTFOUND; +} + +/** + * Deallocate the hashmap + */ +void hashmap_destroy(hmap_t in, hmap_callback_func fnFreeValue, void_ptr arg){ + hashmap_elem_t *elem; + void_ptr data; + hashmap_map_t* m = (hashmap_map_t*) in; + + while (m->table_size-->0) { + elem = m->elems+(m->table_size); + if (elem->used == used_1) { + elem->used = unused_0; + elem->key = NULL; + data = elem->data; + elem->data = NULL; + + if (fnFreeValue) { + fnFreeValue(data, arg); + } + } + } + + free(m->elems); + free(m); +} + +/** + * Return the length of the hashmap + */ +int hashmap_size(hmap_t in){ + hashmap_map_t* m = (hashmap_map_t *) in; + if (m) { + return m->size; + } + else { + return 0; + } +} + diff --git a/hashMap/hashmap.h b/hashMap/hashmap.h new file mode 100644 index 0000000..0c24db9 --- /dev/null +++ 
b/hashMap/hashmap.h @@ -0,0 +1,87 @@ +/** + * hashmap.h + */ +#ifndef _HASHMAP_H_INCLUDED +#define _HASHMAP_H_INCLUDED + +#if defined(__cplusplus) +extern "C" { +#endif + +#define HMAP_E_KEYUSED (-5) /* Key already existed */ +#define HMAP_E_OUTMEM (-4) /* Out of Memory */ +#define HMAP_E_NOTFOUND (-3) /* No such element */ +#define HMAP_E_OVERFLOW (-2) /* Hashmap is full */ +#define HMAP_E_FAIL (-1) /* Hashmap api fail */ +#define HMAP_S_OK (0) /* Success */ + +/** + * void_ptr is a pointer. This allows you to put arbitrary structures in the hashmap. + */ +typedef void* void_ptr; + +/** + * hmap_t is a pointer to an internally maintained data structure. + * Clients of this package do not need to know how hashmaps are + * represented. They see and manipulate only hmap_t's. + */ +typedef void_ptr hmap_t; + +/** + * hmap_callback_func is a pointer to a function that can take two void_ptr arguments + * and return an integer. Returns status code.. + */ +typedef int (*hmap_callback_func)(void_ptr, void_ptr); + +/** + * prototype for map element type + */ +typedef struct _hmap_pair_t { + char *key; + void_ptr data; +} hmap_pair_t; + +/** + * Return an empty hashmap. Returns NULL if empty. + */ +extern hmap_t hashmap_create(); + +/** + * Iteratively call fn with argument (value, arg) for each element data + * in the hashmap. The function returns anything other than HMAP_S_OK + * the traversal is terminated. fn must not modify any hashmap functions. + */ +extern int hashmap_iterate(hmap_t in, hmap_callback_func fnIterValue, void_ptr arg); + +/** + * Add an element to the hashmap. + * Return HMAP_S_OK, HMAP_E_KEYUSED or HMAP_E_OUTMEM. + */ +extern int hashmap_put(hmap_t in, char* key, void_ptr elem); + +/** + * Get an element from the hashmap. Return HMAP_S_OK or HMAP_E_NOTFOUND. + */ +extern int hashmap_get(hmap_t in, const char* key, void_ptr *elem); + +/** + * Remove an element from the hashmap. Return HMAP_S_OK or HMAP_E_NOTFOUND. 
+ */ +extern int hashmap_remove(hmap_t in, char* key, void_ptr *outValue); + +/** + * Free the hashmap + */ +extern void hashmap_destroy(hmap_t in, hmap_callback_func fnFreeValue, void_ptr arg); + +/** + * Get the current size of a hashmap + */ +extern int hashmap_size(hmap_t in); + +#if defined(__cplusplus) +} +#endif + +#endif /* _HASHMAP_H_INCLUDED */ + diff --git a/hashMap/main.c b/hashMap/main.c new file mode 100644 index 0000000..5e57b9c --- /dev/null +++ b/hashMap/main.c @@ -0,0 +1,117 @@ +/** + * main.c + * + * Detecting memory leaks only for windows . + * Place the following snippet where leak to be tested: + * #if defined(_CRTDBG_MAP_ALLOC) + * _CrtDumpMemoryLeaks(); + * #endif + */ +#if defined(WIN32) && defined(_DEBUG) + #ifndef _CRTDBG_MAP_ALLOC + #pragma message( __FILE__": _CRTDBG_MAP_ALLOC defined only for DEBUG on Win32." ) + #define _CRTDBG_MAP_ALLOC + #include + #include + #endif +#endif + +#include +#include +#include + +#include "hashmap.h" + +typedef struct userelem_t { + char key[20]; + char *value; +} userelem; + +typedef struct userdata_t { + char name[20]; + hmap_t map; /* userelem map */ +} userdata; + +static int iter_elem(void* elem, void *arg) { + userelem *el = (userelem *) elem; + printf("key=%s; value=%s\n", el->key, el->value); + return 0; +} + +static int free_elem(void* elem, void *arg) { + userelem *el = (userelem *) elem; + free(el->value); + free(el); + return 0; +} + +static int free_data(void* data, void *arg) { + userdata *dat = (userdata *) data; +  /* 删除整个子 map */ +  hashmap_destroy(dat->map, free_elem, 0); + free(dat); + return 0; +} + +int main(int argc, char* argv[]) +{ + hmap_t map; + userdata *dat; + userelem *el; + int ret, i, j; + + /* 创建 hashmap */ + map = hashmap_create(); + + /* 插入 hashmap 元素 */ + for (i=0; i<100; i++) { + dat = (userdata *)malloc(sizeof(userdata)); + + /* 创建子 hashmap */ + dat->map = hashmap_create(); + + /* 插入子 hashmap 元素 */ + for (j=0; j<10; j++) { + el = (userelem *)malloc(sizeof(userelem)); 
+ sprintf(el->key, "%d", j); + + el->value = (char*) malloc(30); + sprintf(el->value, "%d", j+1000); + ret = hashmap_put(dat->map, el->key, el); + assert(ret==HMAP_S_OK); + } + + sprintf(dat->name, "%d", i); + ret = hashmap_put(map, dat->name, dat); + assert(ret==HMAP_S_OK); + } + + printf("hashmap_size: %d\n", hashmap_size(map)); + + /* 删除指定元素: key="10" */ + ret = hashmap_remove(map, "10", &dat); + assert(ret==HMAP_S_OK); + printf("hashmap_remove: name=%s. size=%d\n", dat->name, hashmap_size(map)); + hashmap_iterate(dat->map, iter_elem, 0); + free_data(dat, 0); + + /* 删除指定元素: key="11" */ + ret = hashmap_remove(map, "11", &dat); + assert(ret==HMAP_S_OK); + printf("hashmap_remove: name=%s. size=%d\n", dat->name, hashmap_size(map)); + hashmap_iterate(dat->map, iter_elem, 0); + free_data(dat, 0); + + /* 查询元素: key="99" */ + ret = hashmap_get(map, "99", &dat); + assert(ret==HMAP_S_OK); + printf("hashmap_get: name=%s. size=%d\n", dat->name, hashmap_size(map)); + hashmap_iterate(dat->map, iter_elem, 0); + + /* 删除整个 map */ + hashmap_destroy(map, free_data, 0); + + _CrtDumpMemoryLeaks(); + return 0; +} + diff --git a/hash_func_test/hash.c b/hash_func_test/hash.c new file mode 100644 index 0000000..825439b --- /dev/null +++ b/hash_func_test/hash.c @@ -0,0 +1,183 @@ +#include +#include "hash.h" + +/* A Simple Hash Function */ +unsigned int simple_hash(char *str) +{ + register unsigned int hash; + register unsigned char *p; + + for(hash = 0, p = (unsigned char *)str; *p ; p++) + hash = 31 * hash + *p; + + return (hash & 0x7FFFFFFF); +} + +/* RS Hash Function */ +unsigned int RS_hash(char *str) +{ + unsigned int b = 378551; + unsigned int a = 63689; + unsigned int hash = 0; + + while (*str) + { + hash = hash * a + (*str++); + a *= b; + } + + return (hash & 0x7FFFFFFF); +} + +/* JS Hash Function */ +unsigned int JS_hash(char *str) +{ + unsigned int hash = 1315423911; + + while (*str) + { + hash ^= ((hash << 5) + (*str++) + (hash >> 2)); + } + + return (hash & 0x7FFFFFFF); +} + 
+/* P. J. Weinberger Hash Function */ +unsigned int PJW_hash(char *str) +{ + unsigned int BitsInUnignedInt = (unsigned int)(sizeof(unsigned int) * 8); + unsigned int ThreeQuarters = (unsigned int)((BitsInUnignedInt * 3) / 4); + unsigned int OneEighth = (unsigned int)(BitsInUnignedInt / 8); + + unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnignedInt - OneEighth); + unsigned int hash = 0; + unsigned int test = 0; + + while (*str) + { + hash = (hash << OneEighth) + (*str++); + if ((test = hash & HighBits) != 0) + { + hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits)); + } + } + + return (hash & 0x7FFFFFFF); +} + +/* ELF Hash Function */ +unsigned int ELF_hash(char *str) +{ + unsigned int hash = 0; + unsigned int x = 0; + + while (*str) + { + hash = (hash << 4) + (*str++); + if ((x = hash & 0xF0000000L) != 0) + { + hash ^= (x >> 24); + hash &= ~x; + } + } + + return (hash & 0x7FFFFFFF); +} + +/* BKDR Hash Function */ +unsigned int BKDR_hash(char *str) +{ + unsigned int seed = 131; // 31 131 1313 13131 131313 etc.. 
+ unsigned int hash = 0; + + while (*str) + { + hash = hash * seed + (*str++); + } + + return (hash & 0x7FFFFFFF); +} + +/* SDBM Hash Function */ +unsigned int SDBM_hash(char *str) +{ + unsigned int hash = 0; + + while (*str) + { + hash = (*str++) + (hash << 6) + (hash << 16) - hash; + } + + return (hash & 0x7FFFFFFF); +} + +/* DJB Hash Function */ +unsigned int DJB_hash(char *str) +{ + unsigned int hash = 5381; + + while (*str) + { + hash += (hash << 5) + (*str++); + } + + return (hash & 0x7FFFFFFF); +} + +/* AP Hash Function */ +unsigned int AP_hash(char *str) +{ + unsigned int hash = 0; + int i; + for (i=0; *str; i++) + { + if ((i & 1) == 0) + { + hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3)); + } + else + { + hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5))); + } + } + + return (hash & 0x7FFFFFFF); +} + +/* CRC Hash Function */ +unsigned int CRC_hash(char *str) +{ + unsigned int nleft = strlen(str); + unsigned long long sum = 0; + unsigned short int *w = (unsigned short int *)str; + unsigned short int answer = 0; + + /* + * Our algorithm is simple, using a 32 bit accumulator (sum), we add + * sequential 16 bit words to it, and at the end, fold back all the + * carry bits from the top 16 bits into the lower 16 bits. 
+ */ + while ( nleft > 1 ) { + sum += *w++; + nleft -= 2; + } + /* + * mop up an odd byte, if necessary + */ + if ( 1 == nleft ) { + *( unsigned char * )( &answer ) = *( unsigned char * )w ; + sum += answer; + } + /* + * add back carry outs from top 16 bits to low 16 bits + * add hi 16 to low 16 + */ + sum = ( sum >> 16 ) + ( sum & 0xFFFF ); + /* add carry */ + sum += ( sum >> 16 ); + /* truncate to 16 bits */ + answer = ~sum; + + return (answer & 0xFFFFFFFF); +} + diff --git a/hash_func_test/hash.h b/hash_func_test/hash.h new file mode 100644 index 0000000..3ba95b6 --- /dev/null +++ b/hash_func_test/hash.h @@ -0,0 +1,42 @@ +#ifndef _HASH_H +#define _HASH_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* A Simple Hash Function */ +unsigned int simple_hash(char *str); + +/* RS Hash Function */ +unsigned int RS_hash(char *str); + +/* JS Hash Function */ +unsigned int JS_hash(char *str); + +/* P. J. Weinberger Hash Function */ +unsigned int PJW_hash(char *str); + +/* ELF Hash Function */ +unsigned int ELF_hash(char *str); + +/* BKDR Hash Function */ +unsigned int BKDR_hash(char *str); + +/* SDBM Hash Function */ +unsigned int SDBM_hash(char *str); + +/* DJB Hash Function */ +unsigned int DJB_hash(char *str); + +/* AP Hash Function */ +unsigned int AP_hash(char *str); + +/* CRC Hash Function */ +unsigned int CRC_hash(char *str); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/hash_func_test/hashtest.c b/hash_func_test/hashtest.c new file mode 100644 index 0000000..5811ac6 --- /dev/null +++ b/hash_func_test/hashtest.c @@ -0,0 +1,223 @@ +#include +#include +#include +#include +#include +#include +#include +#include "hash.h" +#include "md5.h" + +struct hash_key { + unsigned char *key; + struct hash_key *next; +}; + +struct hash_counter_entry { + unsigned int hit_count; + unsigned int entry_count; + struct hash_key *keys; +}; + +#define BLOCK_LEN 4096 + +static int backet_len = 10240; +static int hash_call_count = 0; +static struct hash_counter_entry *hlist = 
NULL; +unsigned int (*hash_func)(char *str); + +void choose_hash_func(char *hash_func_name) +{ + if (0 == strcmp(hash_func_name, "simple_hash")) + hash_func = simple_hash; + else if (0 == strcmp(hash_func_name, "RS_hash")) + hash_func = RS_hash; + else if (0 == strcmp(hash_func_name, "JS_hash")) + hash_func = JS_hash; + else if (0 == strcmp(hash_func_name, "PJW_hash")) + hash_func = PJW_hash; + else if (0 == strcmp(hash_func_name, "ELF_hash")) + hash_func = ELF_hash; + else if (0 == strcmp(hash_func_name, "BKDR_hash")) + hash_func = BKDR_hash; + else if (0 == strcmp(hash_func_name, "SDBM_hash")) + hash_func = SDBM_hash; + else if (0 == strcmp(hash_func_name, "DJB_hash")) + hash_func = DJB_hash; + else if (0 == strcmp(hash_func_name, "AP_hash")) + hash_func = AP_hash; + else if (0 == strcmp(hash_func_name, "CRC_hash")) + hash_func = CRC_hash; + else + hash_func = NULL; +} + +void insert_hash_entry(unsigned char *key, struct hash_counter_entry *hlist) +{ + unsigned int hash_value = hash_func(key) % backet_len; + struct hash_key *p; + + p = hlist[hash_value].keys; + while(p) { + if (0 == strcmp(key, p->key)) + break; + p = p->next; + } + if (p == NULL) + { + p = (struct hash_key *)malloc(sizeof(struct hash_key)); + if (p == NULL) + { + perror("malloc in insert_hash_entry"); + return; + } + p->key = strdup(key); + p->next = hlist[hash_value].keys; + hlist[hash_value].keys = p; + hlist[hash_value].entry_count++; + } + hlist[hash_value].hit_count++; +} + +void hashtest_init() +{ + int i; + + hash_call_count = 0; + hlist = (struct hash_counter_entry *) malloc (sizeof(struct hash_counter_entry) * backet_len); + if (NULL == hlist) + { + perror("malloc in hashtest_init"); + return; + } + for (i = 0; i < backet_len; i++) + { + hlist[i].hit_count = 0; + hlist[i].entry_count = 0; + hlist[i].keys = NULL; + } +} + +void hashtest_clean() +{ + int i; + struct hash_key *pentry, *p; + + if (NULL == hlist) + return; + + for (i = 0; i < backet_len; ++i) + { + pentry = hlist[i].keys; + 
while(pentry) + { + p = pentry->next; + if (pentry->key) free(pentry->key); + free(pentry); + pentry = p; + } + } + free(hlist); +} + +void show_hashtest_result() +{ + int i, backet = 0, max_link = 0, sum = 0; + int conflict_count = 0, hit_count = 0; + float avg_link, backet_usage; + + for(i = 0; i < backet_len; i++) + { + if (hlist[i].hit_count > 0) + { + backet++; + sum += hlist[i].entry_count; + if (hlist[i].entry_count > max_link) + { + max_link = hlist[i].entry_count; + } + if (hlist[i].entry_count > 1) + { + conflict_count++; + } + hit_count += hlist[i].hit_count; + } + } + + backet_usage = backet/1.0/backet_len * 100;; + avg_link = sum/1.0/backet; + + printf("backet_len = %d/n", backet_len); + printf("hash_call_count = %d/n", hash_call_count); + printf("hit_count = %d/n", hit_count); + printf("conflict count = %d/n", conflict_count); + printf("longest hash entry = %d/n", max_link); + printf("average hash entry length = %.2f/n", avg_link); + printf("backet usage = %.2f%/n", backet_usage); +} + +void usage() +{ + printf("Usage: hashtest filename hash_func_name [backet_len]/n"); + printf("hash_func_name:/n"); + printf("/tsimple_hash/n"); + printf("/tRS_hash/n"); + printf("/tJS_hash/n"); + printf("/tPJW_hash/n"); + printf("/tELF_hash/n"); + printf("/tBKDR_hash/n"); + printf("/tSDBM_hash/n"); + printf("/tDJB_hash/n"); + printf("/tAP_hash/n"); + printf("/tCRC_hash/n"); +} + +void md5_to_32(unsigned char *md5_16, unsigned char *md5_32) +{ + int i; + + for (i = 0; i < 16; ++i) + { + sprintf(md5_32 + i * 2, "%02x", md5_16[i]); + } +} + +int main(int argc, char *argv[]) +{ + int fd = -1, rwsize = 0; + unsigned char md5_checksum[16 + 1] = {0}; + unsigned char buf[BLOCK_LEN] = {0}; + + if (argc < 3) + { + usage(); + return -1; + } + + if (-1 == (fd = open(argv[1], O_RDONLY))) + { + perror("open source file"); + return errno; + } + + if (argc == 4) + { + backet_len = atoi(argv[3]); + } + + hashtest_init(); + choose_hash_func(argv[2]); + while (rwsize = read(fd, buf, 
BLOCK_LEN)) + { + md5(buf, rwsize, md5_checksum); + insert_hash_entry(md5_checksum, hlist); + hash_call_count++; + memset(buf, 0, BLOCK_LEN); + memset(md5_checksum, 0, 16 + 1); + } + close(fd); + + show_hashtest_result(); + hashtest_clean(); + return 0; +} diff --git a/hashmap.c b/hashmap.c index 3978400..dc9c8b1 100644 --- a/hashmap.c +++ b/hashmap.c @@ -1,14 +1,45 @@ /* * Generic map implementation. + * Zaks Wang fix bug if put same key will increase the map size. + * Add SGI STL primes + * 2013-5-9 */ #include "hashmap.h" #include #include #include +#define HASHFUN BKDR_hash +unsigned int (*hash_fun)(char *keystring); -#define INITIAL_SIZE (256) #define MAX_CHAIN_LENGTH (8) +/* + * Zaks Wang add the SGI C++ STL primes + * 2013-5-8 + */ +#define num_primes 28 +static const unsigned long prime_list[num_primes]= +{ + 53ul,97ul,193ul,389ul,769ul, + 1543ul,3079ul,6151ul,12289ul,24593ul, + 49157ul,98317ul,196613ul,393241ul,786433ul, + 1572869ul,3145739ul,6291469ul,12582917ul,25165843ul, + 50331653ul,100663319ul,201326611ul,402653189ul,805306457ul, + 1610612741ul,3221225473ul,4294967291ul +}; + +inline unsigned long next_prime(unsigned long n){ + const unsigned long * first = prime_list; + const unsigned long * last = prime_list+(int)num_primes; + while(first != last&&*first <= n){ + first++; //获取一个比输入大的质数 + } + if(first==last){ + return *(last-1); //返回最大素数的 + }else{ + return *first; + } +} /* We need to keep keys and values */ typedef struct _hashmap_element{ @@ -28,14 +59,16 @@ typedef struct _hashmap_map{ /* * Return an empty hashmap, or NULL on failure. 
*/ -map_t hashmap_new() { +map_t hashmap_new(unsigned long size) { hashmap_map* m = (hashmap_map*) malloc(sizeof(hashmap_map)); + hash_fun=HASHFUN; //select the hash function if(!m) goto err; - - m->data = (hashmap_element*) calloc(INITIAL_SIZE, sizeof(hashmap_element)); + size = next_prime(size); + //long total = size*sizeof(hashmap_element); + m->data = (hashmap_element*) calloc(size, sizeof(hashmap_element)); //calloc zero-initializes the array if(!m->data) goto err; - m->table_size = INITIAL_SIZE; + m->table_size = size; m->size = 0; return m; @@ -150,7 +183,7 @@ unsigned long crc32(const unsigned char *s, unsigned int len) { unsigned int i; unsigned long crc32val; - + crc32val = 0; for (i = 0; i < len; i ++) { @@ -160,6 +193,13 @@ unsigned long crc32(const unsigned char *s, unsigned int len) } return crc32val; } +/* + * Hash keystring with the configurable hash_fun and reduce it modulo table_size + */ +unsigned int hashmap_hash_int_diff(hashmap_map *m,char*keystring){ + unsigned int key = hash_fun(keystring); + return key%m->table_size; +} /* * Hashing function for a string @@ -199,7 +239,7 @@ int hashmap_hash(map_t in, char* key){ if(m->size >= (m->table_size/2)) return MAP_FULL; /* Find the best index */ - curr = hashmap_hash_int(m, key); + curr = hashmap_hash_int_diff(m, key); /* Linear probing */ for(i = 0; i< MAX_CHAIN_LENGTH; i++){ @@ -225,8 +265,9 @@ int hashmap_rehash(map_t in){ /* Setup the new elements */ hashmap_map *m = (hashmap_map *) in; + unsigned long nextSize = next_prime(m->table_size); hashmap_element* temp = (hashmap_element *) - calloc(2 * m->table_size, sizeof(hashmap_element)); + calloc(nextSize, sizeof(hashmap_element)); if(!temp) return MAP_OMEM; /* Update the array */ @@ -235,7 +276,7 @@ int hashmap_rehash(map_t in){ /* Update the size */ old_size = m->table_size; - m->table_size = 2 * m->table_size; + m->table_size = nextSize; m->size = 0; /* Rehash the elements */ @@ -244,7 +285,7 @@ int hashmap_rehash(map_t in){ if (curr[i].in_use == 0) continue; - + status = hashmap_put(m, curr[i].key, curr[i].data); if (status != MAP_OK) 
return status; @@ -273,12 +314,18 @@ int hashmap_put(map_t in, char* key, any_t value){ } index = hashmap_hash(in, key); } - + /* + * bug fixed by Zaks Wang + * When the same key is inserted again, return early so the map size does not grow + */ + if(m->data[index].in_use==1){ + return MAP_USED; + } /* Set the data */ m->data[index].data = value; m->data[index].key = key; m->data[index].in_use = 1; - m->size++; + m->size++; return MAP_OK; } @@ -295,7 +342,7 @@ int hashmap_get(map_t in, char* key, any_t *arg){ m = (hashmap_map *) in; /* Find data location */ - curr = hashmap_hash_int(m, key); + curr = hashmap_hash_int_diff(m, key); /* Linear probing, if necessary */ for(i = 0; itable_size; i++) @@ -357,7 +404,7 @@ int hashmap_remove(map_t in, char* key){ m = (hashmap_map *) in; /* Find key */ - curr = hashmap_hash_int(m, key); + curr = hashmap_hash_int_diff(m, key); /* Linear probing, if necessary */ for(i = 0; isize; else return 0; -} \ No newline at end of file +} diff --git a/hashmap.h b/hashmap.h index 000efad..4f90b3e 100644 --- a/hashmap.h +++ b/hashmap.h @@ -13,6 +13,8 @@ #define MAP_FULL -2 /* Hashmap is full */ #define MAP_OMEM -1 /* Out of Memory */ #define MAP_OK 0 /* OK */ +#define MAP_USED -4 /* Key is already in use */ +#include "hash.h" /* * any_t is a pointer. This allows you to put arbitrary structures in @@ -32,11 +34,14 @@ typedef int (*PFany)(any_t, any_t); * represented. They see and manipulate only map_t's. */ typedef any_t map_t; +#if defined(__cplusplus) +extern "C" { +#endif /* * Return an empty hashmap. Returns NULL if empty. */ -extern map_t hashmap_new(); +map_t hashmap_new(unsigned long size); /* * Iteratively call f with argument (item, data) for @@ -45,37 +50,40 @@ extern map_t hashmap_new(); * than MAP_OK the traversal is terminated. f must * not reenter any hashmap functions, or deadlock may arise. */ -extern int hashmap_iterate(map_t in, PFany f, any_t item); +int hashmap_iterate(map_t in, PFany f, any_t item); /* * Add an element to the hashmap. Return MAP_OK or MAP_OMEM. 
*/ -extern int hashmap_put(map_t in, char* key, any_t value); +int hashmap_put(map_t in, char* key, any_t value); /* * Get an element from the hashmap. Return MAP_OK or MAP_MISSING. */ -extern int hashmap_get(map_t in, char* key, any_t *arg); +int hashmap_get(map_t in, char* key, any_t *arg); /* * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING. */ -extern int hashmap_remove(map_t in, char* key); +int hashmap_remove(map_t in, char* key); /* * Get any element. Return MAP_OK or MAP_MISSING. * remove - should the element be removed from the hashmap */ -extern int hashmap_get_one(map_t in, any_t *arg, int remove); +int hashmap_get_one(map_t in, any_t *arg, int remove); /* * Free the hashmap */ -extern void hashmap_free(map_t in); +void hashmap_free(map_t in); /* * Get the current size of a hashmap */ -extern int hashmap_length(map_t in); +int hashmap_length(map_t in); +#if defined(__cplusplus) +} /* closes extern "C" */ +#endif -#endif __HASHMAP_H__ \ No newline at end of file +#endif /*__HASHMAP_H__*/ diff --git a/main.c b/main.c index 4c128e3..7da41ed 100644 --- a/main.c +++ b/main.c @@ -25,8 +25,8 @@ int main(char* argv, int argc) map_t mymap; char key_string[KEY_MAX_LENGTH]; data_struct_t* value; - - mymap = hashmap_new(); + + mymap = hashmap_new(2000000ul); /* First, populate the hash map with ascending values */ for (index=0; indexnumber==index); } - + /* Make sure that a value that wasn't in the map can't be found */ snprintf(key_string, KEY_MAX_LENGTH, "%s%d", KEY_PREFIX, KEY_COUNT); error = hashmap_get(mymap, key_string, (void**)(&value)); - + /* Make sure the value was not found */ assert(error==MAP_MISSING); @@ -71,11 +71,11 @@ int main(char* argv, int argc) error = hashmap_remove(mymap, key_string); assert(error==MAP_OK); - free(value); + free(value); } - + /* Now, destroy the map */ hashmap_free(mymap); return 1; -} \ No newline at end of file +}