散列存储方法--中文百科全书

基本思想

散列法存储的基本思想是：由节点的关键码值决定节点的存储地址。散列技术除了可以用于查找外，还可以用于存储。

特点

散列是数组存储方式的一种发展，相比数组，散列的数据访问速度要高于数组，因为可以依据存储数据的部分内容找到数据在数组中的存储位置，进而能够快速实现数据的访问，理想的散列访问速度是非常迅速的，而不像在数组中的遍历过程，採用存储数组中内容的部分元素作为映射函式的输入，映射函式的输出就是存储数据的位置，这样的访问速度就省去了遍历数组的实现，因此时间複杂度可以认为为O(1)，而数组遍历的时间複杂度为O(n)。

散列是能一种快速实现访问的存储方式。通常作为检索部分的数据项是整形或者字元串，当是字元串时，字元串的数量要远远大于数组的长度，这时候就会有多个字元串映射到一个存储位置的情况，这就是所谓的冲突问题，而且冲突时肯定存在的，这时候如何实现数据的存储又是需要解决的。

分类

主要的解决方式有两大类，第一种採用鍊表的形式，将所有冲突的数据项採用鍊表的形式连结起来，这样搜寻数据的複杂度就包含了鍊表的遍历问题，特别是当所有的项都连结到一个鍊表下时，这时候实际上就是遍历鍊表，複杂度并不一定有很大的进步，但是这种鍊表连结的方式有很高的填充率。

探测法探测空闲的空间

第二种就是充分利用没有实现的存储空间，利用探测法探测空闲的空间，进而实现数据的存储，有三种探测方式：线性探测法、平方探测法，以及双散列法，三种方式中平方探测法运用比较多，但是都存在各种各样的优缺点，这时候的散列搜寻优势就没有理想情况下那么明显。有时候甚至比遍历数组更加的慢。但是确实不失为一种处理方式。

冲突解决

映射函式可选择的比较多，其实完全可以定义自己的映射函式，但是有时候为了降低冲突的机率设定了一些比较好的映射函式，比如求和取余，或者乘以一定的係数再求和取余等。

本文採用平方探测法解决了冲突问题，具体的实现如下所示：

1、结构体定义

#ifndef__HASHMAP_H_H_

#define__HASHMAP_H_H_

#includelist.h

#defineTABSIZE101

/*状态变数*/

typedefenumSTATE{EMPTY=0,ACTIVE=1,DELETED=2}State;

/*键值结构体*/

typedefstruct_pair

{

char*key;

char*value;

}Pair_t,*Pair_handle_t;

/*每一个实际的存储对象*/

typedefstruct_hashEntry

{

Pair_handle_tpair;

Statestate;

}HashEntry_t,*HashEntry_handle_t;

/*哈希表结构体，便于创建*/

typedefstruct_hashmap

{

HashEntry_t*map;

/*存储实际的存储量*/

intsize;

/*容量*/

intcapacity;

}Hashmap_t,*Hashmap_handle_t;

/*隐射函式类型定义*/

typedefint(*hashfunc)(constchar*,int);

#ifdef__cplusplus

externC

{

#endif

boolalloc_hashmap(Hashmap_handle_t*hashmap,intcapacity);

boolinit_hashmap(Hashmap_handle_thashmap,intcapacity);

boolinsert_hashnode(Hashmap_handle_thashmap,constchar*key,constchar*value);

Pair_handle_tsearch_hashnode(Hashmap_handle_thashmap,constchar*key);

char*GetValue(Hashmap_handle_thashmap,constchar*key);

booldelete_hashnode(Hashmap_handle_thashmap,constchar*key);

intLength(Hashmap_handle_thashmap);

intCapacity(Hashmap_handle_thashmap);

voiddelete_hashmap(Hashmap_handle_thashmap);

voidfree_hashmap(Hashmap_handle_t*hashmap);

char*key_pair(Pair_handle_tpair);

char*value_pair(Pair_handle_tpair);

Hashmap_handle_tcopy_hashmap(Hashmap_handle_thashmap);

boolresize(Hashmap_handle_thashmap);

#ifdef__cplusplus

}

#endif

实现表的分配和创建，採用了动态分配的方式实现，这样可能在性能上比不上静态数据，但是为了实现数组大小的调整，我选择了动态分配的实现方式。

/*分配一个新的对象，可以实现自动分配*/

boolalloc_hashmap(Hashmap_handle_t*hashmap,intcapacity)

{

HashEntry_handle_ttemp=NULL;

Hashmap_t*map=NULL;

if(*hashmap==NULL)

{

/*分配一个散列对象*/

map=(Hashmap_handle_t)malloc(sizeof(Hashmap_t));

if(map==NULL)

returnfalse;

/*指针指向当前对象*/

*hashmap=map;

map=NULL;

/*分配一个数组空间，大小可以控制*/

temp=(HashEntry_handle_t)malloc(

sizeof(HashEntry_t)*capacity);

if(temp!=NULL)

{

/*散列对象的指针指向数组*/

(*hashmap)->map=temp;

temp=NULL;

/*设定参数*/

(*hashmap)->capacity=capacity;

(*hashmap)->size=0;

/*初始化分配的数组空间*/

Tabinital((*hashmap)->map,capacity);

returntrue;

}

returnfalse;

}

/*初始化一个新的对象，这个对象已经创建，只是没有初始化而已*/

boolinit_hashmap(Hashmap_handle_thashmap,intcapacity)

{

HashEntry_handle_ttemp=NULL;

if(hashmap!=NULL)

{

/*分配数组空间*/

temp=(HashEntry_handle_t)malloc(

sizeof(HashEntry_t)*capacity);

if(temp!=NULL)

{

/*完成对象的填充操作*/

hashmap->map=temp;

temp=NULL;

hashmap->capacity=capacity;

hashmap->size=0;

/*初始化数组对象*/

Tabinital(hashmap->map,capacity);

returntrue;

}

returnfalse;

}

关于数组中对象的创建，和释放操作，如下所示：

/*分配一个pair对象*/

staticboolmake_pair(Pair_handle_t*pair,constchar*key,constchar*value)

{

Pair_handle_tnewpair=(Pair_handle_t)malloc(sizeof(Pair_t));

char*newstr=NULL;

if(newpair==NULL)

returnfalse;

newstr=(char*)malloc(strlen(key)+1);

if(newstr==NULL)

returnfalse;

strcpy(newstr,key);

newstr[strlen(key)]='\0';

newpair->key=newstr;

newstr=NULL;

newstr=(char*)malloc(strlen(value)+1);

if(newstr==NULL)

returnfalse;

strcpy(newstr,value);

newstr[strlen(value)]='\0';

newpair->value=newstr;

newstr=NULL;

(*pair)=newpair;

returntrue;

}

/*释放一个对象pair*/

staticvoiddelete_pair(Pair_handle_t*pair)

{

Pair_handle_ttemp=NULL;

if(*pair==NULL)

return;

temp=*pair;

free(temp->key);

temp->key=NULL;

free(temp->value);

temp->value=NULL;

free(temp);

temp=NULL;

*pair=NULL;

}

数组元素的基本操作：

/*完成数组对象的初始化操作*/

staticvoidTabinital(HashEntry_t*tab,intsize)

{

inti=0;

for(;i

{

tab[i].pair=NULL;

tab[i].state=EMPTY;

}

staticvoiddelete_array(HashEntry_handle_t*array,intsize)

{

inti=0;

if(*array!=NULL)

{

for(i=0;i

{

if((*array)[i].state==ACTIVE)

{

delete_pair(&((*array)[i].pair));

(*array)[i].state=DELETED;

}

free(*array);

*array=NULL;

}

插入元素的操作、有两个函式的创建，其中一个为了便于后期大小的调整操作。

/*插入数据到散列中，採用了二次探测的实现方式，并设定了退出条件*/

staticboolinsert_data(Hashmap_handle_thashmap,

constchar*key,constchar*value,hashfuncfunc)

{

inthashval=func(key,hashmap->capacity);

intindex=0;

char*newstr=NULL;

Pair_handle_tnewpair=NULL;

while(hashmap->map[hashval].state!=EMPTY)

{

if((hashmap->map[hashval].state==ACTIVE)

&&(strcmp(hashmap->map[hashval].pair->key,key)==0))

break;

index++;

hashval+=index*index;

hashval%=hashmap->capacity;

if(index==200)

break;

}

if(hashmap->map[hashval].state==EMPTY)

{

if(make_pair(&newpair,key,value))

{

hashmap->map[hashval].state=ACTIVE;

hashmap->map[hashval].pair=newpair;

newpair=NULL;

hashmap->size++;

returntrue;

}

数据在计算机中存储的物理结构有四种：顺序、鍊表、散列与索引。散列是由单词Hash翻译过来的，有时也直接音译为“哈希”，就是把任意长度的输入（又叫做预映射， pre-image），通过散列算法，变换成固定长度的输出，该输出就是散列值。这种转换是一种压缩映射，也就是，散列值的空间通常远小于输入的空间，不同的输入可能会散列成相同的输出，而不可能从散列值来唯一的确定输入值。

散列存储方法

基本思想

特点

分类

冲突解决

相关词条

相关搜索

其它词条