随着生物基因测试的技术成熟,科学家们可以通过基因相似度检测,现在要对 N 个人进行测试基 因测试,通过基因检测是否为色盲。
测试色盲的基因组包含 8 位基因,编号 1 至 8。每一位基因都可以用一个字符来表示,这个字符 是'A'、'B'、'C'、'D'四个字符之一。
如:ABDBCBAD
通过认真观察研究,生物学家发现,有时候可能通过特定的连续几位基因,就能区分开是正常者 还是色盲者。对于色盲基因,不需要 8 位基因,只需要看其中连续的 4 位基因就可以判定是正常 者还是色盲者,这 4 位基因编号分别是:(第 2、3、4、5)。也就是说,只需要把第 2,3,4,5 这四 位连续的基因与色盲基因库的记录对比,就能判定该人是正常者还是色盲者。
假设给定的色盲基因库如下:
ADBB
BDDC
CDBC
BDBB
......
请测试下列的基因是否为色盲
AADBBBAD
ABDDCBAA
CCDBCBAA
ABDBBBAC
ABDBCBAD
ABDDBBAD
解答思路:
1. 可以直接把待测试基因的 2,3,4,5 位直接与基因库里的记录逐一对比,但如果色盲基因库很庞 大,程序执行效率很低
2. 可以使用哈希表来存储色盲基因库数据,通过哈希函数把 4 位色盲基因映射到哈希表中,大大提高检索的效率.
源码实现:
hash_table.h
#pragma once #define DEFAULT_SIZE 16 typedef struct _ListNode { struct _ListNode* next; void* key; void* data; }ListNode; typedef ListNode* List; typedef ListNode* Element; typedef struct _HashTable { int TableSize; List* Thelists; }HashTable; //哈希函数 int Hash(void* key, int TableSize); //初始化 HashTable* InitHash(int TableSize); //插入 void Insert(HashTable* HashTable, void* key, void* value); //查找 Element Find(HashTable* HashTable, void* key); //销毁 void Destory(HashTable* HashTable); //提取 void* Retrieve(Element e);
hash_table.cpp
#include <stdio.h> #include <stdlib.h> #include <string.h> #include "hash_table1.h" #define BUCKET_SIZE 1024 #define compare(a,b) strcmp((const char*)a,(const char*)b) #define hash_func SDBMHash unsigned int SDBMHash(void* key) { unsigned int hash = 0; char* str = (char*)key; while (*str) { hash = (*str++) + (hash << 6) + (hash << 16) - hash; } return (hash & 0x7FFFFFFF); } int Hash(void* key, int TableSize) { return hash_func(key) % TableSize; //先将hash_func转换为数字,再进行计算 } HashTable* InitHash(int TableSize) { int i = 0; HashTable* hTable = NULL; if (TableSize <= 0) { TableSize = DEFAULT_SIZE; } hTable = (HashTable*)malloc(sizeof(HashTable)); if (hTable == NULL) { printf("HashTable malloc error.\n"); return NULL; } hTable->TableSize = TableSize; //为Hash桶分配内存空间,其为一个指针数组 hTable->Thelists = (List*)malloc(sizeof(List) * TableSize); if (hTable->Thelists == NULL) { printf("HashTable malloc error.\n"); free(hTable); return NULL; } for (i = 0; i < TableSize; i++) { hTable->Thelists[i] = (ListNode*)malloc(sizeof(ListNode)); if (hTable->Thelists[i] == NULL) { printf("HashTable malloc error.\n"); free(hTable->Thelists); free(hTable); return NULL; } else { memset(hTable->Thelists[i], 0, sizeof(ListNode)); //为每一个hTable->Thelists[i]的数组赋值 } } return hTable; } Element Find(HashTable* HashTable, void* key) { int i = 0; List L = NULL; Element e = NULL; i = Hash(key, HashTable->TableSize); L = HashTable->Thelists[i]; e = L->next; while (e != NULL && compare(e->key, key) != 0) { e = e->next; } return e; } void Insert(HashTable* HashTable, void* key, void* value) { Element e = NULL, tmp = NULL; List L = NULL; e = Find(HashTable, key); if (e == NULL) { tmp = (Element)malloc(sizeof(ListNode)); if (tmp == NULL) { printf("malloc error\n"); return; } //前插 int code = Hash(key, HashTable->TableSize); L = HashTable->Thelists[code]; tmp->data = value; tmp->key = key; tmp->next = L->next; L->next = tmp; } else { printf("the key already exist\n"); } } void Delete(HashTable* HashTable, void* key) { Element e = NULL, last = NULL; List L = NULL; int i = Hash(key, HashTable->TableSize); L = HashTable->Thelists[i]; last = L; e = L->next; while (e != NULL && e->key != key) { last = e; e = e->next; } if (e) { last->next = e->next; free(e); } } void* Retrieve(Element e) { return e ? e->data : NULL; } void Destory(HashTable* HashTable) { int i = 0; List L = NULL; Element cur = NULL, next = NULL; for (i = 0; i < HashTable->TableSize; i++) { L = HashTable->Thelists[i]; cur = L->next; while (cur != NULL) { next = cur->next; free(cur); cur = next; } free(L); } free(HashTable->Thelists); free(HashTable); } int main(void) { char* elems[] = { "ADBB","BDDC","CDBC","BDBB" }; char* tester = "ABDBBBAC"; char cur[5] = { '\0' }; int i = 0; HashTable* HashTable = NULL; HashTable = InitHash(BUCKET_SIZE); Insert(HashTable, elems[0], elems[0]); Insert(HashTable, elems[1], elems[1]); Insert(HashTable, elems[2], elems[2]); Insert(HashTable, elems[3], elems[3]); strncpy_s(cur, tester + 1, 4); //ADBB'\0' Element e = Find(HashTable, cur); if (e) { printf("%s\n", (const char*)Retrieve(e)); } else { printf("Node found [key:%s]\n", cur); } system("pause"); return 0; }