修改点 :
1. 因为做排序需要互换结构体里面的内容, 导致 hashnode 里面保存的 filePtr 指向的内容发生改变, 现在把 hashnode 和 file node 结合在一起 (本来应该合在一起,但之前觉得分开管理清晰一点)
2. 最后的函数直接交换两个结构体对象, 导致 next 指针也随之被交换, 从而破坏 next 所在的链表: 其他节点保存的地址没有变, 但该地址上的内容已经换成了另一个节点. 所以对含有指针成员 (并且被其他指针引用) 的结构体直接做整体交换, 会有 BUG
3. 改变 : 在最后的函数
(1)创建一个临时的结构体数组, 然后保存到一个 自己实现 或者 STL 优先队列中
(2)直接保存到一个 STL 优先队列中 (优先队列自动会创建 next 等数据, 只需要实现 compare 函数)
(3)做一个判断, 如果数据没有删除, 保存当前的 索引 (memidx) 到一个 int 数组, 然后对这个数组依次进 优先队列
(4)创建一个堆排结构, 在添加和删除的时候都进行维护, 最后每次取头节点 , 然后更新, 重复 N 次 (返回前 N 级别最高的数据)
#include "common.h"
// NOTE(review): nullptr is a keyword since C++11; defining it as a macro is only
// appropriate when building as C or pre-C++11 C++ — confirm the target toolchain.
#define nullptr 0
// Short aliases for the unsigned integer types used throughout this file.
#define u64 unsigned long long int
#define u32 unsigned int
// Capacity in bytes of every name buffer (LinkNode::str and each filename[] entry).
#define maxstringsize 10
// One tree node. The same object is also an entry of the hash table
// (see hash_value / hash_next), so tree data and hash data live together.
typedef struct LinkNode
{
u32 depth; // depth of this node in the tree; the root is 0
u32 mNum; // count held by this node; add_file deducts a child's mNum from its parent
u32 create_tm; // creation stamp taken from g_create_tm (larger = created later)
u32 memidx; // index of this node inside g_n_mem
bool is_del; // set to true by merge_file once the node has been merged away
char str[maxstringsize]; // node name
LinkNode *father; // parent node (nullptr for the root)
LinkNode *sonlist; // head of the singly linked list of children
LinkNode *next; // next sibling inside the parent's sonlist
//for hash
u64 hash_value; // hash of the node name as computed by my_hash
LinkNode *hash_next; // next node in the same hash bucket chain
}LinkNode;
// Static node pool: nodes are handed out sequentially from g_n_mem and never freed.
#define maxmemsize 1007
static LinkNode g_n_mem[maxmemsize];
// Number of pool slots handed out so far; reset to 0 by init().
static int g_n_mem_cnt = 0;
// Hash table: each bucket is the head of a chain linked through LinkNode::hash_next,
// indexed by hash_value % maxhashsize.
#define maxhashsize 107
static LinkNode *m_hashmap[maxhashsize];
// Fixed table of names used by the test driver; entry 0 is the name given to the root by init().
static char filename[20][maxstringsize]
{
{"root"},
{"aaaa"},
{"bbbb"},
{"april"},
{"friday"},
{"monday"},
{"love"},
{"kiss"},
{"autum"},
{"weather"},
{"water"},
{"leak"},
{"mouth"},
{"leader"},
{"gohome"},
{"shafa"},
{"season"},
{"global"},
{"see"},
{"sea"},
};
// Root of the tree; pointed at g_n_mem[0] by init().
static LinkNode *root = nullptr;
// Counter stamped into LinkNode::create_tm and incremented on every node creation.
static u32 g_create_tm = 0;
// Depth limit checked by merge_file.
#define m_max_depth 5 // there is 5 level only
// 2^32 = 4294967296
// 26^6 = 308915776
//
// Base-26 positional hash of a NUL-terminated string.
// Each character contributes (c - 'a' + 1), so the mapping is collision-free
// for short all-lowercase names; a different multiplier (31 / 131) can be used
// when the alphabet is wider. Arithmetic wraps modulo 2^64.
//
// str: NUL-terminated string to hash.
// returns: the 64-bit hash value (0 for the empty string).
u64 my_hash2(const char str[])
{
    u64 h = 0;
    // Stop at the string terminator; scanning for ' ' would run past the end
    // of any name that contains no space.
    while (*str != '\0')
    {
        h = h * 26 + (*str++) - 'a' + 1;
    }
    return h;
}
// Polynomial hash of a NUL-terminated string: sum of str[i] * 2531011^i,
// with arithmetic wrapping modulo 2^64.
//
// str: NUL-terminated string to hash.
// returns: the 64-bit hash value (0 for the empty string).
u64 my_hash(const char str[])
{
    u64 h = 0;
    u64 p = 1; // current power of the multiplier
    // Stop at the string terminator; scanning for ' ' would run past the end
    // of any name that contains no space.
    while (*str != '\0')
    {
        h += p * (*str++);
        p *= 2531011;
    }
    return h;
}
// Push pNode onto the front of the bucket chain whose head pointer is *phead.
void add_to_hash_list(LinkNode **phead, LinkNode *pNode)
{
    LinkNode *old_head = *phead;

    pNode->hash_next = old_head;
    *phead = pNode;
}
// Look up a node by hash value.
// Walks the bucket selected by hash_value % maxhashsize and returns the first
// node whose stored hash equals hash_value, or nullptr when there is none.
LinkNode *is_exist_in_hash_list(u64 hash_value)
{
    for (LinkNode *cur = m_hashmap[hash_value % maxhashsize]; cur != nullptr; cur = cur->hash_next)
    {
        if (cur->hash_value == hash_value)
        {
            return cur;
        }
    }
    return nullptr;
}
// Unlink the first node carrying hash_value from its bucket chain and clear
// that node's hash fields. Does nothing (besides logging) when the bucket is
// empty or holds no node with this hash value.
void del_from_hash_list(u64 hash_value)
{
    // hash_value is unsigned 64-bit, so print it with %llu; the bucket index
    // is cast to int to match %d.
    LOGE("%llu will be del from hash list idx = %d", hash_value, (int)(hash_value % maxhashsize));

    // `link` always points at the pointer that references the node under
    // inspection (bucket head or a predecessor's hash_next), so head and
    // interior removal are the same operation.
    LinkNode **link = &m_hashmap[hash_value % maxhashsize];
    while (*link != nullptr && (*link)->hash_value != hash_value)
    {
        link = &(*link)->hash_next;
    }

    LinkNode *victim = *link;
    if (victim == nullptr)
    {
        return; // not present: nothing to unlink
    }

    *link = victim->hash_next;
    victim->hash_value = 0;
    victim->hash_next = nullptr;
}
// Make pNode the first child of father and record father as its parent.
void add_to_father_son_list(LinkNode *father, LinkNode *pNode)
{
    LinkNode *former_first = father->sonlist;

    pNode->father = father;
    pNode->next = former_first;
    father->sonlist = pNode;
}
// Unlink pNode from its parent's child list.
// pNode's own `father` and `next` fields are left untouched; the caller resets
// them. Does nothing when pNode is nullptr, has no parent (e.g. the root), or
// is not found in the parent's list.
void del_from_father_list(LinkNode *pNode)
{
    if (pNode == nullptr || pNode->father == nullptr)
    {
        return;
    }

    // `link` points at the pointer that references the current child
    // (the parent's sonlist or a sibling's next).
    LinkNode **link = &pNode->father->sonlist;
    while (*link != nullptr && *link != pNode)
    {
        link = &(*link)->next;
    }

    if (*link == pNode)
    {
        *link = pNode->next;
    }
}
void init(int mNum)
{
LOGE("mNum = %d", mNum);
g_create_tm = 0;
for (int i = 0; i < maxhashsize; i++)
{
m_hashmap[i] = nullptr;
}
g_n_mem_cnt = 0;
//add node
u64 hash_value = my_hash(filename[0]);
root = &g_n_mem[g_n_mem_cnt];
strcpy_s(root->str, filename[0]);
root->hash_value = hash_value;
root->sonlist = nullptr;
root->next = nullptr;
root->father = nullptr;
root->depth = 0;
root->mNum = mNum;
root->create_tm = g_create_tm++;
root->is_del = false;
root->memidx = g_n_mem_cnt++;
//add to hash list
root->hash_next = nullptr;
add_to_hash_list(&m_hashmap[root->hash_value % maxhashsize], root);
}
void add_file(char upper[], char newFile[], int mNum)
{
u64 hash_value = my_hash(upper);
LinkNode * father = is_exist_in_hash_list(hash_value);
if (father == nullptr)
{
LOGE("there is no upper= %s mNum=%d return %s", upper, mNum, newFile);
return;
}
else if (mNum >= father->mNum)
{
LOGE("can not add to upper= %s mNum=%d > %d return %s", upper, mNum, father->mNum, newFile);
return;
}
father->mNum -= mNum;
LOGE("father = %s %s , son = %s", father->str, upper, newFile);
//add node
u64 son_hash_value = my_hash(newFile);
LinkNode *son = &g_n_mem[g_n_mem_cnt];
strcpy_s(son->str, newFile);
son->hash_value = son_hash_value;
son->sonlist = nullptr;
son->next = nullptr;
son->mNum = mNum;
son->depth = father->depth + 1;
son->create_tm = g_create_tm++;
son->is_del = false;
son->memidx = g_n_mem_cnt++;
//add to hash list
son->hash_next = nullptr;
add_to_hash_list(&m_hashmap[son->hash_value % maxhashsize], son);
//add to father son list
add_to_father_son_list(father, son);
}
// Recompute the depth of every descendant of `father` from father->depth and
// raise *max_depth to the depth of the deepest childless node reached.
// `father`'s own depth is taken as given and is not modified.
void update_depth(LinkNode *father, u32 *max_depth)
{
    LinkNode *child = father->sonlist;

    if (child == nullptr)
    {
        // Childless node: it is a candidate for the maximum depth.
        if (*max_depth < father->depth)
        {
            *max_depth = father->depth;
        }
        return;
    }

    for (; child != nullptr; child = child->next)
    {
        child->depth = father->depth + 1;
        update_depth(child, max_depth);
    }
}
//如果file 已经存在于 upper 下, 直接返回
//假设深度最大是5, 如果并入 upper 后,总的深度超过5,则直接返回
//合并规则: file 的 mNum 并入 upper 节点, file 的子链表变成 upper 的子链表
void merge_file(char upper[], char file[])
{
u64 hash_value = my_hash(upper);
LinkNode * father = is_exist_in_hash_list(hash_value);
u64 son_hash_value = my_hash(file);
LinkNode * son = is_exist_in_hash_list(son_hash_value);
if (father == nullptr || son == nullptr)
return;
LOGE("father = %s %s , son = %s %s", father->str, upper, son->str, file);
u32 max_depth = 0;
update_depth(son, &max_depth);
if (father->depth + max_depth - son->depth > m_max_depth)
{
LOGE("depth is beyond limit, return depth: %s(%d) %s(%d) max_depth(%d)",
father->str, father->depth, son->str, son->depth, max_depth);
return;
}
LOGE("max_depth of %s = %d", son->str, max_depth);
//remove from hash list
del_from_hash_list(son->hash_value);
//remove from son list (这里要注意, son 的father 不是当前的 father)
del_from_father_list(son);
//add to father son list
LinkNode *p = son->sonlist;
while (p != nullptr) {
// 这里有 BUG, 因为添加到父节点的子链表会导致 next指向改变, 再指行 p=p->next 会错误
LinkNode *q = p;
p = p->next;
add_to_father_son_list(father, q);
}
max_depth = 0;
update_depth(father, &max_depth);
//remove node
son->sonlist = nullptr;
son->father = nullptr;
son->next = nullptr;
son->hash_value = 0;
son->mNum = 0;
son->depth = 0;
son->is_del = true;
}
// Add `mNum` to each of the `depthNum` highest-ranked live nodes (root excluded).
// Ranking: the smaller mNum ranks higher; on equal mNum the node created later
// ranks higher.
//
// The pool array g_n_mem itself is never reordered: other nodes and the hash
// table hold pointers into it, so swapping its elements would corrupt every
// list. Only a temporary array of pool indices is sorted.
//
// When fewer than `depthNum` live nodes exist, all of them are updated.
void recuit(u32 depthNum, u32 mNum)
{
    LOGE("depthNum = %d mNum = %d", depthNum, mNum);

    // Collect the pool indices of all live nodes; slot 0 holds the root and is skipped.
    int live[maxmemsize];
    u32 count = 0;
    for (int i = 1; i < g_n_mem_cnt; i++)
    {
        if (g_n_mem[i].is_del == false)
        {
            live[count++] = i;
        }
    }

    // Never rank more entries than exist.
    u32 picks = (depthNum < count) ? depthNum : count;

    // Partial exchange sort: after pass i, live[i] is the i-th best node.
    for (u32 i = 0; i < picks; i++)
    {
        for (u32 j = i + 1; j < count; j++)
        {
            const LinkNode *a = &g_n_mem[live[i]];
            const LinkNode *b = &g_n_mem[live[j]];
            if (a->mNum > b->mNum || (a->mNum == b->mNum && a->create_tm < b->create_tm))
            {
                int tmp = live[i];
                live[i] = live[j];
                live[j] = tmp;
            }
        }
    }

    // Apply the increment only after ranking so it cannot influence the order.
    for (u32 i = 0; i < picks; i++)
    {
        u32 idx = live[i];
        g_n_mem[idx].mNum += mNum;
        LOGE("idx = %d mNum = %d", idx, g_n_mem[idx].mNum);
    }
}
测试:
//root->1->3(april)->5(monday)->6(love)->7(kiss)
//root->1->4(friday)
//root->2(bbb)->8(autum)->9(weather)->10(water)
extern void test_hash_optimize()
{
init(100);
add_file(filename[0], filename[1], 50);
add_file(filename[0], filename[2], 40);
add_file(filename[1], filename[3], 10);
add_file(filename[1], filename[4], 15);
add_file(filename[3], filename[5], 5);
add_file(filename[5], filename[6], 3);
add_file(filename[6], filename[7], 2);
add_file(filename[2], filename[8], 7);
//add fail
add_file(filename[8], filename[9], 9); //父只有7个,添加失败
add_file(filename[9], filename[10], 4); //没有父节点,添加失败
add_file(filename[8], filename[9], 6); //父有7个
add_file(filename[9], filename[10], 4); // okay
add_file(filename[9], filename[11], 1); // okay
recuit(5, 10);
merge_file(filename[6], filename[8]);
merge_file(filename[6], filename[9]);
//检查 hash 链表 filePtr 是否已经被修改
for (int i = 0; i < g_n_mem_cnt; i++)
{
u64 hash_value = my_hash(filename[i]);
LinkNode * pNode = is_exist_in_hash_list(hash_value);
if (pNode != nullptr)
LOGE("filename[%d] = %s, node str = %s", i, filename[i], pNode->str);
}
}
e:\c++\c_test\c_test\hash_optimize.cpp init 163 : mNum = 100
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = root root , son = aaaa
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = root root , son = bbbb
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = aaaa aaaa , son = april
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = aaaa aaaa , son = friday
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = april april , son = monday
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = monday monday , son = love
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = love love , son = kiss
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = bbbb bbbb , son = autum
e:\c++\c_test\c_test\hash_optimize.cpp add_file 202 : can not add to upper= autum mNum=9 > 7 return weather
e:\c++\c_test\c_test\hash_optimize.cpp add_file 197 : there is no upper= weather mNum=4 return water
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = autum autum , son = weather
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = weather weather , son = water
e:\c++\c_test\c_test\hash_optimize.cpp add_file 208 : father = weather weather , son = leak
// idx 越大, 说明创建时间越晚; num 越小, 优先级越高
e:\c++\c_test\c_test\hash_optimize.cpp recuit 312 : depthNum = 5 mNum = 10
e:\c++\c_test\c_test\hash_optimize.cpp recuit 341 : idx = 11 mNum = 11
e:\c++\c_test\c_test\hash_optimize.cpp recuit 341 : idx = 9 mNum = 11
e:\c++\c_test\c_test\hash_optimize.cpp recuit 341 : idx = 8 mNum = 11
e:\c++\c_test\c_test\hash_optimize.cpp recuit 341 : idx = 6 mNum = 11
e:\c++\c_test\c_test\hash_optimize.cpp recuit 341 : idx = 7 mNum = 12
e:\c++\c_test\c_test\hash_optimize.cpp merge_file 264 : father = love love , son = autum autum
e:\c++\c_test\c_test\hash_optimize.cpp merge_file 271 : depth is beyond limit, return depth: love(4) autum(2) max_depth(4)
e:\c++\c_test\c_test\hash_optimize.cpp merge_file 264 : father = love love , son = weather weather
e:\c++\c_test\c_test\hash_optimize.cpp merge_file 274 : max_depth of weather = 4
e:\c++\c_test\c_test\hash_optimize.cpp del_from_hash_list 104 : 2575973297045806516 will be del from hash list idx = 69
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[0] = root, node str = root
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[1] = aaaa, node str = aaaa
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[2] = bbbb, node str = bbbb
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[3] = april, node str = april
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[4] = friday, node str = friday
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[5] = monday, node str = monday
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[6] = love, node str = love
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[7] = kiss, node str = kiss
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[8] = autum, node str = autum
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[10] = water, node str = water
e:\c++\c_test\c_test\hash_optimize.cpp test_hash_optimize 381 : filename[11] = leak, node str = leak
e:\c++\c_test\c_test\c_test.cpp main 44 : process_time = 10.000000