zoukankan      html  css  js  c++  java
  • 剩余块用switch处理

    在做循环展开时,处理完能被展开因子整除的部分后,还需要处理剩余块。这里做了个对比实验:用 switch 的贯穿(fall-through)来加速剩余块的处理。

    // switch0.c
    
    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <time.h>
    
    /*
     * Benchmark: sum the first `res` elements of a random vector using a
     * fall-through switch that indexes from the start of the array
     * (vec[k - 1]), repeated UINT32_MAX times.
     *
     * Usage: switch0 <res>, with 1 <= res <= 16.
     * Prints the accumulated sum (wrapping modulo 2^32), then the elapsed
     * clock() ticks, one value per line.
     * Returns EXIT_FAILURE when the argument is missing or out of range.
     */
    int main(int argc, char *argv[])
    {
    	enum { MAX_RES = 16 }; /* largest case label handled by the switch */
    
    	if (argc < 2)
    	{
    		fprintf(stderr, "usage: %s <res: 1..%d>\n",
    			argc > 0 ? argv[0] : "switch0", MAX_RES);
    		return EXIT_FAILURE;
    	}
    
    	/* strtol instead of atoi so garbage and out-of-range input is detected;
    	 * an overflowing value saturates to LONG_MIN/LONG_MAX and fails the
    	 * range check below. */
    	char *parse_end = NULL;
    	long parsed = strtol(argv[1], &parse_end, 10);
    	if (parse_end == argv[1] || *parse_end != '\0' || parsed < 1 || parsed > MAX_RES)
    	{
    		fprintf(stderr, "res must be an integer in 1..%d, got \"%s\"\n",
    			MAX_RES, argv[1]);
    		return EXIT_FAILURE;
    	}
    	int res = (int)parsed;
    
    	uint32_t accum = 0;
    	int vec[res]; /* VLA size is bounded to 1..MAX_RES by the check above */
    	for (int i = 0; i < res; i++)
    	{
    		vec[i] = rand();
    	}
    
    	clock_t start = clock();
    
    	for (uint32_t i = UINT32_MAX; i; i--)
    	{
    		/* Every case deliberately falls through to the next one, so
    		 * entering at `case res` adds vec[res - 1] down to vec[0]. */
    		switch (res)
    		{
    		case 16:
    			accum += vec[16 - 1];
    		case 15:
    			accum += vec[15 - 1];
    		case 14:
    			accum += vec[14 - 1];
    		case 13:
    			accum += vec[13 - 1];
    		case 12:
    			accum += vec[12 - 1];
    		case 11:
    			accum += vec[11 - 1];
    		case 10:
    			accum += vec[10 - 1];
    		case 9:
    			accum += vec[9 - 1];
    		case 8:
    			accum += vec[8 - 1];
    		case 7:
    			accum += vec[7 - 1];
    		case 6:
    			accum += vec[6 - 1];
    		case 5:
    			accum += vec[5 - 1];
    		case 4:
    			accum += vec[4 - 1];
    		case 3:
    			accum += vec[3 - 1];
    		case 2:
    			accum += vec[2 - 1];
    		case 1:
    			accum += vec[1 - 1];
    		default:
    			break;
    		}
    	}
    
    	clock_t end = clock();
    	printf("%u\n", (unsigned)accum);
    	/* clock_t has an implementation-defined type; cast to match %lu. */
    	printf("%lu\n", (unsigned long)(end - start));
    	return EXIT_SUCCESS;
    }
    
    
    // switch1.c
    
    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <time.h>
    
    /*
     * Benchmark: sum the `res` elements of a random vector using a
     * fall-through switch that indexes backwards from the one-past-the-end
     * pointer (vec_end[-k]), repeated UINT32_MAX times.
     *
     * Usage: switch1 <res>, with 1 <= res <= 16.
     * Prints the accumulated sum (wrapping modulo 2^32), then the elapsed
     * clock() ticks, one value per line.
     * Returns EXIT_FAILURE when the argument is missing or out of range.
     */
    int main(int argc, char *argv[])
    {
    	enum { MAX_RES = 16 }; /* largest case label handled by the switch */
    
    	if (argc < 2)
    	{
    		fprintf(stderr, "usage: %s <res: 1..%d>\n",
    			argc > 0 ? argv[0] : "switch1", MAX_RES);
    		return EXIT_FAILURE;
    	}
    
    	/* strtol instead of atoi so garbage and out-of-range input is detected;
    	 * an overflowing value saturates to LONG_MIN/LONG_MAX and fails the
    	 * range check below. */
    	char *parse_end = NULL;
    	long parsed = strtol(argv[1], &parse_end, 10);
    	if (parse_end == argv[1] || *parse_end != '\0' || parsed < 1 || parsed > MAX_RES)
    	{
    		fprintf(stderr, "res must be an integer in 1..%d, got \"%s\"\n",
    			MAX_RES, argv[1]);
    		return EXIT_FAILURE;
    	}
    	int res = (int)parsed;
    
    	uint32_t accum = 0;
    	int vec[res]; /* VLA size is bounded to 1..MAX_RES by the check above */
    	const int *vec_end = vec + res; /* one past the last element */
    	for (int i = 0; i < res; i++)
    	{
    		vec[i] = rand();
    	}
    
    	clock_t start = clock();
    
    	for (uint32_t i = UINT32_MAX; i; i--)
    	{
    		/* Every case deliberately falls through to the next one, so
    		 * entering at `case res` adds vec_end[-res] up to vec_end[-1],
    		 * i.e. the whole vector in ascending address order. */
    		switch (res)
    		{
    		case 16:
    			accum += vec_end[-16];
    		case 15:
    			accum += vec_end[-15];
    		case 14:
    			accum += vec_end[-14];
    		case 13:
    			accum += vec_end[-13];
    		case 12:
    			accum += vec_end[-12];
    		case 11:
    			accum += vec_end[-11];
    		case 10:
    			accum += vec_end[-10];
    		case 9:
    			accum += vec_end[-9];
    		case 8:
    			accum += vec_end[-8];
    		case 7:
    			accum += vec_end[-7];
    		case 6:
    			accum += vec_end[-6];
    		case 5:
    			accum += vec_end[-5];
    		case 4:
    			accum += vec_end[-4];
    		case 3:
    			accum += vec_end[-3];
    		case 2:
    			accum += vec_end[-2];
    		case 1:
    			accum += vec_end[-1];
    		default:
    			break;
    		}
    	}
    
    	clock_t end = clock();
    	printf("%u\n", (unsigned)accum);
    	/* clock_t has an implementation-defined type; cast to match %lu. */
    	printf("%lu\n", (unsigned long)(end - start));
    	return EXIT_SUCCESS;
    }
    
    
    filename 剩余块大小为7 剩余块大小为14
    switch0.c 9265104 18538175
    switch1.c 9250006 18597986

    好像性能差不多(我原以为第二种写法会快一些)。

    另外,在 datasketches-cpp/common/include/MurmurHash3.h 里看到了类似于第一种的写法。或许还可以用 accum 数组代替单个 accum 变量来进一步加速。

    一般 Intel CPU 的缓存行大小(cache line size)为 64 字节。

  • 相关阅读:
    解说asp.net core MVC 过滤器的执行顺序
    asp.net core 2.0 Microsoft.Extensions.Logging 文本文件日志扩展
    【技术累积】【点】【java】【30】代理模式
    【技术累积】【点】【java】【29】MapUtils
    【技术累积】【点】【java】【28】Map遍历
    【技术累积】【点】【java】【27】@JSONField
    【技术累积】【点】【java】【26】@Value默认值
    【技术累积】【点】【java】【25】Orderd
    【技术累积】【点】【java】【23】super以及重写重载
    【技术累积】【线】【java】【2】AOP
  • 原文地址:https://www.cnblogs.com/Tifa-Best/p/14090297.html
Copyright © 2011-2022 走看看