zoukankan      html  css  js  c++  java
  • GPGPU OpenCL 精确字符串查找

    字符串查找是信息安全、信息过滤领域的重要操作,尤其是对大文本的实时处理。这篇作为实例,使用GPU OpenCL进行精确模式串查找。

    1.加速方法

      (1)将少量常量数据,如模式串长度、文本长度等,保存在线程的private memory中。

      (2)将模式串保存在GPU的local memory中,加速线程对模式串的访问。

      (3)将待查找的文本保存在global memory中,使用尽可能多线程访问global memory,减小线程平均访存时间。

      (4)每个work-group中的线程操作文本中一段,多个work-group并行处理大文本。

    2.同步

      (1)work-group内,使用barrier(CLK_LOCAL_MEM_FENCE)、barrier(CLK_GLOBAL_MEM_FENCE)同步组内线程。

      (2)全局使用对__global int 的原子操作,来保证每个线程将结果写到全局内存的正确位置。设备支持的操作可以通过查询设备的扩展获得,如下图,可知核函数支持原子操作、printf操作:

      

    3.代码实例,大文本精确模式串搜索

    3.1 核函数(string_search_kernel.cl):

     1 int compare(__global const uchar* text, __local const uchar* pattern, uint length){
     2     for(uint l=0; l<length; ++l){
     3         if (text[l] != pattern[l]) 
     4         return 0;
     5     }
     6     return 1;
     7 }
     8 
     9 __kernel void
    10     StringSearch (
    11       __global uchar* text,        //Input Text
    12       const uint textLength,        //Length of the text
    13       __global const uchar* pattern,    //Pattern string
    14       const uint patternLength,        //Pattern length
    15       const uint maxSearchLength,    //Maximum search positions for each work-group
    16       __global int* resultCount,    //Result counts (global)
    17       __global int* resultBuffer,    //Save the match result
    18       __local uchar* localPattern)    //local buffer for the search pattern
    19 {  
    20 
    21     int localIdx = get_local_id(0);
    22     int localSize = get_local_size(0);
    23     int groupIdx = get_group_id(0);
    24 
    25     uint lastSearchIdx = textLength - patternLength + 1;
    26     uint beginSearchIdx = groupIdx * maxSearchLength;
    27     uint endSearchIdx = beginSearchIdx + maxSearchLength;
    28     if(beginSearchIdx > lastSearchIdx) 
    29     return;
    30     if(endSearchIdx > lastSearchIdx) 
    31     endSearchIdx = lastSearchIdx;
    32 
    33     for(int idx = localIdx; idx < patternLength; idx+=localSize)
    34         localPattern[idx] = pattern[idx];
    35     barrier(CLK_LOCAL_MEM_FENCE);
    36     
    37     for(uint stringPos=beginSearchIdx+localIdx; stringPos<endSearchIdx; stringPos+=localSize){
    38     if (compare(text+stringPos, localPattern, patternLength) == 1){
    39             int count = atomic_inc(resultCount);
    40             resultBuffer[count] = stringPos;
    41         //printf("%d ",stringPos);
    42         }
    43     barrier(CLK_LOCAL_MEM_FENCE);
    44     }
    45 }

    3.2 tool.h、tool.cpp

    见:http://www.cnblogs.com/xudong-bupt/p/3582780.html

    3.3 StringSearch.cpp

      1 #include <CL/cl.h>
      2 #include "tool.h"
      3 #include <string.h>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <iostream>
      7 #include <string>
      8 #include <fstream>
      9 using namespace std;
     10 
     11 
     12 int main(int argc, char* argv[])
     13 {
     14     cl_int    status;
     15     /**Step 1: Getting platforms and choose an available one(first).*/
     16     cl_platform_id platform;
     17     getPlatform(platform);
     18 
     19     /**Step 2:Query the platform and choose the first GPU device if has one.*/
     20     cl_device_id *devices=getCl_device_id(platform);
     21 
     22     /**Step 3: Create context.*/
     23     cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
     24 
     25     /**Step 4: Creating command queue associate with the context.*/
     26     cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
     27 
     28     /**Step 5: Create program object */
     29     const char *filename = "string_search_kernel.cl";
     30     string sourceStr;
     31     status = convertToString(filename, sourceStr);
     32     const char *source = sourceStr.c_str();
     33     size_t sourceSize[] = {strlen(source)};
     34     cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
     35 
     36     /**Step 6: Build program. */
     37     status=clBuildProgram(program, 1,devices,NULL,NULL,NULL);
     38 
     39 
     40     /**Step 7: Initial input,output for the host and create memory objects for the kernel*/
     41     string textStr;    //StringSearch_Input.txt
     42     convertToString("StringSearch_Input.txt", textStr);
     43     const char *    text = textStr.c_str();
     44     int        textlen=strlen(text);
     45 
     46     char *    pattern="info";
     47     int        patternlen=strlen(pattern);
     48     int        maxSearchLength=256*64;
     49     int    *    resultCount=new int[1];
     50     *resultCount=0;
     51     int    *    result=new int[textlen];
     52         memset(result,0,sizeof(int)*textlen);
     53 
     54     cl_mem    textBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(char)*textlen,(void *)text, NULL);    //global memory
     55     cl_mem    patternBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR ,sizeof(char)*patternlen, (void *)pattern, NULL);
     56     cl_mem    resultCountBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR ,sizeof(int), (void *)resultCount, NULL);
     57     cl_mem    resultBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR ,sizeof(int)*textlen, (void *)result, NULL);
     58 
     59     /**Step 8: Create kernel object */
     60     cl_kernel kernel = clCreateKernel(program,"StringSearch", NULL);
     61 
     62     /**Step 9: Sets Kernel arguments.*/
     63     status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&textBuffer);    //global
     64     status = clSetKernelArg(kernel, 1, sizeof(int), &textlen);        //private
     65     status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&patternBuffer);    //global
     66     status = clSetKernelArg(kernel, 3, sizeof(int), &patternlen);    //private
     67     status = clSetKernelArg(kernel, 4, sizeof(int), &maxSearchLength);    //private
     68     status = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&resultCountBuffer);    //global
     69     status = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&resultBuffer);    //global
     70     status = clSetKernelArg(kernel, 7, sizeof(char)*patternlen, NULL);    //local
     71 
     72     /**Step 10: Running the kernel.*/
     73     cl_event enentPoint;
     74     int globalWorkItem=textlen/64;
     75 
     76     if(textlen%64 != 0)
     77         globalWorkItem++;
     78     size_t groupNUm[1]={globalWorkItem};
     79     size_t localNUm[1]={256};
     80 
     81     status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, groupNUm, localNUm, 0, NULL, &enentPoint);
     82 
     83     clWaitForEvents(1,&enentPoint); ///wait
     84     clReleaseEvent(enentPoint);
     85     int    count=0;
     86     status = clEnqueueReadBuffer(commandQueue, resultCountBuffer, CL_TRUE, 0, sizeof(int), &count, 0, NULL, NULL);
     87     cout<<"
    Number of matches:"<<count<<endl;
     88 
     89     /**Step 12: Clean the resources.*/
     90     status = clReleaseKernel(kernel);//*Release kernel.
     91     status = clReleaseProgram(program);    //Release the program object.
     92     status = clReleaseMemObject(resultBuffer);//Release mem object.
     93     status = clReleaseMemObject(textBuffer);//Release mem object.
     94     status = clReleaseMemObject(resultCountBuffer);//Release mem object.
     95     status = clReleaseMemObject(patternBuffer);//Release mem object.
     96     status = clReleaseCommandQueue(commandQueue);//Release  Command queue.
     97     status = clReleaseContext(context);//Release context.
     98 
     99     free(devices);
    100     free(result);
    101     free(resultCount);
    102 
    103     getchar();
    104     return 0;
    105 }
    View Code

    本文:http://www.cnblogs.com/xudong-bupt/p/3627593.html

  • 相关阅读:
    webstorm配置github 以及本地代码上传github。
    日期控件------moment.js
    图片查看器
    日期
    配置一个node服务器
    git代码管理
    vue
    javascript (字符串, 数组, 对象 , 日期 和 操作元素节点 动画 定时器)
    html css
    JS常用方法
  • 原文地址:https://www.cnblogs.com/xudong-bupt/p/3627593.html
Copyright © 2011-2022 走看看