zoukankan      html  css  js  c++  java
  • posix 正则库程序

    使用的是posix 正则库,参考:

    http://see.xidian.edu.cn/cpp/html/1428.html

    编译程序:

    gcc myreg.c

    ip.pat 内容:

     ip
    .*[0-9]+.[0-9]+.[0-9]+.[0-9]+

    ip.txt 内容:

    192.168.1.1

    测试:

    ./a.out ip.pat ip.txt

    下面是myreg.c源代码

    /*  myreg.c  */

    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <regex.h>
    #include <unistd.h>


    #define MAX 256

    /* Holds the position information of the matches found by reg(). */
    typedef struct placemsg_t
    {
     int start[MAX]; // start position of each match
     int end[MAX];   // end position of each match
     int count;      // number of matches
    } placemsg;

    #define TIMES 100
    #define MAX_PATTERN_LEN 8192

    /**
     * @brief Convert a single hexadecimal digit character to its numeric value.
     *
     * Implemented with ordinary range comparisons rather than the GNU-only
     * `case lo ... hi` syntax, so the function builds with any ISO C compiler.
     *
     * @param c the character to convert ('0'-'9', 'a'-'f' or 'A'-'F')
     *
     * @return the digit value 0..15; -1 (after writing a diagnostic to stderr)
     *         when c is not a hexadecimal digit
     */
    static int hex2dec(char c)
    {
            if (c >= '0' && c <= '9')
                    return c - '0';
            if (c >= 'a' && c <= 'f')
                    return c - 'a' + 10;
            if (c >= 'A' && c <= 'F')
                    return c - 'A' + 10;

            fprintf(stderr, "hex2dec: bad value! ");
            return -1;
    }

    /**
     * @brief Expand the \xHH escapes of a regular expression into the bytes they
     *        stand for and lower-case the whole result.
     *
     * Every character of the input is passed through tolower(), including the
     * bytes produced from \xHH escapes. An escape is only recognised when both
     * characters after "\x" are hexadecimal digits; anything else is copied
     * through (lower-cased) unchanged.
     *
     * @param s NUL-terminated regular expression, possibly containing \xHH escapes
     *
     * @return a newly allocated NUL-terminated string that the caller must
     *         free(); NULL when s is NULL or the allocation fails
     */
    static char *pre_process(char *s)
    {
            size_t len;
            size_t in = 0;
            size_t out = 0;
            char *result;

            if (s == NULL)
                    return NULL;

            /* The output is never longer than the input, so len + 1 bytes suffice. */
            len = strlen(s);
            result = malloc(len + 1);
            if (result == NULL)
                    return NULL;

            while (in < len)
            {
                    /* <ctype.h> functions need a value in unsigned char range. */
                    if (in + 3 < len &&
                        s[in] == '\\' && s[in + 1] == 'x' &&
                        isxdigit((unsigned char)s[in + 2]) &&
                        isxdigit((unsigned char)s[in + 3]))
                    {
                            int value = hex2dec(s[in + 2]) * 16 + hex2dec(s[in + 3]);

                            /* the decoded byte is lower-cased like every other character */
                            result[out++] = (char)tolower(value);
                            in += 4; /* backslash, 'x' and two hex digits */
                    }
                    else
                    {
                            result[out++] = (char)tolower((unsigned char)s[in]);
                            in++;
                    }
            }
            result[out] = '\0';

            return result;
    }

    /*
     * Decide whether a line read from a pattern file is a comment.
     *
     * A line counts as a comment when it is empty, when its first character is
     * '#', or when it consists of whitespace only.
     *
     * line: NUL-terminated line (must not be NULL)
     *
     * Returns 1 for a comment line, 0 otherwise.
     */
    static int is_comment(char* line)
    {
        const unsigned char *p;

        /* blank lines and lines starting with # are comments */
        if (line[0] == '\0' || line[0] == '#')
            return 1;

        /*
         * Lines with only whitespace are comments. The bytes are examined as
         * unsigned char because passing a negative char to isspace() is
         * undefined behaviour.
         */
        for (p = (const unsigned char *)line; *p != '\0'; p++)
        {
            if (!isspace(*p))
                return 0;
        }
        return 1;
    }

    /*
     * Extract the protocol name from a line of a .pat file.
     *
     * Copies the characters of line up to (not including) the first whitespace
     * character into the buffer *patname and NUL-terminates the result.
     *
     * line:    NUL-terminated line from the pattern file
     * patname: address of a caller-owned buffer; it must have room for
     *          strlen(line) + 1 bytes, since this function cannot check its size
     *
     * Returns *patname.
     */
    static char *get_protocol_name (char *line, char **patname)
    {
     char *name = *patname;
     size_t n = 0;

     /* cast: isspace() requires a value in unsigned char range */
     while (line[n] != '\0' && !isspace((unsigned char)line[n]))
     {
      name[n] = line[n];
      n++;
     }
     name[n] = '\0';
     return name;
    }

    /*
     * Locate the last occurrence of mark in str (used to find the final '/').
     *
     * Returns the index just past that occurrence, i.e. the position of the
     * character following it, or 0 when mark does not occur in str.
     */
    int last_mark (char *str, char mark)
    {
     const char *hit = strrchr(str, mark);

     if (hit == NULL)
     {
      return 0;
     }
     return (int)(hit - str) + 1;
    }

    /*
     * Locate the first occurrence of mark in str at or after index num (used to
     * find the first '.' of a file name).
     *
     * str:  NUL-terminated string
     * mark: character to look for
     * num:  index to start searching from; must satisfy 0 <= num <= strlen(str)
     *
     * Returns the index just past the occurrence. When mark is not present the
     * search stops at the terminating NUL instead of running off the end of
     * the string, and strlen(str) + 1 is returned, so callers treat the whole
     * remainder of the string as preceding the (missing) mark.
     */
    int first_mark (char *str, char mark, int num)
    {
     const char *hit = strchr(str + num, mark);

     if (hit == NULL)
     {
      return (int)strlen(str) + 1;
     }
     return (int)(hit - str) + 1;
    }

    /*
     * Extract the file name, without its suffix, from a full file name.
     *
     * Copies firstdot - lastslash - 1 characters of srcstr, starting at index
     * lastslash, into the buffer *decstr and NUL-terminates the result.
     *
     * srcstr:    full file name
     * decstr:    address of a caller-owned output buffer; it must have room for
     *            the copied characters plus the terminating NUL (its size
     *            cannot be checked here)
     * lastslash: index of the first character to copy (as returned by last_mark)
     * firstdot:  index just past the delimiting '.' (as returned by first_mark)
     *
     * Returns the number of characters copied, not counting the NUL; an
     * inverted range yields an empty string and 0.
     */
    int substr (char *srcstr, char **decstr, int lastslash, int firstdot)
    {
     char *out = *decstr;
     int size = firstdot - lastslash - 1;
     int i;

     if (size < 0)
     {
      size = 0;
     }
     for (i = 0; i < size; i++)
     {
      out[i] = srcstr[lastslash + i];
     }
     out[size] = '\0';
     return size;
    }

    /*
     * Derive the bare file name of a path: the text after the last '/' and
     * before the first '.' that follows it. The result is written by substr()
     * into the caller-owned buffer *name.
     *
     * NOTE(review): this shares its name with basename(3) declared in
     * <libgen.h>; confirm that header is never included alongside this file.
     *
     * Always returns 0.
     */
    int basename(char *file, char **name)
    {
     const int after_slash = last_mark (file, '/');

     substr (file, name, after_slash, first_mark (file, '.', after_slash));
     return 0;
    }

    /*
     * Load the regular expression stored in the pattern file `path`.
     *
     * Comment lines (see is_comment) are skipped. The first remaining line that
     * contains the protocol name (the file name of `path` without directory and
     * suffix, obtained through basename()) is the name line; the next
     * non-comment line after it is the regular expression.
     *
     * Returns the expression as a malloc'ed string the caller must free(), or
     * NULL on any error (a diagnostic is printed).
     */
    static char *reg_load_pattern(char *path)
    {
      char *protocol;
      char *line = NULL;
      char *pattern = NULL;
      size_t cap = 0;
      ssize_t got;
      int name_seen = 0;
      FILE *fp;

      /* The extracted name is never longer than the path itself. */
      if (strlen(path) >= MAX)
      {
        fprintf(stderr, "reg: pattern file name too long\n");
        return NULL;
      }

      /* zero-filled so the name is a valid C string whatever basename() writes */
      protocol = calloc(MAX, sizeof *protocol);
      if (protocol == NULL)
      {
        perror("calloc");
        return NULL;
      }
      basename(path, &protocol);

      fp = fopen(path, "r");
      if (fp == NULL)
      {
        perror("fopen");
        free(protocol);
        return NULL;
      }

      while ((got = getline(&line, &cap, fp)) != -1)
      {
        /* drop the newline only if there is one (the last line may lack it) */
        if (got > 0 && line[got - 1] == '\n')
        {
          line[got - 1] = '\0';
        }

        if (is_comment(line))
        {
          printf("is_comment ");
          continue;
        }
        if (!name_seen)
        {
          /* still looking for the line that carries the protocol name */
          if (strstr(line, protocol) != NULL)
          {
            name_seen = 1;
          }
          continue;
        }

        printf("正则表达式是:%s ", line);
        pattern = line; /* hand the getline buffer over to the caller */
        line = NULL;
        break;
      }

      fclose(fp);
      free(line);
      free(protocol);

      if (pattern == NULL)
      {
        fprintf(stderr, "reg: no regular expression found in %s\n", path);
      }
      return pattern;
    }

    /*
     * Read the whole file `path` into a freshly allocated, NUL-terminated
     * buffer. Returns the buffer (caller frees) or NULL on error, after
     * printing a diagnostic.
     */
    static char *reg_read_file(char *path)
    {
      FILE *fp;
      long filesize;
      size_t got;
      char *buf;

      fp = fopen(path, "rb");
      if (fp == NULL)
      {
        perror("fopen");
        return NULL;
      }

      if (fseek(fp, 0, SEEK_END) != 0 ||
          (filesize = ftell(fp)) < 0 ||
          fseek(fp, 0, SEEK_SET) != 0)
      {
        perror("fseek");
        fclose(fp);
        return NULL;
      }
      printf("filezize=%ld ", filesize);

      /* one extra zero byte terminates the text for regexec() */
      buf = calloc((size_t)filesize + 1, 1);
      if (buf == NULL)
      {
        perror("calloc");
        fclose(fp);
        return NULL;
      }

      got = fread(buf, 1, (size_t)filesize, fp);
      if (got != (size_t)filesize && ferror(fp))
      {
        perror("fread");
        free(buf);
        fclose(fp);
        return NULL;
      }
      fclose(fp);

      printf("res=%d,string=%s ", (int)got, buf);
      return buf;
    }

    /**
     * @brief Find every match of the regular expression stored in a pattern
     *        file inside the contents of another file.
     *
     * The expression is read from regexpfile, its \xHH escapes are expanded by
     * pre_process(), and it is applied repeatedly to the contents of file_path
     * using the POSIX regex API:
     *   regcomp()  with cflags REG_EXTENDED | REG_NEWLINE
     *                REG_EXTENDED  use extended regular expression syntax
     *                REG_NEWLINE   newlines separate lines for '^', '$' and '.'
     *   regexec()  with eflags REG_NOTEOL, plus REG_NOTBOL when resuming
     *                REG_NOTBOL    start of the string is not a start of line
     *                REG_NOTEOL    end of the string is not an end of line
     *   regerror() to turn a regcomp()/regexec() error code into a message
     *   regfree()  to release the compiled expression
     *
     * @param regexpfile path of the pattern file holding the regular expression
     * @param file_path  path of the file whose contents are searched
     *
     * @return match information: count is the number of matches recorded (at
     *         most MAX) and start[i]/end[i] are the offsets of match i from the
     *         beginning of the file contents. On any error count is 0.
     */
    struct placemsg_t reg(char *regexpfile, char *file_path)
    {
      struct placemsg_t placeinfo;
      regex_t preg;
      regmatch_t hit;
      char errbuf[MAX];
      char *rawpattern;
      char *regexpstr;
      char *string;
      char *p;
      int res;

      /* start from a fully defined result so every error path returns count 0 */
      memset(&placeinfo, 0, sizeof placeinfo);

      /* fetch the regular expression from the .pat file */
      rawpattern = reg_load_pattern(regexpfile);
      printf ("2line=%s ", rawpattern != NULL ? rawpattern : "(null)");
      if (rawpattern == NULL)
      {
        return placeinfo;
      }

      /* expand the \xHH escapes */
      regexpstr = pre_process(rawpattern);
      free(rawpattern);
      if (regexpstr == NULL)
      {
        fprintf(stderr, "reg: cannot pre-process the regular expression\n");
        return placeinfo;
      }
      printf("regexpstr=%s ", regexpstr);

      printf("file_path=%s ", file_path);
      string = reg_read_file(file_path);
      if (string == NULL)
      {
        free(regexpstr);
        return placeinfo;
      }

      printf("begin regcomp......... ");
      res = regcomp (&preg, regexpstr, REG_EXTENDED | REG_NEWLINE);
      if (res != 0)
      {
        regerror (res, &preg, errbuf, sizeof errbuf);
        fprintf(stderr, "regcomp: %s\n", errbuf);
        free(string);
        free(regexpstr);
        return placeinfo;
      }

      printf("begin regexec......... ");
      p = string;
      while (placeinfo.count < MAX) /* never write past start[]/end[] */
      {
        int eflags = REG_NOTEOL;
        int base = (int)(p - string); /* offset of p within the whole text */

        /* when resuming in the middle of a line, '^' must not match at p */
        if (p != string && p[-1] != '\n')
        {
          eflags |= REG_NOTBOL;
        }

        res = regexec (&preg, p, 1, &hit, eflags);
        if (res != 0)
        {
          if (res != REG_NOMATCH)
          {
            regerror (res, &preg, errbuf, sizeof errbuf);
            fprintf(stderr, "regexec: %s\n", errbuf);
          }
          printf("no match ");
          break;
        }

        printf("...........match.......... ");
        /* regexec reports offsets relative to p; convert to file offsets */
        placeinfo.start[placeinfo.count] = base + (int)hit.rm_so;
        placeinfo.end[placeinfo.count] = base + (int)hit.rm_eo;
        placeinfo.count++;

        p += hit.rm_eo;
        if (hit.rm_eo == hit.rm_so && *p != '\0')
        {
          /* empty match: step over one character so the loop makes progress */
          p++;
        }
        if (*p == '\0')
        {
          break;
        }
      }

      regfree(&preg);
      free(string);
      free(regexpstr);

      return placeinfo;
    }

    /*
     * Program entry point.
     *
     * Usage: a.out <pattern-file> <text-file>
     *
     * Runs reg() on the two files named on the command line and prints the
     * start and end position of every match it reports. Returns 0 on success
     * and EXIT_FAILURE when the two file arguments are missing.
     */
    int main (int argc, char **argv)
    {
     struct placemsg_t placeinfo;
     int i;

     /* both file names are required; argv[1]/argv[2] are otherwise NULL */
     if (argc < 3)
     {
      fprintf(stderr, "usage: %s <pattern-file> <text-file>\n",
              argc > 0 ? argv[0] : "myreg");
      return EXIT_FAILURE;
     }

     printf("regexpfile=%s ", argv[1]);

     placeinfo = reg(argv[1], argv[2]);

     printf(".....show.... ");
     for (i = 0; i < placeinfo.count; i++)
     {
      printf ("start[%d]=%d ", i, placeinfo.start[i]);
      printf ("end[%d]=%d ", i, placeinfo.end[i]);
     }
     return 0;
    }

  • 相关阅读:
    ASP.NET HTTP404错误怎么办
    ASP.NET HTTP500错误怎么办
    Fireworks如何制作透明窗口PNG
    CSS如何实现自定义鼠标应用到整个网页
    Dreamweaver如何设置自动换行,修改字体
    火狐浏览器缓存区的利用,如何提取火狐缓存的动画
    PHP快速入门 如何配置Apache服务器
    PHP中调用外部命令的方法
    PHP与SQL数据库交互中文乱码怎么办
    [Angular] Angular Attribute Decorator
  • 原文地址:https://www.cnblogs.com/etangyushan/p/3759543.html
Copyright © 2011-2022 走看看