zoukankan      html  css  js  c++  java
  • 句子分析器

    初步实现了一个接口:

      1 #include<stdio.h>
      2 #include <stdlib.h>
      3 #include <string.h>
      4 #include "lyPublic/lyCodeConvert.h"
      5 #define X_LONGSEN  500
      6 #define Y_LONGWORD 100
      7 struct node 
      8 {
      9     char MWord[Y_LONGWORD]; // highest-weight word recorded for the key that ends at this node (copied in by insertTree)
     10     int order; // weight of that word; -1 marks a node where no key ends
     11     struct node *next[16]; // children, one slot per hexadecimal digit of the key
     12 };
     13 
     14 typedef struct node node;
     15 char z_Str[Y_LONGWORD];
     16 
     17 void insertTree(char *str, node *T, char *MaxWord, int num_max);
     18 void findStr(char *str, node *T);
     19 int findNum(char *str, node *T);
     20 int SentenceTransform(char *FromWord,char *Tostr);
     21 
     22 /*
     23 函数功能:
     24     将一句话里面的部分词 转化为 权值最高的词
     25 变量说明:
     26     FromWord 原句子
     27     Tostr 转化后的句子
     28 */
     29 
     30 int SentenceTransform(char *FromWord,char *Tostr)
     31 {
     32     //FILE *fp;  
     33     FILE *fq;
     34     char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子
     35     int lenGetSen, leWord, leSen; 
     36     char GetWord[Y_LONGWORD] = "" ;
     37     char ToWord[Y_LONGWORD] = "";
     38     char strhan[Y_LONGWORD] = "";
     39     char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词
     40     char hanMax[Y_LONGWORD] = "" ;
     41     node *T;
     42     int i, j, k, len, s, num, max_num, f;
     43     int from, to, at;
     44     int num_max;
     45     int lenTostr = 0 , lenZ_Str;
     46 
     47     T = (node *)malloc(sizeof(node));
     48         //初始化节点
     49     memset (T->MWord, 0 ,sizeof(T->MWord));
     50     T->order = -1;
     51     for(i = 0; i < 16; i++)
     52         T->next[i] = NULL;
     53 
     54 
     55 /*
     56     打开权值文档,文档格式:
     57         平凡28&平淡--62 平庸--5 平凡--82 平常--38]
     58         平常83&平常--38 寻常--31]
     59         贫困24&困顿--0 贫困--42 窘迫--4]
     60         贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1]
     61     说明:
     62         第一个是一句话权值最高的词,紧跟着的就是权值
     63         & 是分隔符
     64         后面的是 近义词 的词和气权值大小
     65 */
     66 
     67 
     68 
     69 //  读取权值文档,建立字典树
     70     fq = fopen ("1.txt","r++");
     71 //    fp = fopen ("jieguo.txt","w+r");
     72     while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值
     73     {
     74         lenGetSen = strlen(GetSentence);
     75         leSen = 0;
     76         memset (MaxWord, 0, sizeof(MaxWord));
     77         leWord = 0;
     78         leWord = 0;
     79         while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9'))
     80             MaxWord[leWord++] = GetSentence[leSen++];
     81         //取最高权词的权值 
     82         num_max = 0; 
     83         while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9')
     84             num_max = num_max*10 + GetSentence[leSen++] - '0';
     85         leSen++;
     86         while (GetSentence[leSen] != ']' && leSen < lenGetSen)
     87         {
     88             memset (GetWord, 0, sizeof(GetWord));
     89             memset (ToWord, 0, sizeof(ToWord));
     90             leWord = 0;
     91             while (GetSentence[leSen] != '-') 
     92             {
     93                 GetWord[leWord++] = GetSentence[leSen++];
     94             }
     95             HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord));
     96             insertTree (ToWord, T, MaxWord, num_max);
     97             while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-')
     98                 leSen++;
     99         }
    100     }
    101 
    102 
    103     //转化句子
    104     len = strlen(FromWord);
    105     at = 0;
    106     for (i = 0; i < len;)
    107     {
    108         max_num = -1;
    109         memset(strhan, 0, sizeof(strhan));
    110         for (j = i; j <= len; j+=2)
    111         {
    112             memset (GetWord, 0, sizeof(GetWord));
    113             s = 0;
    114             num = -10;
    115             //记录汉字
    116             for (k = i; k < j; k++)
    117                 strhan[s++] = FromWord [k];
    118 
    119                 //转码
    120             HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord));
    121 
    122             if(strlen(strhan) != 0)
    123                 num = findNum(GetWord, T);
    124             else
    125                 continue;
    126             if (num > max_num)
    127             {
    128                 max_num = num;
    129                 memset(hanMax, 0, sizeof(hanMax));
    130                 strcpy(hanMax, strhan);
    131                 from = i;
    132                 to =j;
    133             }
    134         }
    135         if(max_num != -1)
    136         {
    137             while(at < from)
    138             {
    139                 //fputc(FromWord [at], fp);
    140                 Tostr[lenTostr++] = FromWord[at++];
    141             }
    142             memset (GetWord, 0, sizeof(GetWord));
    143             memset(z_Str, 0, sizeof(z_Str));
    144             HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord));
    145             findStr(GetWord, T);
    146             lenZ_Str = strlen(z_Str);
    147             for(f = 0; f < lenZ_Str ;f++)
    148                 Tostr[lenTostr++] = z_Str[f];
    149             at = to;
    150             i = to;
    151         }
    152         else
    153         {
    154             for(j = at; j < at+2; j++)
    155                 Tostr[lenTostr++] = FromWord[j];
    156             //    fputc(FromWord[j], fp);
    157             at += 2;
    158             i += 2;
    159         }
    160     }
    161     return 0;
    162 }
    163 
    164 void insertTree(char *str, node *T, char *MaxWord, int num_max)
    165 {
    166     int len, i, j, flag=0, id;
    167     node *p, *q;
    168     p = T;
    169     len = strlen(str);
    170     for (i = 0; i < len; i++)
    171     {
    172         if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字
    173             id = str[i]- 'a' + 10;
    174         else
    175             id = str[i] - '0';
    176         if( p ->next[id] == NULL)//扩展节点
    177         {
    178             flag = 1;
    179             q = (node *)malloc(sizeof(node));
    180             memset(q->MWord, 0, sizeof(q->MWord));
    181             q->order = -1;
    182             for(j = 0;j < 16 ;j++)
    183                 q ->next[j] = NULL;
    184             p->next[id] = q;
    185         }
    186         p = p->next[id];
    187     }
    188     if(flag)
    189     {
    190         strcpy(p->MWord, MaxWord);
    191         p->order = num_max;
    192     }
    193     else
    194     {
    195         if( p -> order == -1)
    196         {
    197             strcpy(p->MWord, MaxWord);
    198             p->order = num_max ;
    199         }
    200     }
    201 }
    202 
    203 void findStr(char *str, node *T)
    204 {
    205     int len , i, id;
    206     node *p;
    207     p = T;
    208     len = strlen(str);
    209     for (i=0; i< len ; ++i)
    210     {
    211         if(str[i]>= 'a' && str[i] <= 'f')
    212             id = str[i]- 'a' + 10;
    213         else
    214             id = str[i] - '0';
    215         if(p->next[id] == NULL)
    216             return;
    217         p = p->next[id];
    218     }
    219     strcpy(z_Str, p->MWord);
    220 }
    221 
    222 
    223 int findNum(char *str, node *T)
    224 {
    225     int len, i, id;
    226     node *p;
    227     p = T;
    228     len = strlen(str);
    229     for(i = 0; i < len; i++)
    230     {
    231         if(str[i]>= 'a' && str[i] <= 'f')
    232             id = str[i]- 'a' + 10;
    233         else
    234             id = str[i] - '0';
    235         if(p->next[id] == NULL)
    236             return  -1;
    237         p = p->next[id];
    238     }
    239     return p->order;
    240 }
      1 #include<stdio.h>
      2 #include <stdlib.h>
      3 #include <string.h>
      4 #include "lyPublic/lyCodeConvert.h"
      5 #define X_LONGSEN  500
      6 #define Y_LONGWORD 100
      7 struct node 
      8 {
      9     char MWord[Y_LONGWORD]; // highest-weight word recorded for the key that ends at this node (copied in by insertTree)
     10     int order; // weight of that word; -1 marks a node where no key ends
     11     struct node *next[16]; // children, one slot per hexadecimal digit of the key
     12 };
     13 
     14 typedef struct node node;
     15 char z_Str[Y_LONGWORD];
     16 
     17 void insertTree(char *str, node *T, char *MaxWord, int num_max);
     18 void findStr(char *str, node *T);
     19 int findNum(char *str, node *T);
     20 int SentenceTransform(char *FromWord,char *Tostr);
     21 
     22 /*
     23 函数功能:
     24     将一句话里面的部分词 转化为 权值最高的词
     25 变量说明:
     26     FromWord 原句子
     27     Tostr 转化后的句子
     28 */
     29 
     30 int SentenceTransform(char *FromWord,char *Tostr)
     31 {
     32     //FILE *fp;  
     33     FILE *fq;
     34     char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子
     35     int lenGetSen, leWord, leSen; 
     36     char GetWord[Y_LONGWORD] = "" ;
     37     char ToWord[Y_LONGWORD] = "";
     38     char strhan[Y_LONGWORD] = "";
     39     char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词
     40     char hanMax[Y_LONGWORD] = "" ;
     41     node *T;
     42     int i, j, k, len, s, num, max_num, f;
     43     int from, to, at;
     44     int num_max;
     45     int lenTostr = 0 , lenZ_Str;
     46 
     47     T = (node *)malloc(sizeof(node));
     48         //初始化节点
     49     memset (T->MWord, 0 ,sizeof(T->MWord));
     50     T->order = -1;
     51     for(i = 0; i < 16; i++)
     52         T->next[i] = NULL;
     53 
     54 
     55 /*
     56     打开权值文档,文档格式:
     57         平凡28&平淡--62 平庸--5 平凡--82 平常--38]
     58         平常83&平常--38 寻常--31]
     59         贫困24&困顿--0 贫困--42 窘迫--4]
     60         贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1]
     61     说明:
     62         第一个是一句话权值最高的词,紧跟着的就是权值
     63         & 是分隔符
     64         后面的是 近义词 的词和气权值大小
     65 */
     66 
     67 
     68 
     69 //  读取权值文档,建立字典树
     70     fq = fopen ("1.txt","r++");
     71 //    fp = fopen ("jieguo.txt","w+r");
     72     while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值
     73     {
     74         lenGetSen = strlen(GetSentence);
     75         leSen = 0;
     76         memset (MaxWord, 0, sizeof(MaxWord));
     77         leWord = 0;
     78         leWord = 0;
     79         while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9'))
     80             MaxWord[leWord++] = GetSentence[leSen++];
     81         //取最高权词的权值 
     82         num_max = 0; 
     83         while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9')
     84             num_max = num_max*10 + GetSentence[leSen++] - '0';
     85         leSen++;
     86         while (GetSentence[leSen] != ']' && leSen < lenGetSen)
     87         {
     88             memset (GetWord, 0, sizeof(GetWord));
     89             memset (ToWord, 0, sizeof(ToWord));
     90             leWord = 0;
     91             while (GetSentence[leSen] != '-') 
     92             {
     93                 GetWord[leWord++] = GetSentence[leSen++];
     94             }
     95             HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord));
     96             insertTree (ToWord, T, MaxWord, num_max);
     97             while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-')
     98                 leSen++;
     99         }
    100     }
    101 
    102 
    103     //转化句子
    104     len = strlen(FromWord);
    105     at = 0;
    106     for (i = 0; i < len;)
    107     {
    108         max_num = -1;
    109         memset(strhan, 0, sizeof(strhan));
    110         for (j = i; j <= len; j+=2)
    111         {
    112             memset (GetWord, 0, sizeof(GetWord));
    113             s = 0;
    114             num = -10;
    115             //记录汉字
    116             for (k = i; k < j; k++)
    117                 strhan[s++] = FromWord [k];
    118 
    119                 //转码
    120             HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord));
    121 
    122             if(strlen(strhan) != 0)
    123                 num = findNum(GetWord, T);
    124             else
    125                 continue;
    126             if (num > max_num)
    127             {
    128                 max_num = num;
    129                 memset(hanMax, 0, sizeof(hanMax));
    130                 strcpy(hanMax, strhan);
    131                 from = i;
    132                 to =j;
    133             }
    134         }
    135         if(max_num != -1)
    136         {
    137             while(at < from)
    138             {
    139                 //fputc(FromWord [at], fp);
    140                 Tostr[lenTostr++] = FromWord[at++];
    141             }
    142             memset (GetWord, 0, sizeof(GetWord));
    143             memset(z_Str, 0, sizeof(z_Str));
    144             HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord));
    145             findStr(GetWord, T);
    146             lenZ_Str = strlen(z_Str);
    147             for(f = 0; f < lenZ_Str ;f++)
    148                 Tostr[lenTostr++] = z_Str[f];
    149             at = to;
    150             i = to;
    151         }
    152         else
    153         {
    154             for(j = at; j < at+2; j++)
    155                 Tostr[lenTostr++] = FromWord[j];
    156             //    fputc(FromWord[j], fp);
    157             at += 2;
    158             i += 2;
    159         }
    160     }
    161     return 0;
    162 }
    163 
    164 void insertTree(char *str, node *T, char *MaxWord, int num_max)
    165 {
    166     int len, i, j, flag=0, id;
    167     node *p, *q;
    168     p = T;
    169     len = strlen(str);
    170     for (i = 0; i < len; i++)
    171     {
    172         if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字
    173             id = str[i]- 'a' + 10;
    174         else
    175             id = str[i] - '0';
    176         if( p ->next[id] == NULL)//扩展节点
    177         {
    178             flag = 1;
    179             q = (node *)malloc(sizeof(node));
    180             memset(q->MWord, 0, sizeof(q->MWord));
    181             q->order = -1;
    182             for(j = 0;j < 16 ;j++)
    183                 q ->next[j] = NULL;
    184             p->next[id] = q;
    185         }
    186         p = p->next[id];
    187     }
    188     if(flag)
    189     {
    190         strcpy(p->MWord, MaxWord);
    191         p->order = num_max;
    192     }
    193     else
    194     {
    195         if( p -> order == -1)
    196         {
    197             strcpy(p->MWord, MaxWord);
    198             p->order = num_max ;
    199         }
    200     }
    201 }
    202 
    203 void findStr(char *str, node *T)
    204 {
    205     int len , i, id;
    206     node *p;
    207     p = T;
    208     len = strlen(str);
    209     for (i=0; i< len ; ++i)
    210     {
    211         if(str[i]>= 'a' && str[i] <= 'f')
    212             id = str[i]- 'a' + 10;
    213         else
    214             id = str[i] - '0';
    215         if(p->next[id] == NULL)
    216             return;
    217         p = p->next[id];
    218     }
    219     strcpy(z_Str, p->MWord);
    220 }
    221 
    222 
    223 int findNum(char *str, node *T)
    224 {
    225     int len, i, id;
    226     node *p;
    227     p = T;
    228     len = strlen(str);
    229     for(i = 0; i < len; i++)
    230     {
    231         if(str[i]>= 'a' && str[i] <= 'f')
    232             id = str[i]- 'a' + 10;
    233         else
    234             id = str[i] - '0';
    235         if(p->next[id] == NULL)
    236             return  -1;
    237         p = p->next[id];
    238     }
    239     return p->order;
    240 }

    主函数:

    #include<stdio.h>
    #include<string.h>
    #include "AnalysisWord.h"
    
    /*
     * Read sentences from standard input, one per line, run each through
     * SentenceTransform() and print the result.
     *
     * Returns 0 once the input is exhausted.
     */
    int main()
    {
        char strGetFromWeb[500] = "";
        char strGetToWeb[500] = "";

        /* fgets() bounds the read to the buffer; gets() could not, and it
           was removed from the language in C11. */
        while (fgets(strGetFromWeb, sizeof(strGetFromWeb), stdin) != NULL)
        {
            /* fgets() keeps the line terminator, gets() did not: strip it. */
            strGetFromWeb[strcspn(strGetFromWeb, "\r\n")] = '\0';

            /* Start from an all-zero output buffer so the result is
               NUL-terminated whatever SentenceTransform() writes.
               NOTE(review): the output can be longer than the input when a
               replacement word is longer than the original; 500 bytes may
               need to grow. */
            memset(strGetToWeb, 0, sizeof(strGetToWeb));

            SentenceTransform(strGetFromWeb, strGetToWeb);
            puts(strGetToWeb);
        }
        return 0;
    }
  • 相关阅读:
    c++获取时间戳
    指针数组学习
    Matlab小波工具箱的使用2
    matlab 小波工具箱
    指针
    低通滤波参数
    git 合并分支到master
    matlab json文件解析 需要下载一个jsonlab-1.5
    matlab2017b
    数据结构-链式栈c++
  • 原文地址:https://www.cnblogs.com/zibuyu/p/3216908.html
Copyright © 2011-2022 走看看