主要思路(注:warehouse 为字符串数组,newstxt 为待处理的字符串):
- 用 C# 中字符串的 Split() 方法,以“.”为分隔符,将字符串切分成 n 段,存放到 warehouse 字符串数组中。
- Split() 方法会错误地将形如 U.S 或者 2.5hours 的内容拆分到两个数组元素中。
- 这样的两个元素有一个规律:前者的末尾字符与后者的开头字符同为数字,或者同为大写字母。
- 如果遇到这种情况,就将两个元素合并成一个字符串(中间补回“.”),存放到前者中;后者由它后面的元素覆盖,以此类推,直到最后一个元素覆盖倒数第二个元素,数组的有效长度减一。
// Split the text on '.' and then re-join the pieces that Split separated by
// mistake, e.g. abbreviations such as "U.S" and decimals such as "1.5hours".
// Heuristic: when a piece ends with an uppercase letter and the piece directly
// after it starts with an uppercase letter (or digit followed by digit), the two
// pieces are joined back together with the '.' that Split removed.
// Only the first para_len entries of warehouse are meaningful afterwards.
string[] warehouse = newstxt.Split('.');
bool markItNumber = false;          // the piece at index 'mark' ended with '0'..'9'
bool markItLetter = false;          // the piece at index 'mark' ended with 'A'..'Z'
// NOTE(review): sb and count1 are not used in this fragment; kept in case code
// outside this fragment relies on them.
StringBuilder sb = new StringBuilder();
int count1 = 0;
int mark = -1;                      // index of the piece the two flags describe
int para_len = warehouse.Length;    // logical length; shrinks by one per merge
// NOTE(review): this literal was broken across two physical lines (which does not
// compile); restored as a leading "\n" escape — confirm that was the intent.
Console.WriteLine("\n******************调试台******************");
// Handle the special cases, e.g. "U.S" and "1.5hours".
char c = '0';                       // last character of warehouse[mark] (debug output only)
for (int o = 0; o < para_len; o++)
{
    // Empty pieces come from a trailing '.' or from "..". They have no first or
    // last character to inspect, and they break adjacency, so a pending
    // abbreviation/decimal must not be continued across them.
    if (warehouse[o].Length == 0)
    {
        markItLetter = false;
        markItNumber = false;
        continue;
    }

    char first = warehouse[o][0];
    char last = warehouse[o][warehouse[o].Length - 1];
    Console.WriteLine("开头字母" + first + "结尾字母" + last +
        "markItLetter" + markItLetter + "markItNumber" + markItNumber);

    bool startsWithUpper = first >= 'A' && first <= 'Z';
    bool startsWithDigit = first >= '0' && first <= '9';

    // The flags are recomputed on every iteration, so whenever one of them is
    // true here, 'mark' is exactly o - 1: the merge always joins neighbours.
    if ((startsWithUpper && markItLetter) || (startsWithDigit && markItNumber))
    {
        Console.WriteLine("Last End:" + c + " Now Begin:" + first);
        warehouse[mark] = warehouse[mark] + "." + warehouse[o];
        // Close the gap left by the merged piece: shift the tail left by one.
        Array.Copy(warehouse, o + 1, warehouse, o, para_len - 1 - o);
        para_len--;
        // Continue from the merged piece so that chains like "U.S.A" keep merging.
        o = mark;
    }

    // Describe only the piece just processed (the merged piece ends with 'last'
    // as well), so stale flags can never trigger a merge with a distant piece.
    markItLetter = last >= 'A' && last <= 'Z';
    markItNumber = last >= '0' && last <= '9';
    if (markItLetter || markItNumber)
    {
        mark = o;
        c = last;
        Console.WriteLine("Last End:" + c);
    }
}
// len is declared outside this fragment; it receives the logical piece count.
len = para_len;
Console.WriteLine("\n******************调试结束******************");