/// <summary>
/// Extracts image URLs from an HTML fragment: the <c>src</c> attribute of
/// <c>img</c> tags, followed by the <c>background</c> attribute of
/// <c>table</c>, <c>tr</c> and <c>td</c> tags.
/// </summary>
/// <param name="sHtmlText">HTML text to scan. May be null or empty.</param>
/// <returns>
/// The non-empty URLs found, all <c>img</c> sources first and then all
/// <c>background</c> values, each group in document order. Duplicates are
/// kept. Returns an empty list when nothing matches or the input is null or empty.
/// </returns>
private List<string> GetImgUrlByHtmlText(string sHtmlText)
{
    List<string> sUrlList = new List<string>();

    // Nothing to scan; Regex.Matches would throw ArgumentNullException on null.
    if (string.IsNullOrEmpty(sHtmlText))
    {
        return sUrlList;
    }

    // Matches an img tag and captures its src value. Whitespace must be written
    // as \s: the previous pattern used the literal class [s ], which cut every
    // URL at its first letter 's' and swallowed a leading 's'.
    Regex regImg = new Regex(
        @"<img[^<>]*?src\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
        RegexOptions.IgnoreCase);

    foreach (Match match in regImg.Matches(sHtmlText))
    {
        string imgUrl = match.Groups["imgUrl"].Value;
        if (imgUrl.Length > 0)
        {
            sUrlList.Add(imgUrl);
        }
    }

    // Matches a table, tr or td tag and captures its background value.
    Regex regTable = new Regex(
        @"<(table|tr|td)[^<>]*?background\s*=\s*[""']?\s*(?<backgroundUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
        RegexOptions.IgnoreCase);

    foreach (Match match in regTable.Matches(sHtmlText))
    {
        string backgroundUrl = match.Groups["backgroundUrl"].Value;
        if (backgroundUrl.Length > 0)
        {
            sUrlList.Add(backgroundUrl);
        }
    }

    return sUrlList;
}