工具介绍:
1)分析网页,获取页面图片。
2)分析网页引用CSS文件,获取背景图片。
3)批量下载。
要点:
1)正则
LINK_PATTERN:获取页面所有链接
BACKGROUND_IMAGE_PATTERN:获取CSS中背景图片地址
CHECK_URL_PATTERN:检测URL是否有效
代码
/// <summary>
/// Matches <c>href=</c> / <c>src=</c> attributes. The attribute value (optionally
/// wrapped in single or double quotes) is captured in the named group "link".
/// </summary>
private const string LINK_PATTERN = @"(href|src)=['""]?(?<link>[^'""\s]*)['""]?";
/// <summary>
/// Matches CSS <c>url(...)</c> references. The address (optionally quoted) is
/// captured in the named group "url".
/// </summary>
private const string BACKGROUND_IMAGE_PATTERN = @"(url)\(['""]?(?<url>[^'""\s]*)['""]?\)";
/// <summary>
/// Matches the beginning of an http/https URL: scheme, a host containing at least
/// one dot, and an optional path. Only the start of the input is anchored.
/// </summary>
private const string CHECK_URL_PATTERN = @"^http(s)?://+([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
2)线程
代码
/// <summary>
/// Download worker: reads the page URL and the save directory from the form,
/// clears the log list, resets the status-bar labels, disables the download
/// button, and then calls <c>AnalyzeAndDownload</c> with the two values.
/// </summary>
private void DoWork()
{
    // Page URL entered by the user.
    string url = txtUrl.Text.Trim();
    // Directory the images are saved to.
    string saveDir = txtSaveDir.Text.Trim();

    // Reset the log and the status-bar fields before a new run.
    lvLog.Items.Clear();
    tsslStatus.Text = DESC_ANALYSISING;
    tsslTime.Text = string.Format(DESC_SPAND_TIME, 0);
    tsslCount.Text = string.Format(DESC_IMAGES_COUNT, 0);
    tsslTotalTime.Text = string.Format(DESC_DOWNLOAD_TOTAL_TIME, 0);
    btnDownload.Enabled = false;

    // Let the message loop repaint the controls updated above before the
    // analysis starts.
    // NOTE(review): the original comment labels this method "thread"; if it is
    // ever started on a non-UI thread, the control accesses above need to be
    // marshalled with Invoke - confirm against the caller.
    Application.DoEvents();

    AnalyzeAndDownload(url, saveDir);
}
3)网页分析:
代码
/// <summary>
/// Downloads the page at <paramref name="url"/>, scans every href/src link in it,
/// downloads each linked stylesheet, and collects image addresses: images linked
/// directly from the page (.gif/.png/.jpg/.jpeg) plus whatever
/// <c>FetchBGImageUrlsWithCSS</c> returns for the combined page and stylesheet text.
/// Every resolvable link is also written to the log list.
/// </summary>
/// <param name="url">
/// Absolute address of the page to analyze. It is stored in <c>_basicUri</c> and
/// used as the base for resolving relative links.
/// </param>
/// <returns>The collected image addresses; never null.</returns>
/// <exception cref="UriFormatException"><paramref name="url"/> is not a valid absolute URI.</exception>
/// <exception cref="WebException">The page or one of its stylesheets could not be downloaded.</exception>
protected List<Uri> FetchCSSWithSite(string url)
{
    StringBuilder sourceCSS = new StringBuilder();
    List<Uri> list = new List<Uri>();
    using (WebClient client = new WebClient())
    {
        _basicUri = new Uri(url);
        string sourceHtml = client.DownloadString(_basicUri);
        // The page source itself is included in the text that is later scanned
        // for url(...) references.
        sourceCSS.Append(sourceHtml);

        Regex regex = new Regex(LINK_PATTERN, RegexOptions.IgnoreCase);
        // Regex.Matches never returns null, so no null check is needed here.
        foreach (Match match in regex.Matches(sourceHtml))
        {
            string link = match.Groups["link"].Value;

            // Resolve the link once; a single malformed link must not abort
            // the whole analysis, so it is skipped instead of throwing.
            Uri absolute;
            if (!Uri.TryCreate(_basicUri, link, out absolute))
            {
                continue;
            }

            string extension = GetLinkExtension(absolute);
            lvLog.Items.Add(new ListViewItem(new string[] { absolute.AbsoluteUri, DateTime.Now.ToString(TIME_FORMAT), STATUS_ANALYSIS, string.Empty, extension }));
            if (extension.Length == 0)
            {
                continue;
            }

            // Invariant upper-casing keeps the comparison independent of the
            // current culture (e.g. the Turkish dotted/dotless i).
            switch (extension.ToUpperInvariant())
            {
                case ".CSS":
                    sourceCSS.Append(client.DownloadString(absolute));
                    break;
                case ".GIF":
                case ".PNG":
                case ".JPG":
                case ".JPEG":
                    list.Add(absolute);
                    break;
                default:
                    break;
            }
        }
    }
    list.AddRange(FetchBGImageUrlsWithCSS(sourceCSS.ToString()));
    return list;
}

/// <summary>
/// Returns the extension (including the leading dot) of the last path segment of
/// <paramref name="uri"/>, or an empty string when that segment has no dot.
/// Query string and fragment are not part of the path and are therefore ignored.
/// </summary>
private static string GetLinkExtension(Uri uri)
{
    string path = uri.AbsolutePath;
    int dot = path.LastIndexOf('.');
    // A dot that sits before the last '/' belongs to a directory name.
    if (dot < 0 || dot < path.LastIndexOf('/'))
    {
        return string.Empty;
    }
    return path.Substring(dot);
}
4)效果图