这几天发现一个很好的图片网站,美女特多

随便在网上查了查,下载了一个名叫 WebSpider 的蜘蛛程序。我仔细研究了一下,感觉作者也只是写着玩的,意思意思:网页下载下来之后
基本就丢弃了。另外我对它的结构也不太满意,所以动手改了改。
我的大致思路是采用双线程:一个 UI 线程,一个工作线程。
1
/// <summary>
/// Downloads the page referenced by <paramref name="state"/>, downloads every image
/// matched by <c>RegularExpression.SrcExtractor</c>, and then recursively processes
/// the links matched by <c>RegularExpression.UrlExtractor</c>.
/// </summary>
/// <param name="state">
/// The page to process. Its status, content, progress message and success flag are
/// updated in place.
/// </param>
/// <returns>The final value of <c>state.ProcessSuccessfull</c>.</returns>
/// <remarks>
/// <c>level</c> is incremented for every link that is followed and is not decremented
/// in this method, so the <c>level &gt; 10</c> check caps the number of followed links
/// rather than the recursion depth.
/// </remarks>
public bool Process( WebPageState state )
{
    state.ProcessStarted = true;
    state.ProcessSuccessfull = false;

    // Remember the URI of the page seen while level is still 1.
    if (level == 1)
    {
        m_baseUri = state.Uri;
    }

    try
    {
        Console.WriteLine( "Process Uri: {0}", state.Uri.AbsoluteUri );

        WebRequest req = WebRequest.Create( state.Uri );
        WebResponse res = null;

        try
        {
            res = req.GetResponse( );

            if ( res is HttpWebResponse )
            {
                state.StatusCode = ((HttpWebResponse)res).StatusCode.ToString( );
                state.StatusDescription = ((HttpWebResponse)res).StatusDescription;
            }
            if ( res is FileWebResponse )
            {
                state.StatusCode = "OK";
                state.StatusDescription = "OK";
            }

            if ( state.StatusCode.Equals( "OK" ) )
            {
                // Dispose the reader as soon as the body has been read so the
                // underlying response stream is released even if parsing fails later.
                using (StreamReader sr = new StreamReader( res.GetResponseStream( ) ))
                {
                    state.Content = sr.ReadToEnd( );
                }

                // Pass 1: download every image referenced by the page.
                MatchCollection m = RegExUtil.GetMatchRegEx(RegularExpression.SrcExtractor, state.Content, true);
                for (int k = 0; k < m.Count; k++)
                {
                    string Address = m[k].Groups[1].ToString();
                    Uri uri = new Uri(state.Uri, m[k].Groups["url"].ToString());

                    // m_pages doubles as the "already seen" set for images and pages.
                    if (!m_pages.Contains(uri.AbsoluteUri))
                    {
                        m_pages.Add(uri.AbsoluteUri);
                        DownloadImage(state.Uri, Address);

                        if (this.ContentHandler != null)
                        {
                            // Report progress to the listener after each download.
                            state.mes.MaxProgress = m.Count;
                            state.mes.Progress = k + 1;
                            state.mes.Result = state.Uri.AbsoluteUri;
                            state.mes.Status = TaskStatus.Running;
                            state.mes.Message = "当前共有图片下载数"+m.Count+" 现在正在下载第"+state.mes.Progress.ToString()+"图片" + Address;
                            ContentHandler.Invoke(state);
                        }
                    }
                }

                // Pass 2: follow the links found on the page.
                Match mm = RegExUtil.GetMatchRegEx(RegularExpression.UrlExtractor, state.Content);
                while (mm.Success)
                {
                    Uri uri = new Uri(state.Uri, mm.Groups["url"].ToString());
                    if (ValidPage(uri) && !m_pages.Contains(uri.AbsoluteUri))
                    {
                        // Limit reached: stop following links, but fall through so the
                        // success flag and the returned value stay consistent (the
                        // previous early "return true" left ProcessSuccessfull false).
                        if (level > 10)
                        {
                            break;
                        }

                        level++;
                        WebPageState statec = new WebPageState(uri);
                        m_pages.Add(uri.AbsoluteUri);
                        Process(statec);
                    }

                    mm = mm.NextMatch();
                }
            }

            state.ProcessSuccessfull = true;
        }
        catch( Exception ex )
        {
            HandleException( ex, state );
        }
        finally
        {
            if ( res != null )
            {
                res.Close( );
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine( ex.ToString( ) );
    }

    Console.WriteLine( "Successfull: {0}", state.ProcessSuccessfull );

    return state.ProcessSuccessfull;
}
111
#endregion
112
113
114
/// <summary>
/// Resolves <paramref name="imgUri"/> against <paramref name="m_bb"/> and, when the
/// resulting address ends in a jpg, jpeg or swf extension, downloads it to
/// <c>temp\img{id}.{ext}</c>.
/// </summary>
/// <param name="m_bb">URI of the page that referenced the resource.</param>
/// <param name="imgUri">Absolute or page-relative address of the resource.</param>
/// <remarks>
/// Failures are logged to the console and otherwise ignored, so one broken
/// resource does not abort the crawl.
/// </remarks>
private void DownloadImage(Uri m_bb,string imgUri)
{
    Uri imageUri = null;

    try
    {
        imageUri = new Uri(m_bb, imgUri);

        string ext = StrUtil.RightLastIndexOf(imageUri.AbsoluteUri, ".").ToLower();
        string outFile = "temp\\img" + (m_fileId++) + "." + ext;

        // Compare the extension exactly. The previous substring test
        // ("jpg|jpeg|swf".IndexOf(ext)) also accepted "", "j", "pe", "g|j", ...
        if (ext != "jpg" && ext != "jpeg" && ext != "swf")
        {
            return;
        }

        // DownloadFile does not create missing directories; this is a no-op
        // when the folder already exists.
        System.IO.Directory.CreateDirectory("temp");

        using (WebClient web = new WebClient())
        {
            web.DownloadFile(imageUri.AbsoluteUri, outFile);
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure instead of swallowing it silently.
        Console.WriteLine(
            "Could not download image {0}: {1}",
            imageUri != null ? imageUri.AbsoluteUri : imgUri,
            ex.Message );
    }
}
现在基本可以下载图片了,不过感觉需要优化的地方还比较多:递归的层级暂时没有控制,性能也一般,代码结构还比较乱,后续再重构!
2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154
