zoukankan
html css js c++ java
获取新浪读书频道,书的列表程序
using
System;
using
System.Net;
using
System.IO;
using
System.Text;
using
System.Collections;
using
System.Text.RegularExpressions;
namespace
ConsoleApplication1
{
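/// <summary>
/// Console program that downloads a Sina book index page, extracts the chapter link texts,
/// and writes them out as a local HTML index table.
/// </summary>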
class
Class1
{
/// <summary>
/// The main entry point for the application: downloads the book index page,
/// collects the text of each chapter link and writes the resulting link table to disk.
/// </summary>
[STAThread]
static void Main(string[] args)
{
    const int chapterCount = 84;

    string indexHtml = Gethtml("http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml");

    // Slot i (1-based) keeps the link text in links[i, 0] and the target file name in links[0, i].
    string[,] links = new string[chapterCount + 1, chapterCount + 1];
    links[0, 0] = "";

    for (int chapter = 1; chapter <= chapterCount; chapter++)
    {
        // Opening anchor tag that is searched for in the downloaded page.
        string anchor = "<a href=/nzt/lit/dixiashi/" + chapter + ".shtml target=_blank class=a03>";

        // Link text
        string linkText = GetChinese("[\u4e00-\u9fa5]", MID(indexHtml, anchor, 80));
        string targetFile = "Books_zg_" + chapter + ".html";

        links[chapter, 0] = linkText;
        links[0, chapter] = targetFile;
    }

    string page = Getlist(4, 700, links);
    Writefile(@"C:\Documents and Settings\Administrator\桌面\index.html", page);
}
/// <summary>
/// Returns a slice of <paramref name="Content"/> that begins at the first occurrence of
/// <paramref name="StartString"/> (the marker itself is included in the result).
/// </summary>
/// <param name="Content">Text to search.</param>
/// <param name="StartString">Marker whose first occurrence is the start of the slice.</param>
/// <param name="length">Maximum number of characters to return.</param>
/// <returns>
/// At most <paramref name="length"/> characters starting at the marker; an empty string when
/// the marker is not found, an argument is null, or <paramref name="length"/> is not positive.
/// </returns>
private static string MID(string Content, string StartString, int length)
{
    if (Content == null || StartString == null || length <= 0)
    {
        return "";
    }

    // Ordinal search: the marker is markup, not linguistic text.
    int start = Content.IndexOf(StartString, StringComparison.Ordinal);
    if (start < 0)
    {
        // Marker missing: Substring(-1, ...) would throw, so report "nothing found" instead.
        return "";
    }

    // Clamp so a marker close to the end of the content does not run past it.
    int available = Content.Length - start;
    return Content.Substring(start, Math.Min(length, available));
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as GB2312.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The full response body as a string.</returns>
private static string Gethtml(string url)
{
    // Dispose the client, the network stream and the reader even if reading fails.
    using (WebClient wc = new WebClient())
    using (Stream str = wc.OpenRead(url))
    using (StreamReader sr = new StreamReader(str, System.Text.Encoding.GetEncoding("GB2312")))
    {
        return sr.ReadToEnd();
    }
}
/// <summary>
/// Gets the HTML source of the given page, decoded as GB2312.
/// </summary>
/// <param name="URL">Absolute address of the page to request.</param>
/// <returns>The full response body as a string.</returns>
static string GetPageSource(string URL)
{
    Uri uri = new Uri(URL);
    HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);

    // Configure the request BEFORE it is sent; values assigned after GetResponse()
    // cannot influence the request that has already gone out.
    hwReq.Method = "Get";
    hwReq.KeepAlive = false;

    // Release the response (and its connection) and the reader deterministically.
    using (HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse())
    using (StreamReader reader = new StreamReader(hwRes.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Builds a complete HTML page containing a table of links.
/// </summary>
/// <param name="col">Number of columns (link cells per table row).</param>
/// <param name="ww">Width of the table; each cell gets floor(ww / col).</param>
/// <param name="arr">
/// Link data: for every index i starting at 1, arr[i, 0] is the link text and arr[0, i] is the
/// link target. Index 0 is never rendered.
/// </param>
/// <returns>The generated HTML document.</returns>
static string Getlist(int col, int ww, string[,] arr)
{
    // Entries live at indices 1..itemCount. An index is usable only when it fits both
    // dimensions, and slot 0 is not an entry, so it must not be counted as one
    // (counting it produced a fully empty trailing row).
    int itemCount = Math.Max(0, Math.Min(arr.GetLength(0), arr.GetLength(1)) - 1);

    // A non-positive column count cannot hold any cell: render an empty table instead of
    // dividing by zero and casting infinity to int.
    int rowCount = col > 0 ? (int)Math.Ceiling(itemCount / Convert.ToDouble(col)) : 0; // number of rows
    int cellWidth = col > 0 ? (int)Math.Floor(ww / Convert.ToDouble(col)) : 0;         // width of each column

    StringBuilder sb = new StringBuilder();
    sb.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"><!--BODY {SCROLLBAR-FACE-COLOR: #c5c5c5; MARGIN: 0px; FONT: 12px 宋体; SCROLLBAR-HIGHLIGHT-COLOR: #c5c5c5; SCROLLBAR-SHADOW-COLOR: #c5c5c5; SCROLLBAR-3DLIGHT-COLOR: #c5c5c5; SCROLLBAR-ARROW-COLOR: #ffffff; SCROLLBAR-TRACK-COLOR: #fffffd; SCROLLBAR-DARKSHADOW-COLOR: #c5c5c5;font-size:13px;}A.a03:link {COLOR: #1E1E9C; TEXT-DECORATION: underline}A.a03:visited { COLOR: #6d6e71; TEXT-DECORATION: none}A.a03:active {COLOR: #ff0000; TEXT-DECORATION: none}A.a03:hover {COLOR: #ff0000; TEXT-DECORATION: none}td{font-size:13px;}--></style></head><body><table width=\"" + ww + "\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");

    int index = 0;
    for (int r = 0; r < rowCount; r++)
    {
        sb.Append("<tr>");
        for (int c = 0; c < col; c++)
        {
            index++;
            sb.Append("<td width=\"" + cellWidth + "\">");

            // Explicit bounds check instead of catching IndexOutOfRangeException:
            // cells past the last entry stay empty so the final row keeps its full width.
            if (index <= itemCount)
            {
                sb.Append("<a href=\"" + arr[0, index] + "\" target=\"_blank\" class=\"a03\">" + arr[index, 0] + "</a>");
            }

            sb.Append("</td>");
        }
        sb.Append("</tr>");

        // Thin spacer row after every row of links.
        sb.Append("<tr><td colspan=\"" + col + "\" height=\"7\"></td></tr>");
    }

    sb.Append("</table></body></html>");
    return sb.ToString();
}
/// <summary>
/// Writes <paramref name="text"/> to the file at <paramref name="path"/>, replacing any
/// existing content, using the gb2312 encoding.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="text">Content to write.</param>
private static void Writefile(string path, string text)
{
    // Chinese text; original author's note: supported by QuickCHM.
    Encoding outputEncoding = Encoding.GetEncoding("gb2312");

    // append = false: the file is overwritten rather than extended.
    using (StreamWriter writer = new StreamWriter(path, false, outputEncoding))
    {
        writer.Write(text);
    }
}
/// <summary>
/// Extracts the http:// URLs found in a piece of HTML.
/// </summary>
/// <param name="htmlCode">HTML text to scan.</param>
/// <returns>
/// The distinct matched URLs (duplicates compared case-sensitively), sorted with the default
/// <see cref="ArrayList.Sort()"/> ordering.
/// </returns>
static ArrayList GetHyperLinks(string htmlCode)
{
    ArrayList al = new ArrayList();

    // Tracks URLs already collected so each membership test is a hash lookup
    // instead of a scan over the whole result list.
    Hashtable seen = new Hashtable();

    string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
    Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);

    foreach (Match m in r.Matches(htmlCode))
    {
        string strNew = m.Value;

        // Filter out duplicate URLs.
        if (!seen.ContainsKey(strNew))
        {
            seen.Add(strNew, null);
            al.Add(strNew);
        }
    }

    al.Sort();
    return al;
}
/// <summary>
/// Concatenates every match of <paramref name="reg"/> in <paramref name="str"/> and appends,
/// in parentheses, the single character that follows the first "(" of <paramref name="str"/>.
/// </summary>
/// <param name="reg">Regular expression selecting the characters to keep (matched case-insensitively).</param>
/// <param name="str">Text to process.</param>
/// <returns>
/// The concatenated matches followed by "(c)", where c is the character right after the first "(".
/// The parenthesised part is omitted when <paramref name="str"/> has no "(" or nothing follows it;
/// an empty string is returned for null or empty input.
/// </returns>
static string GetChinese(string reg, string str)
{
    Regex r = new Regex(reg, RegexOptions.IgnoreCase);
    if (str == null || str.Length == 0)
    {
        return "";
    }

    // StringBuilder avoids re-allocating the result for every match.
    StringBuilder strNew = new StringBuilder();
    foreach (Match m in r.Matches(str))
    {
        strNew.Append(m.Value);
    }

    // Only add the "(c)" suffix when there really is a "(" with a character after it;
    // otherwise Substring would either pick an unrelated character or throw.
    int open = str.IndexOf('(');
    if (open >= 0 && open + 1 < str.Length)
    {
        strNew.Append('(').Append(str[open + 1]).Append(')');
    }

    return strNew.ToString();
}
}
}
查看全文
相关阅读:
PowerDesigner与Eclipse同步开发
postdrop: create file maildrop/xxx: Permission denied
mysql导出数据mysqldump用法
mysql 存储过程中 limit之后使用分页变量,传入分页参数. 类似于微博游标分批次获取信息;问号参数类似于c#的string.format;问号占位符
mysql 获取本周一的日期,本周日的日期
mysql 游标 ,嵌套游标
asp.net调用mysql 存储过程 带 out 返回值,返回刚插入数据库中的自增的ID,LAST_INSERT_ID() 的使用
跨服务器,跨数据库,多表联合查询 / 如何用sql语句来查询表中哪些记录是重复的
asp.net 使用mysql数据库,OUT parameter返回值为null的bug
mysql 类型转换 cast 将 float 转换为 decimal
原文地址:https://www.cnblogs.com/wang123/p/602777.html
最新文章
.NET 中的正则表达式
模式 结构型
模式创建型
工厂模式
测试是否和数据库连接成功! vbCode
从 C10K 到 C500K
Facebook 架构学习
负载均衡策略深入剖析
Memcached内存管理机制浅析
程序分text, data (initialized), bss, stack, heap几个段
热门文章
Memcached的线程模型及状态机
Nginx负载均衡和LVS负载均衡的比较分析
解决TCP网络传输“粘包”问题
浅议DAS、NAS、SAN区别(转载)
The C10K problem翻译
fixed 定位兼容ie6
php标记风格
shell程序设计的语法<条件测试>
用 VBA 实现在 PPT 最下边加个进度条
nginx虚拟目录
Copyright © 2011-2022 走看看