zoukankan
html css js c++ java
C#采集数据类
using
System;
using
System.Data;
using
System.Configuration;
using
System.Web;
using
System.Web.Security;
using
System.Web.UI;
using
System.Web.UI.WebControls;
using
System.Web.UI.WebControls.WebParts;
using
System.Web.UI.HtmlControls;
using
System.Drawing;
using
MSXML2;
using
System.Text.RegularExpressions;
namespace
wenweifeng
{
/**/
///
<summary>
///
CaiJi 的摘要说明
///
</summary>
public
class
CaiJi
{
/// <summary>
/// Creates a new scraping helper. The class declares no fields, so there is
/// nothing to initialise.
/// </summary>
public CaiJi()
{
}

// No finalizer is declared on purpose: this class owns no unmanaged resources,
// and a finalizer whose only job is to call GC.SuppressFinalize on an object
// that is already being finalized just adds finalization-queue overhead.

/// <summary>
/// Kept so that existing callers following the <c>o.Dispose()</c> usage pattern
/// keep working. The instance holds nothing that needs releasing; the call
/// only tells the GC that no finalization is required.
/// </summary>
public void Dispose()
{
    GC.SuppressFinalize(this);
}
日期随机函数
#region
日期随机函数
/// <summary>
/// Builds a name from the current local time followed by a random number.
/// </summary>
/// <remarks>
/// The result has the shape <c>yyyy,MM,dd,HH-mm-ss</c> immediately followed by
/// the value of <c>ra.Next(1000000, 9999999)</c>. Month, day, hour, minute and
/// second are left-padded with '0' to two digits.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string s = o.DateRndName(new Random());
/// </code>
/// </example>
/// <param name="ra">Random number generator that supplies the numeric suffix.</param>
/// <returns>The generated name.</returns>
public string DateRndName(Random ra)
{
    DateTime now = DateTime.Now;

    // Year, month, day and hour are comma-separated ...
    string datePart = string.Join(",", new string[]
    {
        now.Year.ToString(),
        now.Month.ToString().PadLeft(2, '0'),
        now.Day.ToString().PadLeft(2, '0'),
        now.Hour.ToString().PadLeft(2, '0')
    });

    // ... while minute and second are attached with dashes.
    string timePart = string.Join("-", new string[]
    {
        now.Minute.ToString().PadLeft(2, '0'),
        now.Second.ToString().PadLeft(2, '0')
    });

    string suffix = ra.Next(1000000, 9999999).ToString();
    return datePart + "-" + timePart + suffix;
}
#endregion
取得文件后缀
#region
取得文件后缀
/// <summary>
/// Returns the text after the last '.' of a file name or URL.
/// </summary>
/// <remarks>
/// Only the position of the dots is examined: anything following the last
/// '.' is returned as-is, including query strings or path segments.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string ext = o.GetFileExtends(@"http://www.baidu.com/img/logo.gif"); // "gif"
/// </code>
/// </example>
/// <param name="filename">File name or URL to inspect; may be null.</param>
/// <returns>
/// The text after the last '.', or <c>null</c> when <paramref name="filename"/>
/// is null, contains no '.', or has its first '.' at position 0.
/// </returns>
public string GetFileExtends(string filename)
{
    // A null name has no extension; report it like any other name without one
    // instead of failing with a NullReferenceException.
    if (filename == null)
    {
        return null;
    }

    // The first '.' must not be the very first character (IndexOf > 0).
    if (filename.IndexOf('.') <= 0)
    {
        return null;
    }

    // Everything after the last '.', i.e. the last element of Split('.').
    return filename.Substring(filename.LastIndexOf('.') + 1);
}
#endregion
获取远程文件源代码
#region
获取远程文件源代码
/// <summary>
/// Downloads a remote resource with a synchronous GET through MSXML2.XMLHTTP
/// and returns its body as text.
/// </summary>
/// <remarks>
/// The response bytes are decoded with <c>System.Text.Encoding.Default</c>.
/// NOTE(review): that ignores whatever charset the remote page declares, so
/// pages in another encoding may come back garbled - confirm this is intended.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string html = o.GetRemoteHtmlCode(@"http://www.baidu.com");
/// </code>
/// </example>
/// <param name="Url">Address of the remote resource.</param>
/// <returns>
/// The decoded response body, or an empty string when the request did not
/// reach readyState 4 or produced no body.
/// </returns>
public string GetRemoteHtmlCode(string Url)
{
    MSXML2.XMLHTTP _xmlhttp = new MSXML2.XMLHTTPClass();
    try
    {
        // The third argument is the async flag; false makes send() block.
        _xmlhttp.open("GET", Url, false, null, null);
        _xmlhttp.send("");

        if (_xmlhttp.readyState != 4)
        {
            return "";
        }

        // A response without a body would otherwise make GetString throw.
        object rawBody = _xmlhttp.responseBody;
        if (rawBody == null)
        {
            return "";
        }

        return System.Text.Encoding.Default.GetString((byte[])rawBody);
    }
    finally
    {
        // Release the COM object now instead of waiting for the GC to
        // collect its runtime callable wrapper.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(_xmlhttp);
    }
}
#endregion
保存远程文件
#region
保存远程文件
/// <summary>
/// Downloads a remote file with a synchronous GET and stores it under a
/// generated name inside <paramref name="Path"/>.
/// </summary>
/// <remarks>
/// <para>
/// The file is written to <c>Path + DateRndName + "." + extension</c>. The
/// directory and the name are joined by plain concatenation, so
/// <paramref name="Path"/> must already end with a directory separator.
/// </para>
/// <para>
/// When the extension is "jpg", the downloaded file is additionally handed to
/// <c>function.AddShuiYinPic</c> together with a second generated name
/// (<c>DateRndName + "Addpic." + extension</c>), and that second name is what
/// gets returned. NOTE(review): <c>function</c> and <c>service</c> are declared
/// elsewhere in the project; presumably this step stamps a logo watermark onto
/// the picture - confirm against those types.
/// </para>
/// <para>
/// The method is best-effort: failures are written to the trace listeners and
/// the generated name is returned regardless, exactly as before, so a returned
/// name does not guarantee that the file exists.
/// </para>
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string name = o.RemoteSave(@"http://www.baidu.com/img/logo.gif", Server.MapPath("Html/"));
/// </code>
/// </example>
/// <param name="Url">Address of the remote file.</param>
/// <param name="Path">Target directory, including the trailing separator.</param>
/// <returns>The generated file name (without directory).</returns>
public string RemoteSave(string Url, string Path)
{
    Random ra = new Random();

    // Resolve the extension once. A URL without a usable '.' yields null,
    // which is treated as "no extension" instead of crashing on Trim().
    string extension = GetFileExtends(Url);
    if (extension == null)
    {
        extension = string.Empty;
    }
    bool isJpg = string.Equals(extension.Trim(), "jpg", StringComparison.OrdinalIgnoreCase);

    string StringFileName = DateRndName(ra) + "." + extension;
    string newfilename = isJpg
        ? (DateRndName(ra) + "Addpic." + extension)
        : StringFileName;

    string StringFilePath = Path + StringFileName;
    string newfilepath = Path + newfilename;

    MSXML2.XMLHTTP _xmlhttp = null;
    try
    {
        _xmlhttp = new MSXML2.XMLHTTPClass();
        // The third argument is the async flag; false makes send() block.
        _xmlhttp.open("GET", Url, false, null, null);
        _xmlhttp.send("");

        if (_xmlhttp.readyState == 4)
        {
            // Creates the file or overwrites an existing one, and always
            // closes the handle, even when the write fails.
            System.IO.File.WriteAllBytes(StringFilePath, (byte[])_xmlhttp.responseBody);

            if (isJpg)
            {
                function myfunction = new function();
                service myserver = new service();
                myfunction.AddShuiYinPic(StringFilePath, newfilepath, HttpContext.Current.Server.MapPath(myserver.myweblogo));
            }
        }
        else
        {
            System.Diagnostics.Trace.TraceError(
                "RemoteSave: request for '{0}' did not complete: {1}", Url, _xmlhttp.statusText);
        }
    }
    catch (Exception ex)
    {
        // Deliberately not rethrown: callers rely on always getting a name
        // back. The failure is recorded instead of being silently dropped.
        System.Diagnostics.Trace.TraceError(
            "RemoteSave: failed to save '{0}' to '{1}': {2}", Url, StringFilePath, ex);
    }
    finally
    {
        if (_xmlhttp != null)
        {
            // Release the COM object now instead of waiting for the GC.
            System.Runtime.InteropServices.Marshal.ReleaseComObject(_xmlhttp);
        }
    }

    return newfilename;
}
#endregion
替换网页中的换行和引号
#region
替换网页中的换行和引号
/// <summary>
/// Strips double quotes, carriage returns and line feeds from HTML source.
/// </summary>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string flat = o.ReplaceEnter(o.GetRemoteHtmlCode(@"http://www.baidu.com"));
/// </code>
/// </example>
/// <param name="HtmlCode">HTML source; may be null or empty.</param>
/// <returns>
/// The input without any <c>"</c>, <c>\r</c> or <c>\n</c> characters; an empty
/// string when the input is null or empty.
/// </returns>
public string ReplaceEnter(string HtmlCode)
{
    if (string.IsNullOrEmpty(HtmlCode))
    {
        return "";
    }

    return HtmlCode
        .Replace("\"", "")
        .Replace("\r", "")
        .Replace("\n", "");
}
#endregion
执行正则提取出值
#region
执行正则提取出值
/// <summary>
/// Runs a regular expression against a string and returns the first match.
/// </summary>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string html = o.ReplaceEnter(o.GetRemoteHtmlCode(@"http://www.baidu.com"));
/// string title = o.GetRegValue("&lt;title&gt;.+?&lt;/title&gt;", html);
/// </code>
/// </example>
/// <param name="RegexString">Regular expression pattern.</param>
/// <param name="RemoteStr">Text to search, typically HTML source.</param>
/// <returns>The text of the first match, or an empty string when nothing matches.</returns>
public string GetRegValue(string RegexString, string RemoteStr)
{
    Match first = Regex.Match(RemoteStr, RegexString);
    return first.Success ? first.Value : "";
}
#endregion
替换HTML源代码
#region
替换HTML源代码
/// <summary>
/// Removes everything that matches the pattern <c>&lt;.+?&gt;</c> (HTML tags)
/// from the given source, leaving only the text in between.
/// </summary>
/// <remarks>
/// All tags are removed in a single regex pass. With the default regex options
/// '.' does not match a line feed, so a tag that spans several lines is left
/// in place; strip line breaks first if that matters.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string text = o.RemoveHTML("&lt;p&gt;Hello &lt;b&gt;World&lt;/b&gt;&lt;/p&gt;"); // "Hello World"
/// </code>
/// </example>
/// <param name="HtmlCode">HTML source; may be null or empty.</param>
/// <returns>
/// The source without tags. Null or empty input is returned unchanged.
/// </returns>
public string RemoveHTML(string HtmlCode)
{
    if (string.IsNullOrEmpty(HtmlCode))
    {
        return HtmlCode;
    }

    // One pass over the input; each matched tag is removed exactly where it
    // was found instead of re-scanning the whole string once per match.
    return Regex.Replace(HtmlCode, "<.+?>", "");
}
#endregion
匹配页面的链接
#region
匹配页面的链接
/// <summary>
/// Collects the href attributes found in HTML source.
/// </summary>
/// <remarks>
/// Every match is lower-cased, has the literal text <c>href=</c> removed and
/// is trimmed. The pattern consumes all non-whitespace characters after the
/// address, so surrounding quotes and any markup glued to the attribute are
/// part of each entry.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string links = o.GetHref(o.GetRemoteHtmlCode(@"http://www.baidu.com"));
/// </code>
/// </example>
/// <param name="HtmlCode">HTML source to scan.</param>
/// <returns>
/// All entries concatenated, each one followed by '|'; an empty string when
/// no href is found.
/// </returns>
public string GetHref(string HtmlCode)
{
    const string hrefPattern = @"(h|H)(r|R)(e|E)(f|F) *= *('|"")?((\w|\\|\/|\.|:|-|_)+)[\S]*";

    System.Text.StringBuilder links = new System.Text.StringBuilder();
    MatchCollection hits = Regex.Matches(HtmlCode, hrefPattern);
    for (int i = 0; i < hits.Count; i++)
    {
        string entry = hits[i].Value.ToLower().Replace("href=", "").Trim();
        links.Append(entry);
        links.Append("|");
    }

    return links.ToString();
}
#endregion
匹配页面的图片地址
#region
匹配页面的图片地址
/// <summary>
/// Collects the image addresses of all <c>&lt;img ...&gt;</c> tags in HTML source.
/// </summary>
/// <remarks>
/// The source is lower-cased before matching, and every tag is passed to
/// <c>GetImg</c> together with <paramref name="imgHttp"/>.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string html = o.GetRemoteHtmlCode(@"http://www.baidu.com");
/// string images = o.GetImgSrc(html, "http://www.baidu.com/");
/// </code>
/// </example>
/// <param name="HtmlCode">HTML source to scan.</param>
/// <param name="imgHttp">
/// Base address handed to <c>GetImg</c> for addresses it treats as relative,
/// e.g. "http://www.baidu.com/" for <c>&lt;img src="bb/x.gif"&gt;</c>.
/// </param>
/// <returns>
/// All addresses concatenated, each one followed by '|'; an empty string when
/// the source contains no img tag.
/// </returns>
public string GetImgSrc(string HtmlCode, string imgHttp)
{
    System.Text.StringBuilder sources = new System.Text.StringBuilder();

    MatchCollection tags = Regex.Matches(HtmlCode.ToLower(), @"<img.+?>");
    for (int i = 0; i < tags.Count; i++)
    {
        string tag = tags[i].Value.ToLower().Trim();
        sources.Append(GetImg(tag, imgHttp));
        sources.Append("|");
    }

    return sources.ToString();
}
/// <summary>
/// Extracts the image address from a single <c>&lt;img src="" /&gt;</c> tag.
/// </summary>
/// <remarks>
/// <para>
/// The tag is lower-cased, the part from <c>src=</c> up to a bmp/jpg/gif/png
/// extension is matched and the literal <c>src=</c> is removed. Quotes are not
/// stripped, so a quoted attribute keeps its opening quote.
/// </para>
/// <para>
/// An address counts as absolute when it contains a URL scheme separator
/// ("://") or one of the domain suffixes .net, .com, .org, .cn, .cc, .info,
/// .biz or .tv. Absolute addresses are returned unchanged; all others get
/// <paramref name="imgHttp"/> prepended.
/// </para>
/// <para>
/// NOTE(review): the pattern ends with an empty alternative
/// (<c>png|)</c>) and uses a greedy <c>.+</c>, so a later '.' inside the same
/// tag can extend the match past the image address - looks like a typo, but
/// it is left untouched because callers may depend on the current output.
/// </para>
/// </remarks>
/// <param name="ImgString">The text of one img tag.</param>
/// <param name="imgHttp">Base address to prepend to relative image paths.</param>
/// <returns>The image address, prefixed with <paramref name="imgHttp"/> when relative.</returns>
public string GetImg(string ImgString, string imgHttp)
{
    string MatchVale = "";
    string Reg = @"src=.+\.(bmp|jpg|gif|png|)";
    foreach (Match m in Regex.Matches(ImgString.ToLower(), Reg))
    {
        MatchVale += m.Value.ToLower().Trim().Replace("src=", "");
    }

    // "://" catches any address that carries its own scheme, whatever its
    // top-level domain; the suffix list is kept for scheme-less addresses
    // such as "www.example.com/x.gif" that were already treated as absolute.
    string[] absoluteMarkers = new string[]
    {
        "://", ".net", ".com", ".org", ".cn", ".cc", ".info", ".biz", ".tv"
    };
    foreach (string marker in absoluteMarkers)
    {
        if (MatchVale.IndexOf(marker, StringComparison.Ordinal) >= 0)
        {
            return MatchVale;
        }
    }

    return imgHttp + MatchVale;
}
#endregion
替换通过正则获取字符串所带的正则首尾匹配字符串
#region
替换通过正则获取字符串所带的正则首尾匹配字符串
/// <summary>
/// Removes the leading and trailing marker strings that a regex match still
/// carries, e.g. the <c>&lt;title&gt;</c> and <c>&lt;/title&gt;</c> around a matched title.
/// </summary>
/// <remarks>
/// Every occurrence of each marker is removed, not only the ones at the very
/// start or end of the value.
/// </remarks>
/// <example>
/// <code>
/// CaiJi o = new CaiJi();
/// string title = o.RegReplace("&lt;title&gt;Hi&lt;/title&gt;", "&lt;title&gt;", "&lt;/title&gt;"); // "Hi"
/// </code>
/// </example>
/// <param name="RegValue">The value to clean up; may be null or empty.</param>
/// <param name="regStart">Leading marker to remove; ignored when null or empty.</param>
/// <param name="regEnd">Trailing marker to remove; ignored when null or empty.</param>
/// <returns>
/// The value without the markers. Null or empty input is returned unchanged.
/// </returns>
public string RegReplace(string RegValue, string regStart, string regEnd)
{
    if (string.IsNullOrEmpty(RegValue))
    {
        return RegValue;
    }

    string cleaned = RegValue;

    // String.Replace rejects an empty search string, hence the checks.
    if (!string.IsNullOrEmpty(regStart))
    {
        cleaned = cleaned.Replace(regStart, "");
    }

    if (!string.IsNullOrEmpty(regEnd))
    {
        cleaned = cleaned.Replace(regEnd, "");
    }

    return cleaned;
}
#endregion
}
}
查看全文
相关阅读:
eclipse export runnable jar(导出可执行jar包) runnable jar可以执行的
mave常用指令
771. Jewels and Stones珠宝数组和石头数组中的字母对应
624. Maximum Distance in Arrays二重数组中的最大差值距离
724. Find Pivot Index 找到中轴下标
605. Can Place Flowers零一间隔种花
581. Shortest Unsorted Continuous Subarray连续数组中的递增异常情况
747. Largest Number At Least Twice of Others比所有数字都大两倍的最大数
643. Maximum Average Subarray I 最大子数组的平均值
414. Third Maximum Number数组中第三大的数字
原文地址:https://www.cnblogs.com/duoe/p/996664.html
最新文章
设计模式-适配器模式
设计模式-原型模式
01day-webpack
vue中如何使用echarts,使用axios获取数据
面试
vue全局路由守卫beforeEach+token验证+node
vue+elementUI+node实现登录模块--验证用户名是否正确
vue+node+elementUI实现分页功能
解构如何运用的解构--报错 throw new TypeError('Router.use() requires a middleware function but got a ' + gettype(fn))
vue+node+elementUI实现注册功能
热门文章
bootstrap如何去除自带的样式----导航栏中的菜单实现平滑的过渡到对应的菜单区域-------动态跟换模态框中的内容
汽车系统实现--搜索功能
JS中把其他类型转换成字符串的三种方法
ES6 Map
springmvc controller层接收List类型的参数
@GetMapping、@PostMapping和@RequestMapping的区别
添加jar包到本地Maven仓库
编写自己的SpringBoot-starter
eclipse---之Console窗口命令行输入
eclipse maven 打war包的几种方式
Copyright © 2011-2022 走看看