修改版,由于取的网站多对应加了修正.我就没发新贴了.
1,同一个用户(或浏览器)的不同请求共享同一个变量储存空间,就称其为Session(会话)
Session内的变量保存在服务器中。
用SessionID以区别不同的Session(会话),因为不是一个用户在请求。
2, 众所周知HTTP是无状态的,所以服务端和客户端交流时需要协商好使用什么标志来保持Session(会话)状态,于是出现了Cookie(可能你更熟悉Cookie的其他用途)。这便是HTTP协议的状态保持机制,是标准,所有的浏览器都应该支持。为什么不用IP标识?呵呵,因为有人用代理啊,早期的代理服务器是很普遍的连到Internet的方式。
So, 在Cookie中放置一个类似于SessionID的Value,同一个客户端在与服务端交互时,该ID便被来回传递,服务端便可依此建立若干变量,就是Session变量。不同的客户端因为SessionID不同,所以访问服务器时得到的Session变量就不同。
验证原理:服务器的Session会在客户端产生临时Cookie。
ASP.NET网站会生成ASP.NET_SessionId=hjguqsn34ai3h3aattozr2fy
JSP网站会生成JSESSIONID=4D9D2F0B65C77385C43CE4EE22D0536E(PHP网站默认是PHPSESSID)
具体是哪一种,就看是什么服务器而已。
取得登陆成功后的Cookie,只要电脑不清空Cookie,那就可以做成免登陆了。
换句话说,A电脑的Cookie不能在B电脑上用。
工具:WPE之类的网络嗅探器。
先记录一次成功登陆时所发送的信息.
找信息中的POST,和GET
网上找不到完整的资料,特别是验证码登陆方面的。
其实我这些都是从网上收集整理出来的,做得不是太好。
使用了WebClient、HttpWebRequest这两个.NET类,分别做出一个静态类和一个动态类。
静态类GetHtml支持一般的网页取数据和POST提交,但不支持验证码;可以自动识别网页编码,也可以手动指定网页编码。不过最好是手动指定,那样程序可以少运行一些代码。
System.Collections.Specialized.NameValueCollection PostVars =new System.Collections.Specialized.NameValueCollection();
PostVars.Add("uid","name");
PostVars.Add("pwd","123456");
string tmphtml= GetStrHtmlPost(url,PostVars);
动态类PostWeb 支持验证码、验证用户、登陆过会产生COOKIES字符串,第二次运行程序时可通过COOKIES而不用再次登陆。
PostWeb web=new PostWeb();
web.GetCode(验证码地址);
string tmplogin=web.LoginPost("http://www.mystand.com.cn/");
if(tmplogin.Contains(条件))
{
string cookie= web.cookieHeader;//保存到文件中,下次直接赋给类就可免登陆
web.GetPage("http://www.mystand.com.cn/", "http://www.mystand.com.cn/");
}
PostWeb web=new PostWeb();
web.cookieHeader=cookie;//把保存在文件中的cookie赋给类
web.GetPage("http://www.mystand.com.cn/", "http://www.mystand.com.cn/");
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
using System.IO.Compression;
namespace Manager.Net.Html
{
#region HTML相关类
/// <summary>
/// HTML相关
/// </summary>
public class CHtml
{
#region 过滤 Sql 语句字符串中的注入脚本
/// <summary>
/// Neutralises characters and keywords that are commonly used in SQL-injection payloads.
/// </summary>
/// <remarks>
/// This is a best-effort blacklist filter. It does not replace parameterised queries,
/// which remain the reliable defence against SQL injection.
/// </remarks>
/// <param name="source">The raw string to filter. <c>null</c> or empty input is returned unchanged.</param>
/// <returns>The filtered string.</returns>
public static string FilterSql(string source)
{
    if (string.IsNullOrEmpty(source))
    {
        return source;
    }

    // A single quote becomes two single quotes (SQL string-literal escaping).
    source = source.Replace("'", "''");
    // An ASCII double quote becomes a typographic opening quote (U+201C).
    source = source.Replace("\"", "\u201C");

    // Half-width punctuation is swapped for its full-width look-alike so it can no longer
    // act as SQL syntax (statement separator, call parentheses, pipe). The targets are
    // written as escapes because the literal full-width characters had been lost,
    // which turned these replacements into no-ops.
    source = source.Replace("|", "\uFF5C");
    source = source.Replace(";", "\uFF1B");
    source = source.Replace("(", "\uFF08");
    source = source.Replace(")", "\uFF09");

    // Remove the stored-procedure execution keywords regardless of letter case.
    // "execute" is listed first so it is removed whole instead of leaving "ute" behind.
    source = Regex.Replace(source, "execute|exec", "", RegexOptions.IgnoreCase);

    // Break up the system / extended stored-procedure prefixes (xp_, sp_), keeping the original case.
    source = Regex.Replace(source, "([xs])(p_)", "$1 $2", RegexOptions.IgnoreCase);

    // Break up hexadecimal literals (0x...), keeping the original case.
    source = Regex.Replace(source, "(0)(x)", "$1 $2", RegexOptions.IgnoreCase);

    return source;
}
#endregion
#region 输出HTML
/// <summary>
/// Reads a stream to its end and returns the content as text.
/// </summary>
/// <param name="Stream">The stream to read. It is consumed but not explicitly closed by this method.</param>
/// <param name="Encod">Encoding used to decode the bytes; <see cref="Encoding.Default"/> is used when this is <c>null</c>.</param>
/// <returns>The decoded text of the whole stream.</returns>
public static string HtmlStr(System.IO.Stream Stream, Encoding Encod)
{
    // Fall back to the system default encoding when the caller does not supply one.
    Encoding effective = Encod ?? Encoding.Default;
    System.IO.StreamReader reader = new System.IO.StreamReader(Stream, effective);
    return reader.ReadToEnd();
}
#endregion
#region 检验用户提交的URL参数字符里面是否有非法字符,如果有则返回True.防止SQL注入.
/// <summary>
/// Checks whether a user-supplied URL parameter contains fragments typical of SQL injection.
/// </summary>
/// <remarks>
/// The check is a case-insensitive, ordinal substring search for a fixed list of tokens.
/// It is a heuristic only and does not replace parameterised queries.
/// </remarks>
/// <param name="str">The parameter value to inspect. <c>null</c> or empty is treated as harmless.</param>
/// <returns><c>true</c> if a suspicious token is found; otherwise <c>false</c>.</returns>
public static bool VerifyString(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return false;
    }

    string[] suspiciousTokens = { "SELECT ", " AND ", " OR ", "EXEC ", "CHAR(" };
    foreach (string token in suspiciousTokens)
    {
        // Ordinal, case-insensitive search: independent of the current culture.
        if (str.IndexOf(token, StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return true;
        }
    }

    // The former trailing Replace(...) call discarded its result and has been removed.
    return false;
}
#endregion
#region 匹配页面的图片地址
/// <summary>
/// Collects the image addresses of every &lt;img&gt; tag found in a page.
/// </summary>
/// <param name="HtmlCode">The page markup. It is lower-cased before matching.</param>
/// <param name="imgHttp">The http:// prefix passed on to <c>GetImg</c> for each tag.</param>
/// <returns>The <c>GetImg</c> result of each tag, each followed by a '|' separator.</returns>
public static string GetImgSrc(string HtmlCode, string imgHttp)
{
    string tagPattern = @"<img.+?>";
    StringBuilder joined = new StringBuilder();
    MatchCollection tags = Regex.Matches(HtmlCode.ToLower(), tagPattern);
    for (int i = 0; i < tags.Count; i++)
    {
        string tag = tags[i].Value.ToLower().Trim();
        joined.Append(GetImg(tag, imgHttp)).Append("|");
    }
    return joined.ToString();
}
#endregion
#region 匹配<img src="" />中的图片路径实际链接
/// <summary>
/// Extracts the image address from the <c>src</c> attribute of a single &lt;img&gt; tag.
/// </summary>
/// <remarks>
/// The previous pattern ended in an empty alternative and used a greedy match, so it ran
/// to the last dot anywhere in the tag and kept the surrounding quote characters. The
/// attribute value is now captured on its own, whether double-quoted, single-quoted or unquoted.
/// The result is still lower-cased, as before.
/// </remarks>
/// <param name="ImgString">The &lt;img ... /&gt; tag text. <c>null</c> is treated as empty.</param>
/// <param name="imgHttp">Prefix to prepend when the address looks relative.</param>
/// <returns>
/// The address as-is when it looks absolute; otherwise <paramref name="imgHttp"/> followed by the
/// address. When no <c>src</c> attribute is found the result is <paramref name="imgHttp"/> alone.
/// </returns>
public static string GetImg(string ImgString, string imgHttp)
{
    string address = "";
    if (!string.IsNullOrEmpty(ImgString))
    {
        // The look-behind keeps attributes such as data-src from being mistaken for src.
        Match attribute = Regex.Match(
            ImgString,
            @"(?<![\w-])src\s*=\s*(?:""(?<u>[^""]*)""|'(?<u>[^']*)'|(?<u>[^\s""'>]+))",
            RegexOptions.IgnoreCase);
        if (attribute.Success)
        {
            address = attribute.Groups["u"].Value.Trim().ToLowerInvariant();
        }
    }

    // An explicit scheme (or a protocol-relative address) is always absolute.
    if (address.StartsWith("http://", StringComparison.Ordinal)
        || address.StartsWith("https://", StringComparison.Ordinal)
        || address.StartsWith("//", StringComparison.Ordinal))
    {
        return address;
    }

    // Legacy heuristic kept for compatibility: an address containing a well-known
    // domain suffix is assumed to carry its own host name already.
    string[] hostMarkers = { ".net", ".com", ".org", ".cn", ".cc", ".info", ".biz", ".tv" };
    foreach (string marker in hostMarkers)
    {
        if (address.IndexOf(marker, StringComparison.Ordinal) != -1)
        {
            return address;
        }
    }

    return imgHttp + address;
}
#endregion
#region 获取页面的链接正则
/// <summary>
/// Collects the values of all <c>href</c> attributes found in a page.
/// </summary>
/// <remarks>
/// The previous pattern kept the quote characters and everything up to the next whitespace
/// (for example the closing <c>&gt;</c> and the link text), and it failed to strip the
/// <c>href=</c> prefix when spaces surrounded the equals sign. Only the attribute value is
/// captured now. Values are still lower-cased, as before.
/// </remarks>
/// <param name="HtmlCode">The page markup. <c>null</c> or empty yields an empty result.</param>
/// <returns>Each non-empty href value followed by a '|' separator; an empty string if there are none.</returns>
public static string GetHref(string HtmlCode)
{
    if (string.IsNullOrEmpty(HtmlCode))
    {
        return "";
    }

    // Double-quoted, single-quoted or unquoted attribute value; empty values are skipped.
    Regex hrefPattern = new Regex(
        @"(?<![\w-])href\s*=\s*(?:""(?<u>[^""]+)""|'(?<u>[^']+)'|(?<u>[^\s""'>]+))",
        RegexOptions.IgnoreCase);

    StringBuilder links = new StringBuilder();
    foreach (Match link in hrefPattern.Matches(HtmlCode))
    {
        links.Append(link.Groups["u"].Value.Trim().ToLowerInvariant()).Append("|");
    }
    return links.ToString();
}
#endregion
#region 去HTML标记
/// <summary>
/// Strips HTML tags from a string.
/// </summary>
/// <param name="strhtml">The HTML text.</param>
/// <returns>The text with every &lt;...&gt; tag removed and surrounding whitespace trimmed.</returns>
public static string RemoveHTML(string strhtml)
{
    string withoutTags = Regex.Replace(strhtml, @"<[^>]+>|</[^>]+>", "");
    return withoutTags.Trim();
}
#endregion
/// <summary>
/// Removes Chinese characters (code points U+4E00 to U+9FA5) from a string.
/// </summary>
/// <param name="strhtml">The input text.</param>
/// <returns>The text without characters in that range, with surrounding whitespace trimmed.</returns>
public static string RemoveChinese(string strhtml)
{
    string withoutHan = Regex.Replace(strhtml, @"[\u4e00-\u9fa5]", "");
    return withoutHan.Trim();
}
#region 去UBB标记
/// <summary>
/// Strips UBB (bulletin-board) tags such as [b] or [/url] from a string.
/// </summary>
/// <param name="strhtml">The input text.</param>
/// <returns>The text with every bracketed tag removed and surrounding whitespace trimmed.</returns>
public static string RemoveUBB(string strhtml)
{
    // Matches '[' followed by any run of non-bracket characters and a closing ']'.
    string withoutUbb = Regex.Replace(strhtml, @"\[[^[\[\]]*]", "");
    return withoutUbb.Trim();
}
#endregion
#region 去UBB和HTML标记
/// <summary>
/// Strips both HTML tags and UBB tags from a string.
/// </summary>
/// <param name="strhtml">The input text.</param>
/// <returns>The result of applying <c>RemoveHTML</c> first and <c>RemoveUBB</c> second.</returns>
public static string RemoveUBBHTML(string strhtml)
{
    string tagFree = RemoveHTML(strhtml);
    return RemoveUBB(tagFree);
}
#endregion
#region 取网页编码
/// <summary>
/// Determines a page's text encoding from the <c>charset=</c> declaration in its markup.
/// </summary>
/// <remarks>
/// Lookup order: a regular-expression match on <c>charset=NAME</c>, then two string scans
/// through <c>Manager.Text.StringEx.GetStringMiddle</c> (defined elsewhere; presumably it
/// returns the text between the two given markers — TODO confirm), and finally UTF-8 when
/// nothing was found.
/// </remarks>
/// <param name="strHtml">The page markup.</param>
/// <returns>
/// The encoding named by the page, or <see cref="Encoding.Default"/> when
/// <see cref="Encoding.GetEncoding(string)"/> throws for that name.
/// </returns>
public static Encoding GetEncoding(string strHtml)
{
    const string charsetPattern = @"(?i)\bcharset=(?<charset>[-a-zA-Z_0-9]+)";
    string name = Regex.Match(strHtml, charsetPattern).Groups["charset"].Value;

    // Quoted form, e.g. charset="utf-8".
    if (name.Length <= 0 && strHtml.Contains("charset=\""))
    {
        name = Manager.Text.StringEx.GetStringMiddle(strHtml, "charset=\"", "\"");
    }

    // Last resort: whatever lies between charset= and the next double quote.
    if (name.Length <= 0 && strHtml.Contains("charset="))
    {
        name = Manager.Text.StringEx.GetStringMiddle(strHtml, "charset=", "\"");
    }

    if (name.Length <= 0)
    {
        name = Encoding.UTF8.BodyName;
    }

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (Exception)
    {
        return Encoding.Default;
    }
}
#endregion
#region 取IE版本
/// <summary>
/// Reads the Internet Explorer version string from the registry.
/// </summary>
/// <remarks>
/// Reads the <c>Version</c> value under <c>HKLM\Software\Microsoft\Internet Explorer</c>.
/// NOTE(review): newer Internet Explorer releases reportedly keep the real version in a
/// separate <c>svcVersion</c> value — confirm before relying on this result.
/// </remarks>
/// <returns>The version string, or an empty string when the key or the value does not exist.</returns>
public static string GetIEVersion()
{
    using (Microsoft.Win32.RegistryKey versionKey =
        Microsoft.Win32.Registry.LocalMachine.OpenSubKey(@"Software\Microsoft\Internet Explorer"))
    {
        // OpenSubKey returns null when the key is missing.
        if (versionKey == null)
        {
            return string.Empty;
        }

        // GetValue returns null when the value is missing.
        object version = versionKey.GetValue("Version");
        return version == null ? string.Empty : version.ToString();
    }
}
#endregion
}
#endregion
#region 模拟网页提交数据类
/// <summary>
/// 模拟网页提交数据
/// </summary>
public class PostWeb
{
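// [属性 / Properties] Collapsed region in the original post; its contents are not included in this listing.
// [相关设置 / Settings] Collapsed region in the original post; its contents are not included in this listing.
// Members used below but not shown here (cookieHeader, tmpEncod, ProxySetting, NetworkCredentialSetting, WebConfig, SetEncod) are presumably declared in these two regions.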
#region 验证码
/// <summary>
/// Downloads a captcha image and saves it to a file.
/// </summary>
/// <remarks>
/// When the download fails (the single-argument <c>GetCode</c> returns <c>null</c>) nothing is
/// written and the method returns normally; previously this ended in a NullReferenceException.
/// </remarks>
/// <param name="strURL">Address of the captcha image.</param>
/// <param name="dir">Target directory; created if missing. When null or empty, <paramref name="filename"/> is used as the full path.</param>
/// <param name="filename">Target file name.</param>
/// <param name="imageFormat">Format in which the image is saved.</param>
public void GetCode(string strURL, string dir, string filename, System.Drawing.Imaging.ImageFormat imageFormat)
{
    string targetPath = filename;
    if (!String.IsNullOrEmpty(dir))
    {
        System.IO.Directory.CreateDirectory(dir);
        targetPath = System.IO.Path.Combine(dir, filename);
    }

    // 'using' releases the image even when Save throws; a null resource is allowed.
    using (System.Drawing.Image code = GetCode(strURL))
    {
        if (code == null)
        {
            return;
        }
        code.Save(targetPath, imageFormat);
    }
}
/// <summary>
/// Downloads a captcha image and records the cookies issued with it in <c>cookieHeader</c>.
/// </summary>
/// <remarks>
/// The response body is copied into a memory buffer before decoding. An image created with
/// <c>Image.FromStream</c> needs its stream to stay open for as long as the image is used,
/// whereas the response stream is closed as soon as this method returns. The buffer is
/// deliberately not disposed here because the returned image depends on it.
/// </remarks>
/// <param name="strURL">Address of the captcha image.</param>
/// <returns>The downloaded image, or <c>null</c> when the request or the decoding fails.</returns>
public System.Drawing.Image GetCode(string strURL)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
    ProxySetting(request);
    NetworkCredentialSetting(request);
    request.Method = "GET";
    request.KeepAlive = true;
    request.CookieContainer = new CookieContainer();
    try
    {
        MemoryStream imageBytes = new MemoryStream();
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        {
            // Remember the session cookies so that later requests can reuse them.
            cookieHeader = request.CookieContainer.GetCookieHeader(new Uri(strURL));

            byte[] chunk = new byte[8192];
            int read;
            while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
            {
                imageBytes.Write(chunk, 0, read);
            }
        }

        imageBytes.Position = 0;
        return System.Drawing.Image.FromStream(imageBytes, false, false);
    }
    catch
    {
        // Best effort, as elsewhere in this class: failures are reported as null.
        return null;
    }
}
#endregion
/// <summary>
/// Requests a page with GET, stores the cookies it issues in <c>cookieHeader</c>, and returns the page text.
/// </summary>
/// <param name="strURL">Address of the page to request.</param>
/// <returns>
/// The page text after URL- and HTML-decoding, with carriage returns, line feeds, tabs and
/// spaces removed; an empty string on any failure (in which case <c>cookieHeader</c> is cleared).
/// </returns>
public string GetCookies(string strURL)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
    request.Method = "Get";
    request.CookieContainer = new CookieContainer();
    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            Uri requested = request.RequestUri;
            cookieHeader = request.CookieContainer.GetCookieHeader(requested);

            // SetEncod is defined elsewhere; presumably it sets tmpEncod from the response charset — TODO confirm.
            SetEncod(response.CharacterSet);
            string raw = CHtml.HtmlStr(response.GetResponseStream(), tmpEncod);
            string decoded = System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(raw));
            return decoded.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", "");
        }
    }
    catch
    {
        cookieHeader = "";
        return "";
    }
}
#region 登陆
/// <summary>
/// Simulates a login performed with a GET request and records the resulting cookies in <c>cookieHeader</c>.
/// </summary>
/// <remarks>
/// Cookies already held in <c>cookieHeader</c> are sent with the request. That string has the
/// request-header form <c>a=1; b=2</c>, while <c>CookieContainer.SetCookies</c> expects cookies
/// separated by commas, so passing it whole kept only the first cookie. The pairs are now
/// added one at a time.
/// </remarks>
/// <param name="strURL">Address that performs the login.</param>
/// <param name="strReferer">Value for the Referer header.</param>
/// <returns>
/// The page text after URL- and HTML-decoding, with carriage returns, line feeds, tabs and
/// spaces removed; an empty string on failure (in which case <c>cookieHeader</c> is cleared).
/// </returns>
public string LoginGet(string strURL, string strReferer)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
    ProxySetting(request);
    NetworkCredentialSetting(request);
    WebConfig(request);
    request.Referer = strReferer;
    request.Method = "Get";

    CookieContainer jar = new CookieContainer();
    if (!string.IsNullOrEmpty(cookieHeader))
    {
        Uri target = new Uri(strURL);
        foreach (string pair in cookieHeader.Split(';'))
        {
            string cookie = pair.Trim();
            // Skip empty fragments and RFC 2965 meta attributes such as $Version.
            if (cookie.Length == 0 || cookie[0] == '$')
            {
                continue;
            }
            jar.SetCookies(target, cookie);
        }
    }
    request.CookieContainer = jar;

    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            cookieHeader = request.CookieContainer.GetCookieHeader(request.RequestUri);

            // SetEncod is defined elsewhere; presumably it sets tmpEncod from the response charset — TODO confirm.
            SetEncod(response.CharacterSet);
            string raw = CHtml.HtmlStr(response.GetResponseStream(), tmpEncod);
            string decoded = System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(raw));
            return decoded.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", "");
        }
    }
    catch
    {
        cookieHeader = "";
        return "";
    }
}
/// <summary>
/// Simulates a login form submission with POST and records the resulting cookies in <c>cookieHeader</c>.
/// </summary>
/// <example>
/// <c>LoginPost("http://www.mystand.com.cn/login/submit.jsp", "userid=NAME&amp;password=SECRET", "http://www.mystand.com.cn/");</c>
/// </example>
/// <remarks>
/// Cookies already held in <c>cookieHeader</c> are sent with the request. That string has the
/// request-header form <c>a=1; b=2</c>, while <c>CookieContainer.SetCookies</c> expects cookies
/// separated by commas, so passing it whole kept only the first cookie. The pairs are now
/// added one at a time. The form body is written as ASCII bytes, so <paramref name="strArgs"/>
/// should already be URL-encoded.
/// </remarks>
/// <param name="strURL">Address the login form is submitted to.</param>
/// <param name="strArgs">URL-encoded form data, e.g. <c>name=value&amp;name2=value2</c>.</param>
/// <param name="strReferer">Value for the Referer header.</param>
/// <returns>
/// The page text after URL- and HTML-decoding, with carriage returns, line feeds, tabs and
/// spaces removed; an empty string on failure (in which case <c>cookieHeader</c> is cleared).
/// </returns>
public string LoginPost(string strURL, string strArgs, string strReferer)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
    ProxySetting(request);
    NetworkCredentialSetting(request);
    WebConfig(request);
    request.Referer = strReferer;
    request.Method = "Post";
    request.ContentType = "application/x-www-form-urlencoded";
    byte[] payload = Encoding.ASCII.GetBytes(strArgs);
    request.ContentLength = payload.Length;
    try
    {
        CookieContainer jar = new CookieContainer();
        if (!string.IsNullOrEmpty(cookieHeader))
        {
            Uri target = new Uri(strURL);
            foreach (string pair in cookieHeader.Split(';'))
            {
                string cookie = pair.Trim();
                // Skip empty fragments and RFC 2965 meta attributes such as $Version.
                if (cookie.Length == 0 || cookie[0] == '$')
                {
                    continue;
                }
                jar.SetCookies(target, cookie);
            }
        }
        request.CookieContainer = jar;

        using (Stream body = request.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            cookieHeader = request.CookieContainer.GetCookieHeader(request.RequestUri);

            // SetEncod is defined elsewhere; presumably it sets tmpEncod from the response charset — TODO confirm.
            SetEncod(response.CharacterSet);
            string raw = CHtml.HtmlStr(response.GetResponseStream(), tmpEncod);
            string decoded = System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(raw));
            return decoded.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", "");
        }
    }
    catch (Exception)
    {
        cookieHeader = "";
        return "";
    }
}
#endregion
#region 取数据
/// <summary>
/// Fetches a page with GET, sending the cookies recorded in <c>cookieHeader</c> (for example after a successful login).
/// </summary>
/// <param name="strURL">Address of the page to fetch.</param>
/// <param name="strReferer">Value for the Referer header; omitted when null or empty.</param>
/// <returns>
/// The page text after URL- and HTML-decoding, with carriage returns, line feeds, tabs and
/// spaces removed; an empty string on failure.
/// </returns>
public string GetPage(string strURL, string strReferer)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
    ProxySetting(request);
    NetworkCredentialSetting(request);
    WebConfig(request);
    request.Method = "Get";
    if (!string.IsNullOrEmpty(strReferer))
    {
        request.Referer = strReferer;
    }

    // Only send a Cookie header when there is something to send, as PostPage does;
    // previously an empty "cookie:" header was added when no cookies were recorded.
    if (!string.IsNullOrEmpty(cookieHeader))
    {
        request.Headers.Add("cookie:" + cookieHeader);
    }

    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // SetEncod is defined elsewhere; presumably it sets tmpEncod from the response charset — TODO confirm.
            SetEncod(response.CharacterSet);
            string raw = CHtml.HtmlStr(response.GetResponseStream(), tmpEncod);
            string decoded = System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(raw));
            return decoded.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", "");
        }
    }
    catch
    {
        return "";
    }
}
/// <summary>
/// Submits form data to a page with POST, sending the cookies recorded in <c>cookieHeader</c>.
/// </summary>
/// <param name="strURL">Address the data is posted to.</param>
/// <param name="strArgs">Form data; it is written to the request body as ASCII bytes.</param>
/// <param name="strReferer">Value for the Referer header.</param>
/// <returns>
/// The page text after URL- and HTML-decoding, with carriage returns, line feeds, tabs and
/// spaces removed; an empty string on failure.
/// </returns>
public string PostPage(string strURL, string strArgs, string strReferer)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
    ProxySetting(request);
    NetworkCredentialSetting(request);
    WebConfig(request);
    request.Referer = strReferer;
    request.Method = "Post";
    request.ContentType = "application/x-www-form-urlencoded";

    byte[] payload = Encoding.ASCII.GetBytes(strArgs);
    request.ContentLength = payload.Length;

    if (cookieHeader.Length > 0)
    {
        request.Headers.Add("cookie:" + cookieHeader);
    }

    try
    {
        using (Stream body = request.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // SetEncod is defined elsewhere; presumably it sets tmpEncod from the response charset — TODO confirm.
            SetEncod(response.CharacterSet);
            string raw = CHtml.HtmlStr(response.GetResponseStream(), tmpEncod);
            string decoded = System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(raw));
            return decoded.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", "");
        }
    }
    catch
    {
        return "";
    }
}
#endregion
}
#endregion
#region 表态取网页数据
/// <summary>
/// Static helpers that fetch page data through WebClient.
/// </summary>
public class GetHtml
{
/// <summary>
/// Submits a form with POST and returns the response text.
/// </summary>
/// <example>
/// <code>
/// System.Collections.Specialized.NameValueCollection PostVars = new System.Collections.Specialized.NameValueCollection();
/// PostVars.Add("uid", "name");
/// PostVars.Add("pwd", "123456");
/// string html = GetStrHtmlPost(url, PostVars);
/// </code>
/// </example>
/// <remarks>
/// The response is first decoded as UTF-8 to look for a charset declaration and decoded again
/// when the page declares another encoding, matching <c>GetStrHtml</c>. Previously the
/// system-default (ANSI) decoding was returned whenever the page declared UTF-8, which garbled
/// non-ASCII text on systems whose default encoding is not UTF-8.
/// </remarks>
/// <param name="url">Address the form is posted to.</param>
/// <param name="PostVars">Form fields. When <c>null</c>, nothing is sent and an empty string is returned.</param>
/// <returns>The response text, or an empty string on any failure.</returns>
public static string GetStrHtmlPost(String url, System.Collections.Specialized.NameValueCollection PostVars)
{
    if (PostVars == null)
    {
        return "";
    }

    try
    {
        byte[] buf;
        using (System.Net.WebClient client = new System.Net.WebClient())
        {
            buf = client.UploadValues(url, "POST", PostVars);
        }

        // A charset declaration is plain ASCII, so a UTF-8 probe decode is enough to find it.
        string probe = Encoding.UTF8.GetString(buf);
        Encoding declared = CHtml.GetEncoding(probe);
        if (declared == Encoding.UTF8)
        {
            return probe;
        }
        return declared.GetString(buf);
    }
    catch
    {
        // Best effort, as before: any failure yields an empty string.
        return "";
    }
}
/// <summary>
/// Downloads a page and returns its HTML, detecting the encoding automatically.
/// </summary>
/// <param name="url">Address of the page.</param>
/// <returns>The page text, as returned by the two-argument overload called with a <c>null</c> encoding.</returns>
public static string GetStrHtml(string url)
{
    // A null encoding tells the two-argument overload to detect it from the page.
    Encoding autoDetect = null;
    return GetStrHtml(url, autoDetect);
}
/// <summary>
/// Downloads a page and returns its HTML.
/// </summary>
/// <param name="url">Address of the page.</param>
/// <param name="encoding">
/// Encoding used to decode the response. When <c>null</c>, the response is decoded as UTF-8,
/// inspected with <c>CHtml.GetEncoding</c>, and decoded again if another encoding is declared.
/// </param>
/// <returns>The page text.</returns>
public static string GetStrHtml(string url, Encoding encoding)
{
    byte[] buf;
    // WebClient is disposable; release it as soon as the download has finished.
    using (WebClient client = new WebClient())
    {
        buf = client.DownloadData(url);
    }

    if (encoding != null)
    {
        return encoding.GetString(buf);
    }

    string html = Encoding.UTF8.GetString(buf);
    Encoding declared = CHtml.GetEncoding(html);
    if (declared == Encoding.UTF8)
    {
        return html;
    }
    return declared.GetString(buf);
}
}
#endregion
}