using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace xsharp
{
    /// <summary>
    /// Console tool that walks the paginated chapter index of a web novel,
    /// printing every chapter link it finds, and that can extract the text of
    /// a chapter page into a local file.
    /// </summary>
    class Program
    {
        /// <summary>Marker text of the "next page" anchor in the index pager.</summary>
        private const string NextPageMarker = "下一页</a>";

        /// <summary>Attribute tail that immediately precedes the marker when the pager link is disabled (last page).</summary>
        private const string DisabledMarker = "=\"disabled\">";

        /// <summary>Start of an anchor's href attribute.</summary>
        private const string HrefMarker = "a href=";

        /// <summary>Length of <c>a href="</c>, i.e. the offset from the marker to the first character of the URL.</summary>
        private const int HrefValueOffset = 8;

        /// <summary>Directory that extracted chapter files are written to.</summary>
        private static readonly string sDir = "G:\\notex\\";

        /// <summary>Shared client used for every download.</summary>
        private static readonly WebClient MyWebClient = new WebClient();

        /// <summary>Base URL of the book; index page names are appended to it.</summary>
        private static string sMain = "";

        /// <summary>
        /// Extracts the chapter text from a downloaded chapter page, echoes the
        /// title and text to the console, and writes the text to
        /// "&lt;sDir&gt;&lt;idx&gt;.html".
        /// </summary>
        /// <param name="sHtml">Full HTML of the chapter page.</param>
        /// <param name="idx">Number used as the output file name.</param>
        /// <returns>0 on success, -1 when any expected marker is missing.</returns>
        static int writeContent(string sHtml, int idx)
        {
            if (string.IsNullOrEmpty(sHtml))
            {
                return -1;
            }

            // The title is taken from the content attribute of the keywords meta tag.
            const string titleKey = "<meta name=\"keywords\" content=\"";
            int iBgnIdx = sHtml.IndexOf(titleKey, StringComparison.Ordinal);
            if (iBgnIdx < 0)
            {
                return -1;
            }

            int titleStart = iBgnIdx + titleKey.Length;
            int iEndIdx = sHtml.IndexOf('"', titleStart);
            if (iEndIdx < 0)
            {
                return -1;
            }

            string sTitle = sHtml.Substring(titleStart, iEndIdx - titleStart);
            Console.WriteLine(sTitle);

            // Isolate the chapter container, up to the first closing </div>.
            iBgnIdx = sHtml.IndexOf("<div id=\"chaptercontent", StringComparison.Ordinal);
            if (iBgnIdx < 0)
            {
                return -1;
            }

            iEndIdx = sHtml.IndexOf("</div>", iBgnIdx + 1, StringComparison.Ordinal);
            if (iEndIdx < 0)
            {
                return -1;
            }

            string sDivSub = sHtml.Substring(iBgnIdx, iEndIdx - iBgnIdx);

            // The text sits between the first </p> and the following "<p style".
            iBgnIdx = sDivSub.IndexOf("</p>", StringComparison.Ordinal);
            if (iBgnIdx < 0)
            {
                return -1;
            }

            // Skips the 4-character closing tag plus one more character
            // (presumably a line break after the tag - confirm against a live page).
            int contentStart = iBgnIdx + 5;
            if (contentStart > sDivSub.Length)
            {
                return -1;
            }

            iEndIdx = sDivSub.IndexOf("<p style", contentStart, StringComparison.Ordinal);
            if (iEndIdx < 0)
            {
                return -1;
            }

            string sContentSub = sDivSub.Substring(contentStart, iEndIdx - contentStart);

            // Normalise non-breaking spaces to plain spaces, whether they arrive as
            // the HTML entity or as the raw U+00A0 character.
            // NOTE(review): confirm which form the site actually serves.
            sContentSub = sContentSub.Replace("&nbsp;", " ").Replace("\u00A0", " ");
            sContentSub = sContentSub.Replace("<br />", Environment.NewLine);
            Console.WriteLine(sContentSub);

            string sHtmlPath = sDir + idx.ToString() + ".html";
            using (StreamWriter sw = new StreamWriter(sHtmlPath)) // write the extracted text to the file
            {
                sw.Write(sContentSub);
            }

            return 0;
        }

        /// <summary>
        /// Downloads a page and decodes it as UTF-8.
        /// </summary>
        /// <param name="sUrl">Absolute URL to download.</param>
        /// <param name="pageHtml">Receives the decoded page text.</param>
        /// <returns>Always 0; download failures surface as exceptions from <see cref="WebClient"/>.</returns>
        static int downPage(string sUrl, ref string pageHtml)
        {
            byte[] pageData = MyWebClient.DownloadData(sUrl);

            // The target site serves UTF-8. For a GB2312 site, decode with
            // Encoding.Default.GetString(pageData) instead.
            pageHtml = Encoding.UTF8.GetString(pageData);
            return 0;
        }

        /// <summary>
        /// Extracts the file part of the "next page" link from one line of the index pager.
        /// </summary>
        /// <param name="se">A line of index HTML expected to contain the next-page anchor.</param>
        /// <returns>
        /// The last path segment of the link including its leading '/'
        /// (for example "/index_2.html"), the whole href when it contains no '/',
        /// or an empty string when there is no usable next-page link
        /// (marker missing, link disabled, or href malformed).
        /// </returns>
        internal static string getNextPageUrl(string se)
        {
            if (string.IsNullOrEmpty(se))
            {
                return "";
            }

            int markerIdx = se.IndexOf(NextPageMarker, StringComparison.Ordinal);
            if (markerIdx < 0)
            {
                return "";
            }

            // On the last page the anchor carries ="disabled" right before its text.
            // Only look back when there are enough characters to compare.
            if (markerIdx >= DisabledMarker.Length &&
                string.CompareOrdinal(se, markerIdx - DisabledMarker.Length, DisabledMarker, 0, DisabledMarker.Length) == 0)
            {
                return "";
            }

            // The href belonging to the marker is the closest one before it.
            int hrefIdx = se.LastIndexOf(HrefMarker, markerIdx, StringComparison.Ordinal);
            if (hrefIdx < 0)
            {
                return "";
            }

            int valueStart = hrefIdx + HrefValueOffset;
            if (valueStart >= se.Length)
            {
                return "";
            }

            int valueEnd = se.IndexOf('"', valueStart);
            if (valueEnd < 0)
            {
                return "";
            }

            // Keep only the last path segment; never reach back past the href value.
            int slashIdx = se.LastIndexOf('/', valueEnd);
            if (slashIdx < valueStart)
            {
                slashIdx = valueStart;
            }

            return se.Substring(slashIdx, valueEnd - slashIdx);
        }

        /// <summary>
        /// Extracts the target URL and the link text of the first anchor in a line of index HTML.
        /// </summary>
        /// <param name="sKeys">A line of HTML containing <c>&lt;a href="..."&gt;text&lt;/a&gt;</c>.</param>
        /// <param name="sHref">Receives the href value once it has been found.</param>
        /// <param name="sTitle">Receives the link text on success.</param>
        /// <returns>
        /// 0 on success; -1 when no href is present; -2 when the href value is not
        /// terminated by a quote; -3 when no closing <c>&lt;/a&gt;</c> follows the href.
        /// </returns>
        internal static int getContentUrl(string sKeys, ref string sHref, ref string sTitle)
        {
            if (string.IsNullOrEmpty(sKeys))
            {
                return -1;
            }

            int hrefIdx = sKeys.IndexOf(HrefMarker, StringComparison.Ordinal);
            if (hrefIdx < 0)
            {
                return -1;
            }

            int valueStart = hrefIdx + HrefValueOffset;
            if (valueStart >= sKeys.Length)
            {
                return -2;
            }

            int valueEnd = sKeys.IndexOf('"', valueStart);
            if (valueEnd < 0)
            {
                return -2;
            }

            sHref = sKeys.Substring(valueStart, valueEnd - valueStart);

            // Skip the closing quote and the '>' that ends the opening tag.
            int titleStart = valueEnd + 2;
            if (titleStart > sKeys.Length)
            {
                return -3;
            }

            // Search forward from the title so an earlier </a> cannot yield a negative length.
            int titleEnd = sKeys.IndexOf("</a>", titleStart, StringComparison.Ordinal);
            if (titleEnd < 0)
            {
                return -3;
            }

            sTitle = sKeys.Substring(titleStart, titleEnd - titleStart);
            return 0;
        }

        /// <summary>
        /// Processes an index page and then follows its "next page" links until
        /// the last page is reached, printing every chapter link found.
        /// </summary>
        /// <param name="sIndexUrl">Index page name relative to <c>sMain</c>; empty for the first page.</param>
        /// <returns>
        /// The status of the first page: 0 on success, -1/-2/-3 when its expected
        /// markers are missing. Failures on later pages do not change the result.
        /// </returns>
        static int dealIndexPage(string sIndexUrl)
        {
            // Pages are visited in a loop rather than by recursion, and each page
            // at most once, so a pager that links back on itself cannot run forever.
            HashSet<string> visited = new HashSet<string>(StringComparer.Ordinal);
            visited.Add(sIndexUrl ?? "");

            string sNextUrl;
            int firstStatus = processIndexPage(sIndexUrl ?? "", out sNextUrl);

            while (sNextUrl.Length > 0 && visited.Add(sNextUrl))
            {
                string sCurrent = sNextUrl;
                processIndexPage(sCurrent, out sNextUrl);
            }

            return firstStatus;
        }

        /// <summary>
        /// Downloads one index page, prints its chapter links and reports the next page, if any.
        /// </summary>
        /// <param name="sIndexUrl">Index page name relative to <c>sMain</c>.</param>
        /// <param name="sNextUrl">Receives the next page name, or an empty string when there is none.</param>
        /// <returns>0 on success, -1/-2/-3 when an expected marker is missing.</returns>
        private static int processIndexPage(string sIndexUrl, out string sNextUrl)
        {
            sNextUrl = "";

            string pageHtml = "";
            downPage(combineUrl(sMain, sIndexUrl), ref pageHtml);

            // The links are read from the second "recommend" div up to the "note" paragraph.
            const string sRecommendKey = "<div class=\"recommend\">";
            int iBgnIdx = pageHtml.IndexOf(sRecommendKey, StringComparison.Ordinal);
            if (iBgnIdx < 0)
            {
                return -1;
            }

            iBgnIdx = pageHtml.IndexOf(sRecommendKey, iBgnIdx + 10, StringComparison.Ordinal);
            if (iBgnIdx < 0)
            {
                return -2;
            }

            const string sNoteKey = "<p class=\"note\">";
            int iEndIdx = pageHtml.IndexOf(sNoteKey, iBgnIdx + 10, StringComparison.Ordinal);
            if (iEndIdx < 0)
            {
                return -3;
            }

            string sHrefArray = pageHtml.Substring(iBgnIdx, iEndIdx - iBgnIdx);

            // One anchor per line: split on line breaks. Splitting on spaces would
            // cut every "<a href=" apart and no link would ever be recognised.
            string[] sTmpArray = sHrefArray.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (string se in sTmpArray)
            {
                if (se.IndexOf(HrefMarker, StringComparison.Ordinal) < 0)
                {
                    continue;
                }

                Console.Write(se + " ");

                if (se.IndexOf(NextPageMarker, StringComparison.Ordinal) >= 0)
                {
                    // Pager link to the next index page.
                    string sNext = getNextPageUrl(se);
                    if (sNext != "")
                    {
                        Console.Write("nextpage " + sNext + " ");
                        if (sNextUrl.Length == 0)
                        {
                            sNextUrl = sNext;
                        }
                    }
                    else
                    {
                        Console.Write("Finish.......... ");
                    }
                }
                else
                {
                    // Ordinary chapter link.
                    string sUrl = "";
                    string sTitle = "";
                    if (0 == getContentUrl(se, ref sUrl, ref sTitle))
                    {
                        Console.Write(" " + sUrl + " " + sTitle + " ");
                    }
                    else
                    {
                        Console.Write(" deal...error ");
                    }
                }
            }

            Console.Write("aaaaaaaaaaaaa ");
            return 0;
        }

        /// <summary>
        /// Joins a base URL and a relative page name with exactly one '/' between them.
        /// </summary>
        private static string combineUrl(string sBase, string sRelative)
        {
            if (string.IsNullOrEmpty(sRelative))
            {
                return sBase ?? "";
            }

            if (string.IsNullOrEmpty(sBase))
            {
                return sRelative;
            }

            return sBase.TrimEnd('/') + "/" + sRelative.TrimStart('/');
        }

        /// <summary>
        /// Writes text to a file, replacing any existing content.
        /// </summary>
        /// <param name="pageContext">Text to write.</param>
        /// <param name="sPath">Destination file path.</param>
        static void write2File(ref string pageContext, ref string sPath)
        {
            using (StreamWriter sw = new StreamWriter(sPath)) // write the fetched content to the file
            {
                sw.Write(pageContext);
            }
        }

        /// <summary>
        /// Entry point: walks the chapter index of the configured book.
        /// </summary>
        static void Main(string[] args)
        {
            try
            {
                // Credentials used to authenticate requests to the remote resource.
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;

                sMain = "http://wap.xxbiquge.com/59_59865/";
                dealIndexPage("");
            }
            catch (WebException webEx)
            {
                Console.WriteLine(webEx.Message);
            }

            Console.ReadLine(); // keep the console window open until Enter is pressed
        }
    }
}
/// <summary>
/// Serializes an object graph into a byte array with <see cref="BinaryFormatter"/>.
/// </summary>
/// <param name="data">The object to serialize.</param>
/// <returns>A byte array holding exactly the serialized bytes, with no trailing padding.</returns>
/// <remarks>
/// SECURITY NOTE(review): BinaryFormatter is obsolete and unsafe when the
/// resulting bytes are later deserialized from an untrusted source. It is kept
/// here because replacing it would change the wire format; consider migrating
/// to a safe serializer such as System.Text.Json.
/// </remarks>
public byte[] Serialize(object data)
{
    BinaryFormatter formatter = new BinaryFormatter();
    using (MemoryStream rems = new MemoryStream())
    {
        formatter.Serialize(rems, data);

        // ToArray copies only the bytes that were written. GetBuffer would
        // return the whole internal buffer, including its unused capacity.
        return rems.ToArray();
    }
}
/// <summary>
/// Deserializes an object graph from a byte array with <see cref="BinaryFormatter"/>.
/// </summary>
/// <param name="data">The buffer holding the serialized bytes.</param>
/// <returns>
/// The deserialized object. When deserialization fails, the error is written
/// to the console and a plain <see cref="object"/> placeholder is returned
/// instead, so the result is never null because of a failure.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <remarks>
/// SECURITY NOTE(review): BinaryFormatter must not be used on data from an
/// untrusted source; deserializing attacker-controlled bytes can execute
/// arbitrary code. Confirm where <paramref name="data"/> comes from.
/// </remarks>
public object Deserialize(byte[] data)
{
    if (data == null)
    {
        // Same exception type the MemoryStream constructor raised before,
        // now reported against this method's own parameter.
        throw new ArgumentNullException("data");
    }

    BinaryFormatter formatter = new BinaryFormatter();
    using (MemoryStream rems = new MemoryStream(data))
    {
        try
        {
            return formatter.Deserialize(rems);
        }
        catch (Exception ex)
        {
            // Deliberate best effort: report the failure and hand back a placeholder.
            Console.Write("BaseAction序列化bug:" + ex.ToString());
            return new object();
        }
    }
}