using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text;
using System.Threading.Tasks;
namespace xsharp
{
class Program
{
static string sDir = "G:\notex\";
static WebClient MyWebClient = new WebClient();
static string sMain = "";
static int writeContent(string sHtml, int idx)
{
int iBgnIdx = sHtml.IndexOf("<meta name="keywords" content="");
if (iBgnIdx <= 0)
return -1;
int iEndIdx = sHtml.IndexOf(""", iBgnIdx + 31);
string sTitle = sHtml.Substring(iBgnIdx + 31, iEndIdx - iBgnIdx - 31);
Console.WriteLine(sTitle);
iBgnIdx = sHtml.IndexOf("<div id="chaptercontent");
if (iBgnIdx <= 0)
return -1;
iEndIdx = sHtml.IndexOf("</div>", iBgnIdx + 1);
if (iEndIdx <= 0)
return -1;
string sDivSub = sHtml.Substring(iBgnIdx, iEndIdx - iBgnIdx);
iBgnIdx = sDivSub.IndexOf("</p>");
if (iBgnIdx <= 0)
return -1;
iEndIdx = sDivSub.IndexOf("<p style", iBgnIdx + 5);
if (iEndIdx <= 0)
return -1;
string sContentSub = sDivSub.Substring(iBgnIdx + 5, iEndIdx - iBgnIdx - 5);
sContentSub = sContentSub.Replace(" ", " ");
sContentSub = sContentSub.Replace("<br />", Environment.NewLine);
Console.WriteLine(sContentSub);
string sHtmlPath = sDir + idx.ToString() + ".html";
using (StreamWriter sw = new StreamWriter(sHtmlPath))//将获取的内容写入文本
{
sw.Write(sContentSub);
}
return 0;
}
static int downPage(string sUrl, ref string pageHtml)
{
Byte[] pageData = MyWebClient.DownloadData(sUrl); //从指定网站下载数据
//string pageHtml = Encoding.Default.GetString(pageData); //如果获取网站页面采用的是GB2312,则使用这句
pageHtml = Encoding.UTF8.GetString(pageData); //如果获取网站页面采用的是UTF-8,则使用这句
//Console.WriteLine(pageHtml);//在控制台输入获取的内容
return 0;
}
static string getNextPageUrl(string se)
{
int iLastPage = se.IndexOf("下一页</a>");
string sCut = se.Substring(iLastPage-12, 12);
if (sCut.CompareTo("="disabled">") == 0) //最后一页
return "";
int iHrefBgn = se.LastIndexOf("a href=", iLastPage);
if (iHrefBgn <= 0)
return "";
iHrefBgn = iHrefBgn + 8;
iLastPage = se.IndexOf('"', iHrefBgn);
if (iLastPage <= 0)
return "";
iHrefBgn = se.LastIndexOf('/', iLastPage);
if (iHrefBgn <= 0)
return "";
return se.Substring(iHrefBgn, iLastPage - iHrefBgn);
}
static int getContentUrl(string sKeys, ref string sHref, ref string sTitle)
{
int iHrefBgn = sKeys.IndexOf("a href=");
if (iHrefBgn <= 0)
return -1;
iHrefBgn = iHrefBgn + 8;
int iLastPage = sKeys.IndexOf('"', iHrefBgn);
if (iLastPage <= 0)
return -2;
sHref = sKeys.Substring(iHrefBgn, iLastPage - iHrefBgn);
iHrefBgn = iLastPage + 2;
iLastPage = sKeys.IndexOf("</a>");
if (iLastPage <= 0)
return -3;
sTitle = sKeys.Substring(iHrefBgn, iLastPage - iHrefBgn);
return 0;
}
static int dealIndexPage(string sIndexUrl)
{
string pageHtml = "";
downPage(sMain + sIndexUrl, ref pageHtml);
string sRecommendKey = "<div class="recommend">";
int iBgnIdx = pageHtml.IndexOf(sRecommendKey);
if (iBgnIdx <= 0)
return -1;
iBgnIdx = pageHtml.IndexOf(sRecommendKey, iBgnIdx + 10);
if (iBgnIdx <= 0)
return -2;
string sNoteKey = "<p class="note">";
int iEndIdx = pageHtml.IndexOf(sNoteKey, iBgnIdx + 10);
if (iEndIdx <= 0)
return -3;
string sHrefArray = pageHtml.Substring(iBgnIdx, iEndIdx - iBgnIdx);
//sHrefArray.Split(new string[] { "\r\n", });
string[] sTmpArray = sHrefArray.Split("
".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
foreach (string se in sTmpArray)
{
if (se.IndexOf("a href=") > 0)
{
Console.Write(se + "
");
int iLastPage = se.IndexOf("下一页</a>");
if (iLastPage > 0) //下一页
{
string sNext = getNextPageUrl(se);
if (sNext != "")
{
Console.Write("nextpage "+sNext+"
");
dealIndexPage(sNext);
}
else
Console.Write("Finish..........
");
}
else //普通链接页
{
string sUrl = "";
string sTitle = "";
if ( 0 == getContentUrl(se, ref sUrl, ref sTitle) )
Console.Write(" "+ sUrl + " " + sTitle + "
");
else
Console.Write(" deal...error
");
}
}
}
Console.Write("aaaaaaaaaaaaa
");
return 0;
}
static void write2File(ref string pageContext, ref string sPath)
{
using (StreamWriter sw = new StreamWriter(sPath))//将获取的内容写入文本
{
sw.Write(pageContext);
}
}
static void Main(string[] args)
{
try
{
MyWebClient.Credentials = CredentialCache.DefaultCredentials;//获取或设置用于对向Internet资源的请求进行身份验证的网络凭据。
//string pageHtml = "";
//downPage("http://wap.xxbiquge.com/59_59865/index_49.html", ref pageHtml);
//downPage("http://wap.xxbiquge.com/59_59865/3184122.html", ref pageHtml);
sMain = "http://wap.xxbiquge.com/59_59865/";
dealIndexPage("");
//write2File(pageHtml, "H:\page\ouput.html");
//writeContent(pageHtml, 0);
}
catch (WebException webEx)
{
Console.WriteLine(webEx.Message.ToString());
}
Console.ReadLine(); //让控制台暂停,否则一闪而过了
}
}
}
///<summary>
/// 序列化
/// </summary>
/// <param name="data">要序列化的对象</param>
/// <returns>返回存放序列化后的数据缓冲区</returns>
public byte[] Serialize(object data)
{
BinaryFormatter formatter = new BinaryFormatter();
MemoryStream rems = new MemoryStream();
formatter.Serialize(rems, data);
return rems.GetBuffer();
}
/// <summary>
/// 反序列化
/// </summary>
/// <param name="data">数据缓冲区</param>
/// <returns>对象</returns>
public object Deserialize(byte[] data)
{
BinaryFormatter formatter = new BinaryFormatter();
MemoryStream rems = new MemoryStream(data);
data = null;
object obj = new object();
try
{
obj = formatter.Deserialize(rems);
}
catch (Exception ex)
{
Console.Write("BaseAction序列化bug:" + ex.ToString());
}
return obj;
}