zoukankan      html  css  js  c++  java
  • 简易网页采集器的实现

    --------------------------------------------------------------------------
    -----------------------------Cryking 原创-----------------------------
    -----------------------转载请注明出处,谢谢!------------------------ 

    自己写的一个扫描网址标题的小工具.

    功能:遍历指定范围的IP,根据IP扫描网页的标题,并记录(支持二级重定向网页的扫描)

             自动记录采集日志到D盘的net_collect.log文件中.

    类型:控制台程序

    实现语言:C#

    需要的环境: .NET 3.5

    可选的环境:Oracle数据库

    相关的缺省值说明:

    缺省(直接按回车即是缺省值)Oracle数据库用户:scott

    缺省Oracle数据库密码:tigger

    缺省Oracle数据库连接标识符:orcl  (即TNSNAME名称)

    缺省的http连接超时时间:6秒

    缺省启用数据库来记录采集到的信息

    缺省不启用扫描完成后自动关机

     (篇幅原因,数据库连接类这里就不贴了)

    主函数代码如下:

    /// <summary>
    /// Console entry point. Prompts for the HTTP timeout, the auto-shutdown choice and the
    /// database choice; when the database is enabled it connects to Oracle and creates the
    /// NET_COLLECT table if it does not exist. It then asks for an IP range, splits it with
    /// allocaIncreament, scans each sub-range on its own thread and writes the start and end
    /// time to d:\net_collect.log.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        try
        {
            string user = "scott";
            string pwd = "tigger";
            string tns = "orcl";

            Console.WriteLine("***************简易网址扫描器V1.0*****************");
            Console.WriteLine("**************Created  By Cryking*****************");
            Console.WriteLine("******************QQ:278676125********************");
            Console.WriteLine("**************************************************");
            Console.WriteLine("请设置超时时间(若网络环境较差,建议设大一点,如100秒)(单位/秒):");
            // Pressing Enter (or a closed stdin) leaves timeOut at the value it already holds
            // instead of failing inside Int32.Parse. Non-numeric text still throws and is
            // reported by the catch block below, as before.
            string timeoutInput = Console.ReadLine();
            if (timeoutInput != null && timeoutInput.Trim().Length > 0)
            {
                timeOut = Int32.Parse(timeoutInput);
            }
            Console.WriteLine("扫描完成后是否自动关机(Y/N)?");
            if (Regex.IsMatch(Console.ReadLine() ?? string.Empty, "(?i)[y]"))
            {
                shutDownFlag = 1;
            }
            Console.WriteLine("是否启用数据库支持(不启用则只写日志文件),Y/N?:");
            if (Regex.IsMatch(Console.ReadLine() ?? string.Empty, "(?i)[n]"))
            {
                DBFlag = 0;
            }
            if (DBFlag == 1)
            {
                Console.WriteLine("请输入Oracle数据库连接用户名:");
                string userInput = Console.ReadLine();
                user = string.IsNullOrEmpty(userInput) ? "scott" : userInput;

                Console.WriteLine("请输入Oracle数据库连接密码:");
                // Read the password key by key so that only '*' is echoed.
                StringBuilder pwdInput = new StringBuilder();
                ConsoleKeyInfo info;
                do
                {
                    info = Console.ReadKey(true);
                    if (info.Key == ConsoleKey.Backspace)
                    {
                        // Let the user correct a typo: drop the last character and its '*'.
                        if (pwdInput.Length > 0)
                        {
                            pwdInput.Length--;
                            Console.Write("\b \b");
                        }
                    }
                    else if (info.Key != ConsoleKey.Enter && info.Key != ConsoleKey.Escape && info.Key != ConsoleKey.Tab && info.KeyChar != '\0')
                    {
                        pwdInput.Append(info.KeyChar);
                        Console.Write('*');
                    }
                } while (info.Key != ConsoleKey.Enter);
                pwd = pwdInput.Length == 0 ? "tigger" : pwdInput.ToString();
                Console.WriteLine();

                Console.WriteLine("请输入Oracle数据库连接标识符(TNSNAME):");
                string tnsInput = Console.ReadLine();
                tns = string.IsNullOrEmpty(tnsInput) ? "orcl" : tnsInput;

                if (!DBAccess.DBConnect(user, pwd, tns))
                {
                    MessageBox.Show("数据库连接失败!", "错误001", MessageBoxButtons.OK, MessageBoxIcon.Error);
                    System.Diagnostics.Process.GetCurrentProcess().Kill();
                }
                Console.WriteLine("数据库连接成功!");
                if (DBAccess.selectStr("select count(*) from user_objects where object_name='NET_COLLECT' ") == "0")
                {
                    Console.WriteLine("开始创建表(NET_COLLECT),请等待...");
                    if (0 == DBAccess.DBExecSql(@"create table NET_COLLECT(
        IP          VARCHAR2(30) not null,
        PORT        NUMBER default 80,
        TITLE       VARCHAR2(4000),
        URL         VARCHAR2(2000),
        COLLECTDATE DATE default sysdate
    )"))
                    {
                        Console.WriteLine("表(NET_COLLECT)创建成功!");
                    }
                    else
                    {
                        Console.WriteLine("表(NET_COLLECT)创建失败,请参照说明,先手工创建表(NET_COLLECT)!");
                        System.Diagnostics.Process.GetCurrentProcess().Kill();
                    }
                }
            }

            Console.WriteLine("请输入扫描范围(如:0.0.0.0-10.10.10.10)");
            string scanInput = Console.ReadLine() ?? string.Empty;
            string[] tmpIp = scanInput.Trim().Split('-');
            if (tmpIp.Length != 2)
            {
                // Fail with a readable message instead of an IndexOutOfRangeException.
                throw new FormatException("扫描范围格式错误,应为:起始IP-结束IP");
            }
            // Split the requested range into one sub-range per worker thread.
            string[] ipScanScop = allocaIncreament(tmpIp[0], tmpIp[1]);

            logFile = new StreamWriter("d:\\net_collect.log", true);
            try
            {
                DateTime startTime = DateTime.Now;
                logFile.WriteLine("开始时间:" + DateTime.Now.ToString());

                // One worker thread per sub-range.
                Thread[] workers = new Thread[ipScanScop.Length];
                for (int n = 0; n < workers.Length; n++)
                {
                    workers[n] = new Thread(new ParameterizedThreadStart(ipScan));
                    workers[n].Start(ipScanScop[n]);
                }

                // Block until every worker has ended. Unlike spinning on the shared flag
                // counter, this uses no CPU while waiting and cannot hang when a worker
                // ends without incrementing the counter.
                foreach (Thread worker in workers)
                {
                    worker.Join();
                }

                DBAccess.DBClose();
                TimeSpan ts = DateTime.Now - startTime;
                logFile.WriteLine("结束时间:" + DateTime.Now.ToString());
                Console.WriteLine("总共花费时间:" + ts.ToString());
            }
            finally
            {
                // Close (and thereby flush) the log even when something above throws.
                logFile.Close();
            }

            if (1 == shutDownFlag)
            {
                Process.Start("Shutdown.exe", " -s -t 0"); // power off once the scan is done
            }
            Console.ReadKey();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }


     扫描功能函数(注意:主函数中线程入口引用的名字是 ipScan,而下面贴出的函数名为 ipScan1):

    /// <summary>
    /// Worker-thread entry point: scans every address of one "start-end" IPv4 range.
    /// For each address it fetches the page with GetHtmlInfo, extracts the title with
    /// GetTitle, optionally inserts a row into net_collect, prints the result and appends
    /// it to the shared log file. The shared flag counter is incremented once when the
    /// method ends, whether it ends normally or through an exception.
    /// </summary>
    /// <param name="obj">
    /// Range text of the form "a.b.c.d-e.f.g.h". The two ends may be given in either order.
    /// A range whose two ends are equal is treated as empty; allocaIncreament produces such
    /// ranges to fill worker slots it has no addresses for.
    /// </param>
    static void ipScan1(object obj)
    {
        try
        {
            string[] scope = obj.ToString().Split('-');
            long first = ipScanToNumber(scope[0]);
            long last = ipScanToNumber(scope[1]);
            if (first > last)
            {
                // Order the ends numerically; comparing the text would put "9.0.0.0"
                // after "10.0.0.0".
                long swap = first;
                first = last;
                last = swap;
            }
            if (first == last)
            {
                return; // empty placeholder range
            }

            string logBuffer = "";
            // long (not uint) so that cur++ cannot wrap around at 255.255.255.255.
            for (long cur = first; cur <= last; cur++)
            {
                long a = (cur >> 24) & 0xFF;
                long b = (cur >> 16) & 0xFF;
                if (10 == a || 127 == a)
                {
                    // Skip 10.x.x.x and 127.x.x.x: jump to the last address of the block,
                    // the loop increment then moves past it.
                    cur |= 0x00FFFFFF;
                    continue;
                }
                if (192 == a && 168 == b)
                {
                    // Skip 192.168.x.x the same way.
                    cur |= 0x0000FFFF;
                    continue;
                }

                string ip = a.ToString() + "." + b.ToString() + "." + ((cur >> 8) & 0xFF).ToString() + "." + (cur & 0xFF).ToString();

                // The platform default encoding is used, so some pages may decode as garbage.
                string html = GetHtmlInfo(ip, timeOut * 1000, Encoding.Default);
                string title = GetTitle(html);
                if (title == string.Empty)
                {
                    // No title found: fall back to (at most) the first 1000 characters.
                    title = html.Length > 1000 ? html.Substring(0, 1000) : html;
                }

                if (html != string.Empty && html != "无法连接到远程服务器" && DBFlag == 1)
                {
                    // The title comes from an untrusted web page: double the single quotes
                    // so it cannot terminate the SQL string literal.
                    string sqlTitle = title == null ? string.Empty : title.Replace("'", "''");
                    DBAccess.DBExecSql("insert into net_collect values('" + ip + "',default,'" + sqlTitle + "','',default)");
                }
                Console.WriteLine(ip + " --" + title);

                lock (logFile)
                {
                    myMutex.WaitOne();
                    try
                    {
                        // Do not repeat a line whose page content equals the previous one
                        // logged by this thread (typically the same error text).
                        if (logBuffer != html)
                        {
                            logFile.WriteLine("ip:" + ip + " [MSG:]" + title);
                            logBuffer = html;
                        }
                        // Flush while still holding the lock; the writer is shared.
                        logFile.Flush();
                    }
                    finally
                    {
                        myMutex.ReleaseMutex();
                    }
                }
                countPort++;
                count++;
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
        }
        finally
        {
            // Always signal completion, atomically, so that code waiting for the counter
            // to reach the number of workers is not left waiting forever.
            // NOTE(review): assumes flag is an int field - confirm against its declaration.
            Interlocked.Increment(ref flag);
        }
    }

    /// <summary>
    /// Converts dotted IPv4 text to its numeric value. An octet above 255 simply carries
    /// into the next one, so text such as "1.1.1.256" is read as 1.1.2.0.
    /// </summary>
    /// <param name="ip">Address text with exactly four dot-separated numbers.</param>
    /// <returns>The address as a number (held in a long).</returns>
    /// <exception cref="FormatException">The text does not consist of four numbers.</exception>
    private static long ipScanToNumber(string ip)
    {
        string[] parts = ip.Trim().Split('.');
        if (parts.Length != 4)
        {
            throw new FormatException("无效的IP地址: " + ip);
        }
        long value = 0;
        foreach (string part in parts)
        {
            value = value * 256 + Int32.Parse(part);
        }
        return value;
    }


     

     网页信息获取函数:

    /// <summary>
    /// Downloads the beginning of a web page, up to and including the line that contains
    /// the closing title tag, following at most one redirect explicitly.
    /// </summary>
    /// <param name="url">Host or URL; "http://" is prepended when no http/https scheme is present.</param>
    /// <param name="timeout">Request timeout in milliseconds.</param>
    /// <param name="EnCodeType">Encoding used to decode the response body.</param>
    /// <returns>
    /// The text read (lines joined with "\r\n") when the final status is 200 OK;
    /// <see cref="string.Empty"/> for any other status or a redirect without a Location header;
    /// the exception message when the request fails. Callers compare against that message
    /// text, so failures are deliberately reported as a return value and not thrown.
    /// </returns>
    static string GetHtmlInfo(string url, int timeout, Encoding EnCodeType)
    {
        // The "User-Agent:" header name must not be part of the value.
        const string userAgent = "Mozilla/5.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 2.0.40607; .NET CLR 1.1.4322; .NET CLR 3.5.30729;.NET CLR 1.0.3705)";

        if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
            !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            url = "http://" + url;
        }

        HttpWebRequest request = null;
        HttpWebResponse response = null;
        StreamReader reader = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = timeout;
            request.UserAgent = userAgent;
            request.Accept = "*/*";
            request.AllowAutoRedirect = false; // the first hop is followed by hand below
            request.KeepAlive = true;
            request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
            response = (HttpWebResponse)request.GetResponse();

            HttpStatusCode status = response.StatusCode;
            bool redirected = status == HttpStatusCode.MovedPermanently
                || status == HttpStatusCode.Found
                || status == HttpStatusCode.SeeOther
                || status == HttpStatusCode.TemporaryRedirect;
            if (redirected)
            {
                string location = response.Headers["Location"];
                Uri baseUri = response.ResponseUri;

                // Release the first exchange before starting the second one.
                response.Close();
                response = null;
                request.Abort();

                if (string.IsNullOrEmpty(location))
                {
                    return string.Empty;
                }

                // new Uri(base, text) also resolves a relative Location header.
                request = (HttpWebRequest)WebRequest.Create(new Uri(baseUri, location));
                request.Timeout = timeout;
                request.UserAgent = userAgent;
                response = (HttpWebResponse)request.GetResponse();
            }

            if (response.StatusCode != HttpStatusCode.OK)
            {
                return string.Empty;
            }

            reader = new StreamReader(response.GetResponseStream(), EnCodeType);
            StringBuilder builder = new StringBuilder();
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                builder.Append(line);
                // Only the title is needed, so stop after the line that closes it.
                // ReadLine returns whole lines, so text following the tag on that line
                // is still included. Checking just the new line avoids rebuilding the
                // whole buffer on every iteration.
                if (line.IndexOf("</title>", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    break;
                }
                builder.Append("\r\n");
            }
            return builder.ToString();
        }
        catch (Exception ex)
        {
            return ex.Message;
        }
        finally
        {
            if (reader != null) { reader.Close(); }
            if (response != null) { response.Close(); }
            // Abort so that the unread remainder of the body is not downloaded.
            if (request != null) { request.Abort(); }
        }
    }


     

     IP范围分配函数(分配给各个线程):

    /// <summary>
    /// Splits an inclusive IPv4 range into eight "start-end" sub-ranges, one per worker thread.
    /// </summary>
    /// <remarks>
    /// The addresses are handled as numbers, so the two ends are ordered correctly
    /// ("9.0.0.0" comes before "10.0.0.0"), the widest range (0.0.0.0-255.255.255.255)
    /// does not overflow, and no sub-range reaches past the requested end.
    /// A sub-range whose two ends are equal means "nothing to scan"; the scanner treats it
    /// that way, and unused slots are filled with "end-end". Because of that convention
    /// every real sub-range holds at least two addresses, so a range shorter than sixteen
    /// addresses uses fewer than eight real sub-ranges.
    /// NOTE(review): for the same reason a request for a single address (start equal to end)
    /// yields only empty slots, i.e. nothing is scanned.
    /// </remarks>
    /// <param name="tmpIp0">One end of the range, as dotted IPv4 text.</param>
    /// <param name="tmpIp1">The other end of the range; the two may be given in either order.</param>
    /// <returns>Exactly eight strings of the form "a.b.c.d-e.f.g.h".</returns>
    /// <exception cref="FormatException">An address is not four numbers in the range 0-255.</exception>
    static string[] allocaIncreament(string tmpIp0, string tmpIp1) // spread the range evenly over the worker threads
    {
        const int slots = 8;

        long first = allocaParseIp(tmpIp0);
        long last = allocaParseIp(tmpIp1);
        if (first > last)
        {
            long swap = first;
            first = last;
            last = swap;
        }

        long total = last - first + 1;                      // number of addresses, inclusive
        int used = (int)Math.Min((long)slots, total / 2);   // real sub-ranges, each >= 2 addresses
        long baseSize = used > 0 ? total / used : 0;
        long remainder = used > 0 ? total % used : 0;       // the first 'remainder' sub-ranges get one extra
        string filler = allocaFormatIp(last) + "-" + allocaFormatIp(last);

        string[] ipResult = new string[slots];
        long cursor = first;
        for (int slot = 0; slot < slots; slot++)
        {
            if (slot >= used)
            {
                ipResult[slot] = filler;
                continue;
            }
            long size = baseSize + (slot < remainder ? 1 : 0);
            ipResult[slot] = allocaFormatIp(cursor) + "-" + allocaFormatIp(cursor + size - 1);
            cursor += size;
        }
        return ipResult;
    }

    /// <summary>Converts dotted IPv4 text to its numeric value (held in a long).</summary>
    private static long allocaParseIp(string ip)
    {
        string[] parts = ip.Trim().Split('.');
        if (parts.Length != 4)
        {
            throw new FormatException("无效的IP地址: " + ip);
        }
        long value = 0;
        foreach (string part in parts)
        {
            int octet = Int32.Parse(part);
            if (octet < 0 || octet > 255)
            {
                throw new FormatException("无效的IP地址: " + ip);
            }
            value = value * 256 + octet;
        }
        return value;
    }

    /// <summary>Formats a numeric IPv4 value as dotted text.</summary>
    private static string allocaFormatIp(long value)
    {
        return ((value >> 24) & 0xFF).ToString() + "." + ((value >> 16) & 0xFF).ToString() + "."
            + ((value >> 8) & 0xFF).ToString() + "." + (value & 0xFF).ToString();
    }


     运行的界面如下:

    ---

    工具下载地址:http://pan.baidu.com/share/link?shareid=657915&uk=2449788611

    有任何问题及建议,请联系我QQ:278676125

  • 相关阅读:
    网络安全分析
    java实现 洛谷 P1464 Function
    java实现 洛谷 P1464 Function
    java实现 洛谷 P1014 Cantor表
    java实现 洛谷 P1014 Cantor表
    java实现 洛谷 P1014 Cantor表
    java实现 洛谷 P1014 Cantor表
    java实现 洛谷 P1014 Cantor表
    java实现 洛谷 P1540 机器
    java实现 洛谷 P1540 机器
  • 原文地址:https://www.cnblogs.com/javawebsoa/p/3074758.html
Copyright © 2011-2022 走看看