zoukankan      html  css  js  c++  java
  • C#分析搜索引擎URL得到搜索关键字,并判断页面停留时间以及来源页面

    前台代码:

     // Page-visit tracking.
     // Records the arrival time when the script loads and, when the page unloads,
     // posts the stay time, current URL, referrer and title to /ashx/SiteStats.ashx.
     var start;
     var end;
     var state;
     var lasturl = document.referrer;
     // Arrival time. It is taken from the "Date" header of a synchronous request
     // (no URL is given, so jQuery targets the current page) instead of the local
     // clock, so a wrong clock on the visitor's machine does not skew the result.
     start = new Date($.ajax({ async: false }).getResponseHeader("Date"));
     $(document).ready(function () {
         // Ordered [path marker, action label] pairs. The first marker found in
         // location.pathname at an index greater than 0 decides the action label.
         var sectionActions = [
             ["GeRenKuanDai", "家庭宽带信息资费查看"],
             ["boxdetail", "查看大卖系列"],
             ["cesu", "测速"],
             ["government", "企业宽带查看"],
             ["GwbnNewApply", "宽带新开户页面"],
             ["wodechangkuan", "网上营业厅-用户登录"],
             ["xinwen", "新闻浏览"]
         ];

         // Maps a pathname to the action label stored with the visit ("" when unknown).
         function resolveAction(pathname) {
             if (pathname == "/default.htm") {
                 return "首页访问";
             }
             for (var i = 0; i < sectionActions.length; i++) {
                 if (pathname.indexOf(sectionActions[i][0]) > 0) {
                     return sectionActions[i][1];
                 }
             }
             return "";
         }

         // Fires when the visitor closes the page, follows a link or reloads.
         $(window).unload(function () {
             // Leave time, again read from the server's "Date" header.
             end = new Date($.ajax({ async: false }).getResponseHeader("Date"));
             // Stay time in SECONDS: getTime() yields milliseconds, hence the / 1000.
             state = (end.getTime() - start.getTime()) / 1000;
             // Hand the visit over to the server-side handler, which stores it.
             $.post("/ashx/SiteStats.ashx", {
                 start_time: start.Format("yyyy-MM-dd hh:mm:ss.S"),
                 state_time: state,
                 action: resolveAction(window.location.pathname),
                 state_url: window.location.href,
                 last_url: lasturl,
                 state_title: $("title").html()
             });
         });
     });
    // Formats this date according to a pattern string.
    // Recognised tokens: y (year), M (month), d (day), h (hours), m (minutes),
    // s (seconds), q (quarter), S (milliseconds). Only the first occurrence of each
    // token run is replaced. A run of length 1 is replaced by the raw number; a
    // longer run is replaced by the number left-padded with zeros to two characters.
    // Example: new Date(2020, 0, 5, 3, 4, 9, 7).Format("yyyy-MM-dd hh:mm:ss.S")
    //          gives "2020-01-05 03:04:09.7".
    Date.prototype.Format = function (fmt) {
        var fields = {
            "M+": this.getMonth() + 1, // month
            "d+": this.getDate(), // day of month
            "h+": this.getHours(), // hours
            "m+": this.getMinutes(), // minutes
            "s+": this.getSeconds(), // seconds
            "q+": Math.floor((this.getMonth() + 3) / 3), // quarter
            "S": this.getMilliseconds() // milliseconds
        };

        // The year is handled separately: keep the last N digits for a run of N "y".
        var yearHit = /(y+)/.exec(fmt);
        if (yearHit) {
            var yearToken = yearHit[1];
            fmt = fmt.replace(yearToken, (this.getFullYear() + "").substr(4 - yearToken.length));
        }

        for (var key in fields) {
            var hit = new RegExp("(" + key + ")").exec(fmt);
            if (!hit) {
                continue;
            }
            var token = hit[1];
            var value = fields[key];
            var text = (token.length == 1) ? value : ("00" + value).substr(("" + value).length);
            fmt = fmt.replace(token, text);
        }
        return fmt;
    };

    后台ashx页面:

      1 <%@ WebHandler Language="C#" Class="SiteStats" %>
      2 
      3 
      4 using System;
      5 using System.Collections.Generic;
      6 using System.Text;
      7 using System.Text.RegularExpressions;
      8 using System.Web;
      9 using Newtonsoft.Json;
     10 using Newtonsoft.Json.Linq;
     11 using System.Web.Script.Serialization;
     12 using System.Net;
     13 using System.IO;
     14 public class SiteStats : IHttpHandler
     15 {
     // Search keyword and engine name extracted from the referrer (empty when the
     // referrer is not a recognised search-engine URL).
     string Keyword = String.Empty;
     string Engine = String.Empty;
     // Data-access object and the record that is filled in and stored per request.
     Maticsoft.BLL.tb_SiteStats tbSiteStatesBLL = new Maticsoft.BLL.tb_SiteStats();
     Maticsoft.Model.tb_SiteStats tbSiteStatesModle = new Maticsoft.Model.tb_SiteStats();

     /// <summary>
     /// Stores one page visit. Reads <c>last_url</c>, <c>state_time</c>,
     /// <c>state_url</c>, <c>state_title</c> and <c>action</c> from the request,
     /// derives search keyword/engine, client IP, referrer title, region and the
     /// new/returning-visitor flag, then saves the record through the BLL.
     /// </summary>
     /// <param name="context">Current HTTP context.</param>
     /// <remarks>
     /// The referrer-title and region lookups call external services; they are
     /// best-effort and never prevent the visit from being recorded.
     /// </remarks>
     public void ProcessRequest(HttpContext context)
     {
         context.Response.ContentType = "text/plain";

         // Referrer as reported by the client script; may be null or empty.
         string url = context.Request["last_url"];
         if (!string.IsNullOrEmpty(url) && IsSearchEnginesGet(url))
         {
             Keyword = SearchKey(url);
             Engine = EngineName;
         }

         tbSiteStatesModle.ID = Guid.NewGuid().ToString();
         tbSiteStatesModle.State_time = context.Request["state_time"];

         // Visit start = now minus the reported stay time (seconds). The value is
         // client-supplied, so a missing, malformed or absurd value falls back to "now".
         DateTime visitTime = DateTime.Now;
         double staySeconds;
         if (double.TryParse(tbSiteStatesModle.State_time,
                             System.Globalization.NumberStyles.Float,
                             System.Globalization.CultureInfo.InvariantCulture,
                             out staySeconds)
             && staySeconds > 0 && !double.IsInfinity(staySeconds))
         {
             try
             {
                 visitTime = visitTime.AddSeconds(-staySeconds);
             }
             catch (ArgumentOutOfRangeException)
             {
                 // Duration does not fit into DateTime's range: keep "now".
             }
         }
         tbSiteStatesModle.VisitTime = visitTime;

         tbSiteStatesModle.State_url = context.Request["state_url"];
         tbSiteStatesModle.State_title = context.Request["state_title"];
         tbSiteStatesModle.UserAction = context.Request["action"];

         string ip = ReadIP();
         tbSiteStatesModle.IpAddress = ip;
         tbSiteStatesModle.Last_title = string.IsNullOrEmpty(url) ? "" : TryGetReferrerTitle(url);

         // Region + city + county + ISP, when the lookup service answers successfully.
         string region = TryLookupRegion(ip);
         if (region != null)
         {
             tbSiteStatesModle.ExtStr1 = region;
         }

         // New vs. returning visitor. A visitor without the "unid" cookie is new
         // only if the IP has never been recorded; either way a cookie is issued.
         // (Previously a missing "else" overwrote the new-visitor label.)
         HttpCookie cookie = context.Request.Cookies["unid"];
         if (cookie == null)
         {
             bool knownIp = tbSiteStatesBLL.ExistsByIP(ip) == true;
             tbSiteStatesModle.IsNewUser = knownIp ? "老访客" : "新访客";
             cookie = new HttpCookie("unid", Guid.NewGuid().ToString()); // unique visitor id
             cookie.Expires = DateTime.Now.AddYears(10);
             context.Response.Cookies.Add(cookie);
         }
         else
         {
             tbSiteStatesModle.IsNewUser = "老访客";
         }

         tbSiteStatesModle.Last_url = url;
         tbSiteStatesModle.ExtStr2 = cookie.Value;
         tbSiteStatesModle.VisitWay = Engine;
         tbSiteStatesModle.KeyWords = Keyword;
         tbSiteStatesBLL.Add(tbSiteStatesModle);
     }

     // Fetches the referrer page title; returns "" instead of failing the request.
     private string TryGetReferrerTitle(string url)
     {
         try
         {
             return GetTitle(url);
         }
         catch (Exception ex)
         {
             // Best-effort enrichment: log and continue so the visit is still stored.
             System.Diagnostics.Trace.TraceWarning("SiteStats: referrer title lookup failed: " + ex.Message);
             return "";
         }
     }

     // Asks the IP lookup service for region/city/county/ISP; returns null when the
     // service is unreachable or does not report success (code "0").
     private static string TryLookupRegion(string ip)
     {
         try
         {
             string jsonText;
             using (WebClient wc = new WebClient())
             {
                 byte[] bytes = wc.DownloadData("http://ip.taobao.com/service/getIpInfo.php?ip=" + Uri.EscapeDataString(ip));
                 jsonText = Encoding.UTF8.GetString(bytes);
             }

             JObject jo = JsonConvert.DeserializeObject(jsonText) as JObject;
             if (jo == null)
             {
                 return null;
             }
             JToken code = jo["code"];
             JObject data = jo["data"] as JObject;
             if (code == null || code.ToString() != "0" || data == null)
             {
                 return null;
             }
             // The JSON parser already decodes \uXXXX escapes, so no extra
             // unescaping pass is applied to the field values.
             return JsonField(data, "region") + JsonField(data, "city")
                  + JsonField(data, "county") + JsonField(data, "isp");
         }
         catch (WebException)
         {
             return null;
         }
         catch (JsonException)
         {
             return null;
         }
     }

     // Text of a JSON property, or "" when the property is absent.
     private static string JsonField(JObject parent, string name)
     {
         JToken value = parent[name];
         return value == null ? "" : value.ToString();
     }
    /// <summary>
    /// Reads the client IP address of the current request.
    /// </summary>
    /// <returns>
    /// The first valid address in <c>HTTP_X_FORWARDED_FOR</c>, otherwise
    /// <c>REMOTE_ADDR</c>, otherwise <c>UserHostAddress</c>, otherwise "0.0.0.0".
    /// </returns>
    /// <remarks>
    /// X-Forwarded-For is sent by the client or by proxies and can be forged; do not
    /// use the result for security decisions.
    /// </remarks>
    public static string ReadIP()
    {
        HttpRequest request = HttpContext.Current.Request;

        // The header may hold a list ("client, proxy1, proxy2"); the first entry is
        // the originating client. Anything that is not an IP address is ignored.
        string forwarded = request.ServerVariables["HTTP_X_FORWARDED_FOR"];
        if (!string.IsNullOrEmpty(forwarded))
        {
            string first = forwarded.Split(',')[0].Trim();
            IPAddress parsed;
            if (first.Length > 0 && IPAddress.TryParse(first, out parsed))
            {
                return first;
            }
        }

        string IP = request.ServerVariables["REMOTE_ADDR"];
        if (string.IsNullOrEmpty(IP))
        {
            IP = request.UserHostAddress;
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = "0.0.0.0";
        }
        return IP;
    }
    /// <summary>
    /// Downloads a page and extracts the text of its first &lt;title&gt; element.
    /// </summary>
    /// <param name="Url">Absolute http/https URL of the page.</param>
    /// <returns>
    /// The title text, or "" when the URL is not an absolute http/https URL, the
    /// download fails, or the page has no title element.
    /// </returns>
    /// <remarks>
    /// NOTE(review): in this file the URL comes from a request parameter, so the
    /// server fetches an address chosen by the client. Only http/https is allowed
    /// here; consider additionally restricting the target hosts.
    /// </remarks>
    public string GetTitle(string Url)
    {
        Uri target;
        if (!Uri.TryCreate(Url, UriKind.Absolute, out target)
            || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
        {
            return "";
        }

        string strHtml;
        try
        {
            WebRequest request = WebRequest.Create(target);
            request.Timeout = 5000; // ms; do not let a slow remote site block the handler
            using (WebResponse response = request.GetResponse())
            using (StreamReader sr = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                strHtml = sr.ReadToEnd();
            }
        }
        catch (WebException)
        {
            return "";
        }
        catch (IOException)
        {
            return "";
        }

        // Case-insensitive, tolerates attributes on the tag and line breaks in the text.
        Match title = Regex.Match(strHtml, @"<title[^>]*>(.*?)</title>",
                                  RegexOptions.IgnoreCase | RegexOptions.Singleline);
        return title.Success ? title.Groups[1].Value : "";
    }
    156 
    /// <summary>
    /// Replaces every literal <c>\uXXXX</c> escape (backslash, "u" or "U", four hex
    /// digits) in the input with the character it denotes. Text without such
    /// escapes is returned unchanged.
    /// </summary>
    /// <param name="str">Text that may contain \uXXXX escapes.</param>
    /// <returns>The text with the escapes decoded; null or empty input is returned as is.</returns>
    public static string UnicodeToStr(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }
        // The backslash must be escaped in the pattern: a bare \u followed by
        // anything other than four hex digits is an invalid regex escape in .NET.
        return Regex.Replace(str, @"\\u([0-9a-f]{4})", delegate (Match m1)
        {
            return ((char)Convert.ToInt32(m1.Groups[1].Value, 16)).ToString();
        }, RegexOptions.IgnoreCase);
    }
    /// <summary>
    /// Converts a GMT time string (for example an HTTP "Date" header value) to local time.
    /// </summary>
    /// <param name="gmt">
    /// Text such as "Mon, 01 Jan 2018 00:00:00 GMT" or the same form with a numeric
    /// offset ("+0800"). Any other text is parsed with the current culture.
    /// </param>
    /// <returns>The local time, or <see cref="DateTime.MinValue"/> when the text is null, empty or cannot be parsed.</returns>
    public static DateTime GmtToLocal(string gmt)
    {
        if (string.IsNullOrEmpty(gmt))
        {
            return DateTime.MinValue;
        }

        string pattern = "";
        if (gmt.IndexOf("+0", StringComparison.Ordinal) != -1)
        {
            // Numeric offset present: drop the "GMT" marker and parse the offset.
            gmt = gmt.Replace("GMT", "");
            pattern = "ddd, dd MMM yyyy HH':'mm':'ss zzz";
        }
        if (gmt.IndexOf("GMT", StringComparison.OrdinalIgnoreCase) != -1)
        {
            pattern = "ddd, dd MMM yyyy HH':'mm':'ss 'GMT'";
        }

        DateTime dt;
        if (pattern != "")
        {
            if (!DateTime.TryParseExact(gmt, pattern,
                                        System.Globalization.CultureInfo.InvariantCulture,
                                        System.Globalization.DateTimeStyles.AdjustToUniversal,
                                        out dt))
            {
                return DateTime.MinValue;
            }
            return dt.ToLocalTime();
        }

        // No recognised marker: fall back to general parsing.
        return DateTime.TryParse(gmt, out dt) ? dt : DateTime.MinValue;
    }
    210     #region -搜索引擎-
    // Search-engine signatures: { URL marker, query encoding, keyword parameter name }.
    // EngineRegEx walks this table in order and uses the first marker contained in the URL.
    private string[][] _Enginers =
    {
        new[] { "google", "utf8", "q" },
        new[] { "baidu", "gb2312", "wd" },
        new[] { "yahoo", "utf8", "p" },
        new[] { "yisou", "utf8", "search" },
        new[] { "live", "utf8", "q" },
        new[] { "tom", "gb2312", "word" },
        new[] { "163", "gb2312", "q" },
        new[] { "iask", "gb2312", "k" },
        new[] { "soso", "gb2312", "w" },
        new[] { "sogou", "gb2312", "query" },
        new[] { "zhongsou", "gb2312", "w" },
        new[] { "so", "utf8", "q" },
        new[] { "openfind", "utf8", "q" },
        new[] { "alltheweb", "utf8", "q" },
        new[] { "lycos", "utf8", "query" },
        new[] { "onseek", "utf8", "q" },
        new[] { "youdao", "utf8", "q" },
        new[] { "bing", "utf8", "q" },
        new[] { "118114", "gb2312", "kw" }
    };

    // Name of the matched search engine ("" until EngineRegEx finds one).
    private string _EngineName = "";
    public string EngineName
    {
        get { return _EngineName; }
    }

    // Query encoding of the matched search engine (initially "utf8").
    private string _Coding = "utf8";
    public string Coding
    {
        get { return _Coding; }
    }

    // Name of the query-string parameter that carries the search keyword.
    private string _RegexWord = "";
    public string RegexWord
    {
        get { return _RegexWord; }
    }

    // Keyword-extraction pattern; EngineRegEx completes it for the matched engine.
    private string _Regex = "(";
    263 
    /// <summary>
    /// Finds the first known search engine whose marker occurs in the given URL and
    /// prepares the engine name, encoding, keyword parameter name and the
    /// keyword-extraction pattern for it.
    /// </summary>
    /// <param name="myString">Referrer URL, expected in lower case (markers are lower case).</param>
    /// <remarks>
    /// All engine state is reset on every call, so a non-matching URL leaves the
    /// engine name empty and repeated calls do not pile up pattern fragments.
    /// </remarks>
    public void EngineRegEx(string myString)
    {
        _EngineName = "";
        _Coding = "utf8";
        _RegexWord = "";
        _Regex = @"(";
        if (string.IsNullOrEmpty(myString))
        {
            return;
        }

        for (int i = 0, j = _Enginers.Length; i < j; i++)
        {
            if (myString.Contains(_Enginers[i][0]))
            {
                _EngineName = _Enginers[i][0];
                _Coding = _Enginers[i][1];
                _RegexWord = _Enginers[i][2];
                // "<engine> ... <separator><param>=" followed by the keyword up to the next '&'.
                _Regex = @"(" + Regex.Escape(_EngineName) + @".+.*[?/ &]" + Regex.Escape(_RegexWord) + @"[=:])(?<key>[^&]*)";
                break;
            }
        }
    }
    /// <summary>
    /// Extracts the decoded search keyword from a search-engine referrer URL.
    /// </summary>
    /// <param name="myString">Referrer URL.</param>
    /// <returns>
    /// The keyword with '+' turned into spaces and percent-escapes decoded, or ""
    /// when the URL is null/empty, belongs to no known engine, or carries no
    /// keyword parameter.
    /// </returns>
    public string SearchKey(string myString)
    {
        if (string.IsNullOrEmpty(myString))
        {
            return string.Empty;
        }

        EngineRegEx(myString.ToLower());
        if (_EngineName == "")
        {
            // Unknown engine: there is no keyword to report (the URL itself is not one).
            return string.Empty;
        }

        Regex myReg = new Regex(_Regex, RegexOptions.IgnoreCase);
        Match matche = myReg.Match(myString);
        string keyword = matche.Groups["key"].Value;
        // '+' encodes a space in query strings.
        keyword = keyword.Replace("+", " ");
        if (_Coding == "gb2312")
        {
            keyword = System.Web.HttpUtility.UrlDecode(keyword);
        }
        else
        {
            keyword = Uri.UnescapeDataString(keyword);
        }
        return keyword;
    }
    /// <summary>
    /// Replaces every pair of percent-escaped bytes ("%XX%XX") in the text with the
    /// character obtained by decoding those two bytes as GB2312.
    /// </summary>
    /// <param name="myString">Text containing percent-escaped GB2312 byte pairs.</param>
    /// <returns>The text with each pair replaced; null or empty input is returned as is.</returns>
    public string GetUTF8String(string myString)
    {
        if (string.IsNullOrEmpty(myString))
        {
            return myString;
        }
        // Group names may not contain spaces, and only hex digits form a valid escape.
        Regex myReg = new Regex("(?<key>%[0-9a-f]{2}%[0-9a-f]{2})", RegexOptions.IgnoreCase);
        return myReg.Replace(myString, delegate (Match pair)
        {
            return GB2312ToUTF8(pair.Groups["key"].Value);
        });
    }
    /// <summary>
    /// Decodes one percent-escaped GB2312 byte pair (for example "%D6%D0") into text.
    /// </summary>
    /// <param name="myString">Text of the form "%XX%XX", XX being hex digits.</param>
    /// <returns>The text represented by the two bytes in GB2312.</returns>
    public string GB2312ToUTF8(string myString)
    {
        // Splitting "%D6%D0" on '%' yields { "", "D6", "D0" }.
        string[] parts = myString.Split('%');
        byte high = Convert.ToByte(parts[1], 16);
        byte low = Convert.ToByte(parts[2], 16);
        // Decoding the bytes directly yields the same text as converting them to
        // UTF-8 bytes first and decoding those.
        return Encoding.GetEncoding("GB2312").GetString(new byte[] { high, low });
    }
    330 
    /// <summary>
    /// Checks whether the given text (typically a User-Agent string) mentions a
    /// known search-engine crawler.
    /// </summary>
    /// <param name="SystemInfo">Text to inspect.</param>
    /// <returns>The first matching name from the crawler list, or the string "null" when none matches.</returns>
    public string isCrawler(string SystemInfo)
    {
        string[] knownBots =
        {
            "Google", "Baidu", "yisou", "MSN", "Yahoo", "live",
            "tom", "163", "TMCrawler", "iask", "Sogou", "soso", "youdao", "zhongsou", "so",
            "openfind", "alltheweb", "lycos", "bing", "118114"
        };

        // Comparison is case-insensitive: both sides are lower-cased.
        string haystack = SystemInfo.ToLower();
        for (int index = 0; index < knownBots.Length; index++)
        {
            string candidate = knownBots[index];
            if (haystack.Contains(candidate.ToLower()))
            {
                return candidate;
            }
        }
        return "null";
    }
    /// <summary>
    /// Tells whether the given URL mentions one of the known search engines.
    /// </summary>
    /// <param name="str">URL to inspect; may be null or empty.</param>
    /// <returns>
    /// true when any known engine name occurs in the URL (case-insensitive);
    /// false otherwise, including for null or empty input.
    /// </returns>
    public bool IsSearchEnginesGet(string str)
    {
        // A request without a referrer supplies no URL at all.
        if (string.IsNullOrEmpty(str))
        {
            return false;
        }

        string[] strArray = new string[] { "Google", "Baidu", "yisou", "MSN", "Yahoo", "live", "tom"
            , "163", "TMCrawler", "iask", "Sogou", "soso", "youdao", "zhongsou", "so", "openfind",
            "alltheweb", "lycos", "bing", "118114" };
        for (int i = 0; i < strArray.Length; i++)
        {
            if (str.IndexOf(strArray[i], StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }
        return false;
    }
    366     #endregion -搜索引擎-
    /// <summary>
    /// Always false: this handler instance is not offered for reuse by other requests.
    /// </summary>
    public bool IsReusable
    {
        get { return false; }
    }
    374 }
  • 相关阅读:
    js几个常用的弹层
    ubuntu 更新源 或者 apt-get install 出错404 not found ,Failed to fetch
    vmware ubuntu 解决 宿主机与虚拟机互相ping不通,虚拟机无线上网的解决办法
    mediawiki资料
    mediawiki问题
    JavaEE异常
    前端网站收藏
    依赖注入--setting注入和构造器注入
    Spring注入Bean
    Spring简介
  • 原文地址:https://www.cnblogs.com/soulmate/p/5622369.html
Copyright © 2011-2022 走看看