zoukankan      html  css  js  c++  java
  • Linguistic corpora 种子语料库待分析对象分析与更新语料库

    Computational Linguistics

    http://matplotlib.org/

    https://github.com/matplotlib/matplotlib/blob/master/INSTALL#L59

    http://www.nltk.org/book/ch01.html#id9

     1 C:\Users\w>python -m pip install --upgrade pip
     2 Collecting pip
     3   Retrying (Retry(total=4, connect=None, read=None, redirect=None)) after connection broken by 'ConnectTimeoutError(<pip._vendor.reque
     4   Retrying (Retry(total=3, connect=None, read=None, redirect=None)) after connection broken by 'ReadTimeoutError("HTTPSConnectionPool(
     5   Downloading pip-9.0.1-py2.py3-none-any.whl (1.3MB)
     6     100% |████████████████████████████████| 1.3MB 14kB/s
     7 Installing collected packages: pip
     8   Found existing installation: pip 8.1.1
     9     Uninstalling pip-8.1.1:
    10       Successfully uninstalled pip-8.1.1
    11 Successfully installed pip-9.0.1
    12 
    13 C:\Users\w>python -m pip install matplotlib
    14 Collecting matplotlib
    15   Downloading matplotlib-1.5.3-cp35-cp35m-win_amd64.whl (6.5MB)
    16     100% |████████████████████████████████| 6.5MB 30kB/s
    17 Collecting pytz (from matplotlib)
    18   Downloading pytz-2016.10-py2.py3-none-any.whl (483kB)
    19     100% |████████████████████████████████| 491kB 35kB/s
    20 Collecting pyparsing!=2.0.4,!=2.1.2,>=1.5.6 (from matplotlib)
    21   Downloading pyparsing-2.1.10-py2.py3-none-any.whl (56kB)
    22     100% |████████████████████████████████| 61kB 29kB/s
    23 Collecting numpy>=1.6 (from matplotlib)
    24   Downloading numpy-1.11.2-cp35-none-win_amd64.whl (7.6MB)
    25     100% |████████████████████████████████| 7.6MB 32kB/s
    26 Collecting cycler (from matplotlib)
    27   Downloading cycler-0.10.0-py2.py3-none-any.whl
    28 Collecting python-dateutil (from matplotlib)
    29   Downloading python_dateutil-2.6.0-py2.py3-none-any.whl (194kB)
    30     100% |████████████████████████████████| 194kB 46kB/s
    31 Collecting six (from cycler->matplotlib)
    32   Downloading six-1.10.0-py2.py3-none-any.whl
    33 Installing collected packages: pytz, pyparsing, numpy, six, cycler, python-dateutil, matplotlib
    34 Successfully installed cycler-0.10.0 matplotlib-1.5.3 numpy-1.11.2 pyparsing-2.1.10 python-dateutil-2.6.0 pytz-2016.10 six-1.10.0
     text4.dispersion_plot(["kate","he","she","jack"])
    
     <p id="w_last" style="color: red; font-size: 6em;">w-WAITING---</p><br>
     <?php
     // One-off scraper page: pulls English given names from the Baidu "english name"
     // API and from ename.dict.cn, stores them in the `namelist` table, and finally
     // reports how many girl/boy rows were inserted by rewriting the #w_last element.
     include('conn.php'); // presumably defines $link (a mysqli connection) - TODO confirm

     /**
      * Fetch one page, extract names with $pattern and insert the selected ones.
      *
      * @param mysqli $link    open database connection
      * @param string $url     page to download; also stored as grab_url
      * @param string $pattern PCRE whose capture group 1 is the given name
      * @param int    $isboy   value stored in the isboy column (1 boy, 0 girl)
      * @param int    $step    only every $step-th match is used
      * @param int    $offset  index (modulo $step) of the match that is used
      * @return int number of rows successfully inserted
      */
     function w_grab_names($link, $url, $pattern, $isboy, $step = 1, $offset = 0)
     {
         $body = file_get_contents($url);
         if ($body === false) {
             // Download failed: nothing to parse for this page.
             return 0;
         }
         if (!preg_match_all($pattern, $body, $matches, PREG_SET_ORDER)) {
             return 0;
         }

         // Prepared statement instead of string-built SQL: the values come from
         // scraped pages and must never be spliced into the query text.
         $stmt = mysqli_prepare($link, 'INSERT INTO namelist (given_name,grab_url,isboy) VALUES (?,?,?)');
         if ($stmt === false) {
             return 0;
         }

         $inserted = 0;
         foreach ($matches as $index => $one) {
             if ($index % $step != $offset) {
                 continue;
             }
             $given_name = $one[1];
             if ($given_name === '') {
                 // The pattern allows an empty name; do not store blank rows.
                 continue;
             }
             mysqli_stmt_bind_param($stmt, 'ssi', $given_name, $url, $isboy);
             if (mysqli_stmt_execute($stmt)) {
                 $inserted++;
             }
         }
         mysqli_stmt_close($stmt);
         return $inserted;
     }

     $w_db_incr_girl = 0;
     $w_db_incr_boy = 0;

     // Source 1: Baidu API, 153 pages of 5 results per gender.
     // Only every third "englishname" hit (indexes 2, 5, 8, ...) is stored, as in
     // the original loop - presumably the field repeats per record; verify against
     // a live response.
     //http://www.baidu.com/s?wd=%E5%8F%96%E8%8B%B1%E6%96%87%E5%90%8D
     $w_baidu_pattern = '/"englishname":"(\w*)"/';
     for ($w = 0; $w < 153; $w++) {
         $wgirl = 'http://api.open.baidu.com/pae/channel/data/asyncqury?appid=4036&srcid=4036&from_mid=1&format=json&ie=utf-8&oe=utf-8&subtitle=%E8%8B%B1%E6%96%87%E5%90%8D&query=%E8%8B%B1%E6%96%87%E5%90%8D&rn=5&stat1=%E5%A5%B3%E7%94%9F&pn=' . (5 * $w) . '&srcid=4036&cb=jQuery110205654252001601794_1481004786057&_=' . (1481004786059 + $w);
         $w_db_incr_girl += w_grab_names($link, $wgirl, $w_baidu_pattern, 0, 3, 2);
     }
     for ($w = 0; $w < 153; $w++) {
         $wboy = 'http://api.open.baidu.com/pae/channel/data/asyncqury?appid=4036&srcid=4036&from_mid=1&format=json&ie=utf-8&oe=utf-8&subtitle=%E8%8B%B1%E6%96%87%E5%90%8D&query=%E8%8B%B1%E6%96%87%E5%90%8D&rn=5&pn=' . (5 * $w) . '&srcid=4036&stat1=%E7%94%B7%E7%94%9F&cb=jQuery1102017382318514491035_1481005337608&_=' . (1481004786059 + $w);
         $w_db_incr_boy += w_grab_names($link, $wboy, $w_baidu_pattern, 1, 3, 2);
     }

     // Source 2: ename.dict.cn, pages 1..7 for each initial letter A..Z.
     // Every href="/Name"> link on the page is taken as a name.
     //http://ename.dict.cn/list/female/R/2
     $w_dict_pattern = '/href="\/(\w*)">/';
     foreach (range('A', 'Z') as $w_range) {
         for ($w = 1; $w < 8; $w++) {
             $wgirl = 'http://ename.dict.cn/list/female/' . $w_range . '/' . $w;
             $w_db_incr_girl += w_grab_names($link, $wgirl, $w_dict_pattern, 0);
         }
         for ($w = 1; $w < 8; $w++) {
             $wboy = 'http://ename.dict.cn/list/male/' . $w_range . '/' . $w;
             $w_db_incr_boy += w_grab_names($link, $wboy, $w_dict_pattern, 1);
         }
     }

     ?>
     <script>
         document.getElementById('w_last').innerHTML = 'w_db_incr_girl\'s=<?= $w_db_incr_girl?>,w_db_incr_boy\'s=' +<?= $w_db_incr_boy?>;
     </script>
     // Decide $isboy for $given_name: first from rows already in `namelist`,
     // otherwise by asking w_cross_domian_name_isboy() and caching its answer.
     // This excerpt ends with two blocks still open, exactly like the original.
     // Fixed: the query keyword was misspelled ("SEELCT"), so the check never ran.
     $sql_db_check = 'SELECT isboy FROM namelist WHERE given_name="' . mysqli_real_escape_string($link, $given_name) . '"';
     $check = db_multiple_rows_link($link, $sql_db_check);
     if(count($check)==2){
         // Name is stored once as boy and once as girl: treat as "both".
         $isboy = 2;
     }elseif(count($check)==1){
         // NOTE(review): db_multiple_rows_link() is not visible here. If it returns
         // a list of rows the value lives in $check[0]; if it returns the row itself
         // it lives in $check. Accept both shapes - confirm against the helper.
         $w_row = isset($check[0]) ? $check[0] : $check;
         $isboy = $w_row['isboy'];
     }elseif(count($check)==0){
         $w_arr = w_cross_domian_name_isboy($given_name);
         //var_dump($w_arr);
         $isboy = $w_arr['w_code'];
         $grab_url = $w_arr['w_url'];
         if($isboy!=4){
             // Code 2 means "both genders": store one row per gender.
             if($isboy==2){
                 $w_genders = array(1, 0);
             }elseif($isboy==1){
                 $w_genders = array(1);
             }elseif($isboy==0){
                 $w_genders = array(0);
             }else{
                 $w_genders = array();
             }
             // One mysqli_query() per row instead of mysqli_multi_query(): the
             // multi-query results were never drained, which leaves the
             // connection unable to run the next statement.
             foreach($w_genders as $w_gender){
                 $sql_w = 'INSERT INTO namelist (given_name,grab_url,isboy) VALUES ("' . mysqli_real_escape_string($link, $given_name) . '","' . mysqli_real_escape_string($link, $grab_url) . '",' . $w_gender . ')';
               //  var_dump($sql_w);
                 mysqli_query($link, $sql_w);
             }
     /**
      * Extract the given (first) name from a full-name string and normalise its case.
      *
      * The input is trimmed, the first whitespace-delimited token is taken, and it
      * is returned with an upper-case first letter and the rest lower-case.
      * Leading whitespace no longer causes the whole string to be returned.
      *
      * @param string $wstr full name, e.g. "tommy LEE"
      * @return string normalised given name, e.g. "Tommy"; '' for blank input
      */
     function w_given_name($wstr)
     {
         $trimmed = trim((string)$wstr);
         if ($trimmed === '') {
             return '';
         }

         // Split on any run of whitespace, keeping only the first token.
         $parts = preg_split('/\s+/', $trimmed, 2);
         $given_name = $parts[0];

         return strtoupper(substr($given_name, 0, 1)) . strtolower((string)substr($given_name, 1));
     }
     8 
     9 //http://dict.youdao.com/w/eng/Tommy/#keyfrom=dict2.index
    10 //http://dict.youdao.com/w/eng/Chris/#keyfrom=dict2.index
    11 //http://dict.youdao.com/w/eng/Billie/#keyfrom=dict2.index
    12 //http://dict.youdao.com/w/eng/Mikhael/#keyfrom=dict2.index
    /**
     * Look a name up on dict.youdao.com and classify it from the page text.
     *
     * Result codes stored under 'w_code':
     *   1 - page mentions only the male-name marker
     *   0 - page mentions only the female-name marker
     *   2 - page mentions both markers
     *   4 - unknown: download failed, the page shows the "did you mean" spelling
     *       suggestion, or neither marker is present
     *
     * @param string $name given name to look up
     * @return array ['w_url' => URL that was requested, 'w_code' => int code above]
     */
    function w_cross_domian_name_isboy($name)
    {
        // Encode the name so spaces or punctuation cannot break the URL path.
        $url = 'http://dict.youdao.com/w/eng/' . rawurlencode($name) . '/#keyfrom=dict2.index';

        $w = array();
        $w['w_url'] = $url;
        $w['w_code'] = 4;

        $w_file = file_get_contents($url);
        if ($w_file === false) {
            return $w;
        }

        // A spelling suggestion means the site did not recognise the name.
        if (strpos($w_file, '您要找的是不是') !== false) {
            return $w;
        }

        $has_boy = strpos($w_file, '男子名') !== false;
        $has_girl = strpos($w_file, '女子名') !== false;

        if ($has_boy && $has_girl) {
            $w['w_code'] = 2;
        } elseif ($has_boy) {
            $w['w_code'] = 1;
        } elseif ($has_girl) {
            $w['w_code'] = 0;
        }
        return $w;
    }
  • 相关阅读:
    HDU1862 EXCEL排序
    结构体+字符串比较大小(HDU1234开门人和关门人 )
    两个大数字相加
    Ksusha and Array (vector)
    Ksenia and Pan Scales(思维题)
    最大公约数问题(Wolf and Rabbit )
    HDU2044一只小蜜蜂(递推)
    HDU2043密码
    基础贪心算法(HDU2037今年暑假不AC)
    c# winform 关于DataGridView的一些操作(很全,绝对够用)
  • 原文地址:https://www.cnblogs.com/rsapaper/p/6145410.html
Copyright © 2011-2022 走看看