zoukankan      html  css  js  c++  java
  • 初练pandas实现数据处理

    import urllib.request;
    from pandas import DataFrame;
    from pandas import Series;
    from bs4 import BeautifulSoup;
    
    # Purpose: read a locally saved HTML table, load its rows into a DataFrame,
    # build one formatted text line per row, de-duplicate the rows, and write
    # the surviving lines to data.txt.

    # Read the saved page; the context manager closes the handle even if the
    # read fails.
    with urllib.request.urlopen('file:///F:/python/untitled1/core/do_data/2month.html') as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    trs = soup.find_all('tr')
    # The <th> cells of the first <tr> supply the column names.
    ths = trs[0].find_all('th')

    index_d = [th.getText() for th in ths]
    print(index_d)

    # Collect every data row first and build the frame once.
    # DataFrame.append was removed in pandas 2.0, and it copied the whole
    # frame on each call.
    rows = []
    for tr in trs:
        td_datas = [td.getText() for td in tr.find_all('td')]
        if not td_datas:
            # Rows without <td> cells (such as the header row) carry no data.
            continue
        if len(td_datas) != len(index_d):
            # Keep the strictness of the original per-row Series construction,
            # which raised on a cell-count mismatch, instead of letting pandas
            # silently pad short rows.
            raise ValueError(
                "row has %d cells but the header has %d columns"
                % (len(td_datas), len(index_d))
            )
        rows.append(td_datas)
    data = DataFrame(rows, columns=index_d)

    print(len(data))

    # Build the output line for each row: the code column with "SZ" -> "0|"
    # and "SH" -> "1|" substituted, then "|" and three text columns separated
    # by single spaces.
    # NOTE(review): "SZ"/"SH" are presumably exchange prefixes - confirm.
    str2s = []
    for full_code, limit_time, history_reason, selected_reason in zip(
        data["股票全码"], data["涨停时间"], data["历史涨停原因"], data["涨停选原因"]
    ):
        str2 = str(full_code).replace("SZ", "0|").replace("SH", "1|")
        str2s.append(
            str2 + "|" + limit_time + " " + history_reason + " " + selected_reason
        )

    data["new"] = str2s
    # Keep only the last row for each value of the de-duplication column.
    # NOTE(review): this column name differs from the one used to build the
    # lines above - presumably both exist in the table; confirm.
    data = data.drop_duplicates(subset=['股票代码'], keep='last', inplace=False)
    print(len(data))
    df2 = data["new"].values

    # One line per remaining row. The separator is a newline escape; the
    # original literal was split across two physical lines, which is a syntax
    # error.
    # NOTE(review): the file is written with the platform default encoding, as
    # before; pass encoding= explicitly if the consumer expects a specific one.
    with open('data.txt', 'w') as file:
        file.write("\n".join(df2))
  • 相关阅读:
    n的阶乘
    二叉树遍历
    二分查找练习
    字符串中最长回文序列求解
    复数集合
    AppCrawler自动化遍历使用详解(版本2.1.0 )(转)
    谷歌驱动下载链接
    谷歌浏览器插件
    Pycharm破解方法
    go学习链接
  • 原文地址:https://www.cnblogs.com/rongye/p/12466584.html
Copyright © 2011-2022 走看看