zoukankan      html  css  js  c++  java
  • 2.1 Python 使用 MongoDB 示例代码

    import pymongo
    
    
    client = pymongo.MongoClient(host='localhost', port=27017)  # MongoDB client
    walden = client.walden  # database named 'walden'
    sheet_tab = walden.sheet_tab  # collection named 'sheet_tab' inside that database

    # Demo 1: read a text file and store one document per line
    # path = '/Users/qiongyanzhu/Documents/Plan-for-combating-master/week2/2_1/2_1code_of_video/walden.txt'
    # with open(path, 'r') as f:
    #     lines = f.readlines()
    #     for index, line in enumerate(lines):
    #         data = {
    #             'index': index,
    #             'line': line,
    #             'words': len(line.split())
    #         }
    #         print(data)
    #         sheet_tab.insert_one(data)

    # Demo 2: documents whose 'words' field equals 0
    # for item in sheet_tab.find({'words': 0}):
    #     print(item)

    # Demo 3: comparison operators -- $lt/$lte/$gt/$gte/$ne
    # Here: documents whose 'words' field is less than 5.
    few_words = {'words': {'$lt': 5}}
    for item in sheet_tab.find(few_words):
        print(item)

    # Demo 4: print the 'line' field of every document in the collection
    every_doc = sheet_tab.find()
    for item in every_doc:
        print(item['line'])
    

      

    from bs4 import BeautifulSoup
    import requests
    import pymongo
    
    
    # Connect to the MongoDB server on localhost, port 27017.
    client = pymongo.MongoClient(host='localhost', port=27017)
    # Handles to the 'xiaozhu' database and its 'sheet_tab' collection.
    xiaozhu = client.xiaozhu
    sheet_tab = xiaozhu.sheet_tab
    
    # URLs of listing pages 1 through 3 (range's upper bound 4 is exclusive).
    page_url_template = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'
    url_as = list(map(page_url_template.format, range(1, 4)))
    
    
    def insert_house_info(url_s, timeout=10):
        """Scrape listing pages and store each listing in ``sheet_tab``.

        Every URL in ``url_s`` is downloaded and parsed with the lxml parser.
        For each listing found, one document with the keys ``price``,
        ``title`` and ``url`` is inserted into the ``sheet_tab`` collection.

        Args:
            url_s: Iterable of listing-page URLs to fetch.
            timeout: Seconds to wait for each HTTP response before giving up.

        Raises:
            requests.RequestException: On timeout, connection failure, or a
                non-success HTTP status.
            ValueError: If a price label does not reduce to an integer.
        """
        for url_a in url_s:
            # Without a timeout, requests can block forever on a stalled server.
            wb_data = requests.get(url_a, timeout=timeout)
            # Fail loudly instead of silently parsing an HTTP error page.
            wb_data.raise_for_status()

            soup = BeautifulSoup(wb_data.text, 'lxml')
            prices = soup.select('span.result_price')
            titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
            urls = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname')

            # zip stops at the shortest list, so a listing missing any of the
            # three elements shortens the result rather than raising.
            for price, title, url in zip(prices, titles, urls):
                price_text = price.get_text()
                info = {
                    # Drop the first character and the last two before
                    # converting; presumably a currency sign and a two-character
                    # suffix -- TODO confirm against the live page markup.
                    'price': int(price_text[1:-2]),
                    'title': title.get_text(),
                    'url': url.get('detailurl')
                }
                sheet_tab.insert_one(info)
    
    
    def find_house(min_price=500):
        """Print every stored listing whose price is greater than ``min_price``.

        Args:
            min_price: Exclusive lower bound applied to the ``price`` field.
                Defaults to 500, the threshold that was previously hard-coded.
        """
        query = {'price': {'$gt': min_price}}
        for info in sheet_tab.find(query):
            print(info)
    
    
    # Populate the collection from the listing pages first, then print the
    # stored listings that match find_house's price filter.
    insert_house_info(url_as)
    find_house()
    

      

  • 相关阅读:
    关于使用Java Mail进行邮件发送,抛出Could not connect to SMTP host: xx@xxx.com, port: 25的异常可能
    百度地图和solr展示资源和附近等功能的实现 四
    Python爬虫入门-3
    Python爬虫入门-2
    Python爬虫入门-1
    Python装饰器专题-限制函数调用次数(10s调用一次)
    32个Python爬虫项目让你一次吃到撑
    时间复杂度趣图分析
    各类数据库默认端口总结
    ansible使用三(ansible roles)
  • 原文地址:https://www.cnblogs.com/mspeer/p/5634232.html
Copyright © 2011-2022 走看看