zoukankan      html  css  js  c++  java
  • nodejs的爬虫

    //爬取静态页面
    const fs = require('fs');
    const path = require('path');
    const request = require ('request');
    const cheerio = require('cheerio');
    const mysql = require('mysql');
    // Connection settings for the local MySQL database that stores the scraped goods.
    // No explicit conn.connect() call is made here; the mysql driver is expected to
    // connect implicitly when the first query is issued (see the mysql README).
    const conn = mysql.createConnection({
        host:'localhost',
        user:'root',
        password:'root',
        port:'3306',
        database:'xiaomi'
    });

    // Directory (next to this script) that receives the downloaded product images.
    const downloadDir = path.join(__dirname, 'downloadimg');

    /**
     * Download a single image into `downloadDir`, keeping the file name taken
     * from the last path segment of its URL.
     *
     * Failures are logged and otherwise ignored so that one broken image does
     * not abort the whole crawl.
     *
     * @param {string} imgsrc - URL of the image to download.
     */
    function downloadImage(imgsrc) {
        const target = path.join(downloadDir, path.parse(imgsrc).base);
        request(imgsrc)
            .on('error', function(err){
                console.error('Failed to download ' + imgsrc + ':', err.message);
            })
            .pipe(fs.createWriteStream(target))
            .on('error', function(err){
                console.error('Failed to write ' + target + ':', err.message);
            });
    }

    // Fetch the listing page, then store every product in the `goods` table and
    // download its picture.
    request('https://www.epet.com/cleargoodsmdog.html',function(err,res,body){
        // Bail out before touching `body`: on a transport error it is undefined.
        // No query has been issued yet, so there is no connection to close here.
        if (err) {
            console.error('Failed to fetch the listing page:', err.message);
            return;
        }
        if (res.statusCode !== 200) {
            console.error('Unexpected HTTP status for the listing page:', res.statusCode);
            return;
        }

        const $ = cheerio.load(body);
        const list = $('.qcGoodsBox.bgwhite .fl.rela');
        console.log(list);

        // createWriteStream does not create missing parent directories.
        if (!fs.existsSync(downloadDir)) {
            fs.mkdirSync(downloadDir);
        }

        list.each(function(index, element){
            const item = $(element);
            // The image URL is read from the `src0` attribute rather than `src`.
            const pic = item.find('.cloud-zoom img').attr('src0');
            const title = item.find('.qcGoodsTit a').text();
            const price = item.find('.qcPriceBox .ft20').text();
            const yprice = item.find('.qcPriceBox .ft12').text();

            // Items without an image are still inserted; only the download is skipped.
            if (pic) {
                downloadImage(pic);
            }

            // NOTE(review): `yprice` is stored in the `goodsclass` column, exactly as
            // in the original script — confirm this matches the table's intent.
            conn.query('insert into goods(goodsname,price,pic,goodsclass) values(?,?,?,?)',[title,price,pic,yprice],function(err,results){
                if (err) {
                    console.error('Failed to insert "' + title + '":', err.message);
                    return;
                }
                console.log(results);
            });
        });

        // end() is called after all inserts have been queued; per the mysql README
        // it lets previously enqueued queries run before closing the connection.
        conn.end();
    })
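    // Scraping dynamically rendered data with phantom.
    // NOTE: this is a separate script — keep it in its own file, because `cheerio`
    // is declared with `const` again below and cannot share a file with the script above.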
    const phantom = require ('phantom');
    const cheerio = require ('cheerio');
     
    // Render a JavaScript-driven listing page in PhantomJS, then parse the
    // resulting HTML with cheerio and print the title of every listed item.
    (async function() {
      const instance = await phantom.create();
      try {
        const page = await instance.createPage();
        // Log every resource the page requests while it loads.
        await page.on('onResourceRequested', function(requestData) {
          console.info('Requesting', requestData.url);
        });

        const status = await page.open('http://you.163.com/item/list?categoryId=1065000&subCategoryId=1065001');
        // page.open resolves with a status string; anything other than 'success'
        // means there is no usable content to parse.
        if (status !== 'success') {
          throw new Error('Failed to open page, status: ' + status);
        }

        // Read the page's HTML after PhantomJS has loaded it.
        const content = await page.property('content');
        const $ = cheerio.load(content);
        const list = $('.m-itemList.m-itemList-level2Category .item');
        list.each(function(index, element) {
          const title = $(element).find('.name a span:nth-of-type(3)').text();
          console.log(title);
        });
      } finally {
        // Always shut the PhantomJS process down, even when a step above throws.
        await instance.exit();
      }
    })().catch(function(err) {
      // Report the failure instead of leaving an unhandled promise rejection.
      console.error('Scrape failed:', err);
      process.exitCode = 1;
    });
  • 相关阅读:
    flask全栈开发3 模板
    flask全栈开发2 URL与视图
    flask全栈开发1 课程简介
    微信公众号开发中遇到的问题总结
    python web学习路线
    内存数据库Memcached和redis基本使用
    2019年8月12号成长题目
    2019年8月10号成长题目
    2019年8月7号成长题目
    SpringCloud简介与5大常用组件
  • 原文地址:https://www.cnblogs.com/bao2333/p/10142910.html
Copyright © 2011-2022 走看看