zoukankan      html  css  js  c++  java
  • 从搜狐下载每日交易数据的爬虫程序

    网易的接口不好用,还有搜狐可选。搜狐提供的每日股票交易数据比网易的强多了:近四千只股票4月份的八万余条交易数据,一口气就下载完了。看来以后要靠它当主力。

    程序:

    package com.ufo.hy.agumaster.crawler.daytransact;
    
    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    
    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.ufo.hy.agumaster.entity.DayTransact;
    
    /**
     * Downloads daily transaction rows for one stock from Sohu's {@code hisHq}
     * endpoint and converts them into {@link DayTransact} objects.
     *
     * <p>Call {@link #download(String, String, String, String)} and then read the
     * rows with {@link #getDtList()}. Each call to {@code download} replaces the
     * previous result.
     */
    public class SohuDTCrawler {
        /** Shared JSON parser, reused across downloads instead of being rebuilt per call. */
        private static final ObjectMapper MAPPER = new ObjectMapper();

        /** Number of positional values read from each row of the "hq" array. */
        private static final int ROW_FIELD_COUNT = 10;

        /** Request timeout in milliseconds. */
        private static final int TIMEOUT_MILLIS = 30000;

        /** Rows produced by the most recent download; empty (never null) before the first one. */
        private List<DayTransact> dtList = new ArrayList<>();

        /**
         * Returns the rows collected by the most recent {@link #download} call.
         *
         * @return the internal list; empty if nothing has been downloaded yet or the
         *         last download failed before any row was parsed
         */
        public List<DayTransact> getDtList() {
            return dtList;
        }

        /**
         * Fetches the daily transactions of one stock and stores them for {@link #getDtList()}.
         *
         * <p>Failures are reported on standard error and do not propagate: rows parsed
         * before the failure stay in the list.
         *
         * @param originalCode stock code, inserted into the URL after the {@code cn_} prefix
         * @param name         stock name copied onto every row
         * @param fromDate     start date passed as the {@code start} URL parameter
         * @param toDate       end date passed as the {@code end} URL parameter
         */
        public void download(String originalCode, String name, String fromDate, String toDate) {
            dtList = new ArrayList<>();
            try {
                // NOTE(review): the "query" parameter and "auth" cookie look like leftovers
                // from sample code; kept because removing them changes the request - confirm.
                Document doc = Jsoup.connect(getReqUrl(originalCode, fromDate, toDate))
                        .ignoreContentType(true)
                        .data("query", "Java")
                        .userAgent("Mozilla")
                        .cookie("auth", "token")
                        .timeout(TIMEOUT_MILLIS)
                        .get();

                String json = extractJsonObject(doc.text());
                if (json == null) {
                    System.err.println("No JSON object found in response for code " + originalCode);
                    return;
                }

                // path() yields a missing node when "hq" is absent, so the loop simply does not run.
                JsonNode rows = MAPPER.readTree(json).path("hq");
                for (JsonNode row : rows) {
                    if (!row.isArray() || row.size() < ROW_FIELD_COUNT) {
                        System.err.println("Skipping malformed row for code " + originalCode + ": " + row);
                        continue;
                    }
                    dtList.add(toDayTransact(originalCode, name, row));
                }
            } catch (Exception ex) {
                // Best-effort download: report the problem and keep whatever was parsed so far.
                System.err.println("Download failed for code " + originalCode);
                ex.printStackTrace();
            }
        }

        /**
         * Cuts the JSON object out of the JSONP response text.
         *
         * <p>Takes everything from the first '{' to the last '}' instead of relying on
         * fixed character offsets, so a different callback name or wrapper does not
         * break parsing.
         *
         * @param rawText response body text
         * @return the JSON object text, or {@code null} when no braces pair is present
         */
        private static String extractJsonObject(String rawText) {
            int start = rawText.indexOf('{');
            int end = rawText.lastIndexOf('}');
            if (start < 0 || end <= start) {
                return null;
            }
            return rawText.substring(start, end + 1);
        }

        /**
         * Maps one positional row to a {@link DayTransact}.
         *
         * <p>Index layout used here: 0 day, 1 open, 2 close, 3 change, 4 change percent,
         * 5 low, 6 high, 7 volume, 8 amount, 9 turnover percent.
         */
        private static DayTransact toDayTransact(String code, String name, JsonNode row) {
            DayTransact dt = new DayTransact();
            dt.setCode(code);
            dt.setName(name);
            dt.setDay(row.get(0).asText());
            dt.setTopen(Double.parseDouble(row.get(1).asText()));
            dt.setTclose(Double.parseDouble(row.get(2).asText()));
            dt.setChg(Double.parseDouble(row.get(3).asText()));
            dt.setPchg(parsePercent(row.get(4).asText()));
            dt.setLow(Double.parseDouble(row.get(5).asText()));
            dt.setHigh(Double.parseDouble(row.get(6).asText()));
            dt.setVoturnover(Long.parseLong(row.get(7).asText()));
            dt.setVaturnover(Double.parseDouble(row.get(8).asText()));
            dt.setTurnover(parsePercent(row.get(9).asText()));
            return dt;
        }

        /** Parses a number after removing any '%' characters from the text. */
        private static double parsePercent(String text) {
            return Double.parseDouble(text.replace("%", ""));
        }

        /** Builds the request URL for the given stock code and date range. */
        private String getReqUrl(String code, String startDate, String endDate) {
            return "http://q.stock.sohu.com/hisHq?code=cn_" + code
                    + "&start=" + startDate
                    + "&end=" + endDate
                    + "&stat=1&order=D&period=d&callback=historySearchHandler&rt=jsonp";
        }

        /** Demo entry point: downloads ten days of one stock and prints every row. */
        public static void main(String[] args) {
            SohuDTCrawler crawler = new SohuDTCrawler();
            crawler.download("002101", "广东鸿图", "20200401", "20200410");

            for (DayTransact dt : crawler.getDtList()) {
                System.out.println(dt);
            }
        }
    }

    用到的实体类:

    package com.ufo.hy.agumaster.entity;
    
    import com.fasterxml.jackson.databind.JsonNode;
    
    /**
     * Entity holding one day's transaction data for one stock.
     *
     * @author ufo
     */
    public class DayTransact {
        /** Expected number of columns in the array accepted by {@link #DayTransact(String[])}. */
        private static final int COLUMN_COUNT = 15;

        private long    id;            // ID
        private String  day;           // date
        private String  code;          // stock code
        private String  name;          // stock name
        private double  tclose;        // closing price
        private double  high;          // highest price
        private double  low;           // lowest price
        private double  topen;         // opening price
        private double  lclose;        // previous day's closing price
        private double  chg;           // price change amount
        private double  pchg;          // price change percentage
        private double  turnover;      // turnover rate
        private long    voturnover;    // trading volume
        private double  vaturnover;    // trading amount
        private double  tcap;          // total market capitalization
        private double  mcap;          // circulating market capitalization

        /** Creates an instance with every field at its default value. */
        public DayTransact() {

        }

        /**
         * Creates an instance with every field at its default value.
         *
         * @param transNode currently ignored; the constructor body is empty
         */
        public DayTransact(JsonNode transNode) {

        }

        /**
         * Creates an instance from a 15-column row.
         *
         * <p>Columns read: 0 day, 3 tclose, 4 high, 5 low, 6 topen, 7 lclose, 8 chg,
         * 9 pchg, 10 turnover, 11 voturnover, 12 vaturnover, 13 tcap, 14 mcap.
         * Columns 1 and 2 are not read, so {@code code} and {@code name} stay unset.
         *
         * @param arr the row values
         * @throws ArrayIndexOutOfBoundsException if {@code arr} does not have exactly 15 elements
         * @throws NumberFormatException if a numeric column cannot be parsed; the message
         *         names the field and includes the whole row, and the original parse
         *         failure is attached as the cause
         */
        public DayTransact(String[] arr) {
            if (arr.length != COLUMN_COUNT) {
                throw new ArrayIndexOutOfBoundsException("Array size should be 15 but now it is " + arr.length);
            }

            // Joined once up front so every error message can show the complete row.
            String dataLine = String.join(",", arr);

            day = arr[0];
            tclose = parseDoubleField(arr, 3, "tclose", dataLine);
            high = parseDoubleField(arr, 4, "high", dataLine);
            low = parseDoubleField(arr, 5, "low", dataLine);
            topen = parseDoubleField(arr, 6, "topen", dataLine);
            lclose = parseDoubleField(arr, 7, "lclose", dataLine);
            chg = parseDoubleField(arr, 8, "chg", dataLine);
            pchg = parseDoubleField(arr, 9, "pchg", dataLine);
            turnover = parseDoubleField(arr, 10, "turnover", dataLine);
            voturnover = parseLongField(arr, 11, "voturnover", dataLine);
            vaturnover = parseDoubleField(arr, 12, "vaturnover", dataLine);
            tcap = parseDoubleField(arr, 13, "tcap", dataLine);
            mcap = parseDoubleField(arr, 14, "mcap", dataLine);
        }

        /** Parses column {@code index} as a double, reporting the field name and row on failure. */
        private static double parseDoubleField(String[] arr, int index, String fieldName, String dataLine) {
            try {
                return Double.parseDouble(arr[index]);
            } catch (NumberFormatException ex) {
                throw fieldError(fieldName, arr[index], dataLine, ex);
            }
        }

        /** Parses column {@code index} as a long, reporting the field name and row on failure. */
        private static long parseLongField(String[] arr, int index, String fieldName, String dataLine) {
            try {
                return Long.parseLong(arr[index]);
            } catch (NumberFormatException ex) {
                throw fieldError(fieldName, arr[index], dataLine, ex);
            }
        }

        /**
         * Builds the descriptive parse error. NumberFormatException has no
         * cause-taking constructor, so the cause is attached with initCause.
         */
        private static NumberFormatException fieldError(String fieldName, String value, String dataLine,
                NumberFormatException cause) {
            NumberFormatException wrapped = new NumberFormatException(
                    "Can not get " + fieldName + " from string:" + value + " dataLine:" + dataLine);
            wrapped.initCause(cause);
            return wrapped;
        }

        /** Returns all fields on one line, each prefixed with its label. */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("id:").append(id);
            sb.append(" 日期day:").append(day);
            sb.append(" 代号code:").append(code);
            sb.append(" 名称name:").append(name);
            sb.append(" 收盘价tclose:").append(tclose);
            sb.append(" 最高价high:").append(high);
            sb.append(" 最低价low:").append(low);
            sb.append(" 开盘价topen:").append(topen);
            sb.append(" 前日收盘价lclose:").append(lclose);
            sb.append(" 涨跌额chg:").append(chg);
            sb.append(" 涨跌幅pchg:").append(pchg);
            sb.append(" 换手率turnover:").append(turnover);
            sb.append(" 成交量voturnover:").append(voturnover);
            sb.append(" 成交金额vaturnover:").append(vaturnover);
            sb.append(" 总市值tcap:").append(tcap);
            sb.append(" 流通市值mcap:").append(mcap);

            return sb.toString();
        }

        public long getId() {
            return id;
        }
        public void setId(long id) {
            this.id = id;
        }
        public String getDay() {
            return day;
        }
        public void setDay(String day) {
            this.day = day;
        }
        public String getCode() {
            return code;
        }
        public void setCode(String code) {
            this.code = code;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public double getTclose() {
            return tclose;
        }
        public void setTclose(double tclose) {
            this.tclose = tclose;
        }
        public double getHigh() {
            return high;
        }
        public void setHigh(double high) {
            this.high = high;
        }
        public double getLow() {
            return low;
        }
        public void setLow(double low) {
            this.low = low;
        }
        public double getTopen() {
            return topen;
        }
        public void setTopen(double topen) {
            this.topen = topen;
        }
        public double getLclose() {
            return lclose;
        }
        public void setLclose(double lclose) {
            this.lclose = lclose;
        }
        public double getChg() {
            return chg;
        }
        public void setChg(double chg) {
            this.chg = chg;
        }
        public double getPchg() {
            return pchg;
        }
        public void setPchg(double pchg) {
            this.pchg = pchg;
        }
        public double getTurnover() {
            return turnover;
        }
        public void setTurnover(double turnover) {
            this.turnover = turnover;
        }
        public long getVoturnover() {
            return voturnover;
        }
        public void setVoturnover(long voturnover) {
            this.voturnover = voturnover;
        }
        public double getVaturnover() {
            return vaturnover;
        }
        public void setVaturnover(double vaturnover) {
            this.vaturnover = vaturnover;
        }
        public double getTcap() {
            return tcap;
        }
        public void setTcap(double tcap) {
            this.tcap = tcap;
        }
        public double getMcap() {
            return mcap;
        }
        public void setMcap(double mcap) {
            this.mcap = mcap;
        }
    }

    执行情况(爬虫没有填充 lclose、tcap、mcap 这三个字段,所以它们输出为默认值 0.0):

    id:0 日期day:2020-04-10 代号code:002101 名称name:广东鸿图 收盘价tclose:7.95 最高价high:8.23 最低价low:7.81 开盘价topen:8.11 前日收盘价lclose:0.0 涨跌额chg:-0.15 涨跌幅pchg:-1.85 换手率turnover:2.2 成交量voturnover:93679 成交金额vaturnover:7500.99 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-09 代号code:002101 名称name:广东鸿图 收盘价tclose:8.1 最高价high:8.18 最低价low:7.94 开盘价topen:8.13 前日收盘价lclose:0.0 涨跌额chg:0.0 涨跌幅pchg:0.0 换手率turnover:2.75 成交量voturnover:116902 成交金额vaturnover:9441.65 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-08 代号code:002101 名称name:广东鸿图 收盘价tclose:8.1 最高价high:8.27 最低价low:8.03 开盘价topen:8.06 前日收盘价lclose:0.0 涨跌额chg:-0.13 涨跌幅pchg:-1.58 换手率turnover:2.75 成交量voturnover:116971 成交金额vaturnover:9499.67 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-07 代号code:002101 名称name:广东鸿图 收盘价tclose:8.23 最高价high:8.28 最低价low:7.9 开盘价topen:8.04 前日收盘价lclose:0.0 涨跌额chg:0.33 涨跌幅pchg:4.18 换手率turnover:3.76 成交量voturnover:159804 成交金额vaturnover:12937.74 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-03 代号code:002101 名称name:广东鸿图 收盘价tclose:7.9 最高价high:8.11 最低价low:7.82 开盘价topen:8.11 前日收盘价lclose:0.0 涨跌额chg:-0.3 涨跌幅pchg:-3.66 换手率turnover:3.24 成交量voturnover:138091 成交金额vaturnover:10978.95 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-02 代号code:002101 名称name:广东鸿图 收盘价tclose:8.2 最高价high:8.2 最低价low:7.58 开盘价topen:7.7 前日收盘价lclose:0.0 涨跌额chg:0.45 涨跌幅pchg:5.81 换手率turnover:4.54 成交量voturnover:193364 成交金额vaturnover:15326.84 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-01 代号code:002101 名称name:广东鸿图 收盘价tclose:7.75 最高价high:8.07 最低价low:7.62 开盘价topen:7.62 前日收盘价lclose:0.0 涨跌额chg:0.31 涨跌幅pchg:4.17 换手率turnover:3.7 成交量voturnover:157608 成交金额vaturnover:12279.22 总市值tcap:0.0 流通市值mcap:0.0

    希望此程序对你也有用.

    --2020年5月7日--

  • 相关阅读:
    redis集群登陆
    锁机制
    关系型数据库事务遵循ACID原则
    前端之Css
    Python之操作redis数据库
    前端之HTML
    Excel之批量改变特定字体颜色(转载)
    jmeter之批量修改请求路径
    Python之time模块
    Python之os模块
  • 原文地址:https://www.cnblogs.com/heyang78/p/12844871.html
Copyright © 2011-2022 走看看