zoukankan      html  css  js  c++  java
  • 从搜狐下载每日交易数据的爬虫程序

    网易的接口不好用,还有搜狐:搜狐提供的每日股票交易数据比网易的强得多,近四千只股票4月份的交易数据共八万余条,一次就能顺利下载完。看来以后要靠它当主力了。

    程序:

    package com.ufo.hy.agumaster.crawler.daytransact;
    
    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    
    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.ufo.hy.agumaster.entity.DayTransact;
    
    /**
     * Downloads daily trading records for one stock from Sohu's historical-quote
     * endpoint and converts each returned row into a {@link DayTransact}.
     *
     * <p>Call {@link #download(String, String, String, String)} and then read the
     * rows with {@link #getDtList()}. Every call to {@code download} replaces the
     * previously held list.
     */
    public class SohuDTCrawler {
        /** Shared JSON parser; building an ObjectMapper per request is unnecessary. */
        private static final ObjectMapper MAPPER = new ObjectMapper();

        /** Number of columns read from each row of the "hq" array. */
        private static final int ROW_COLUMNS = 10;

        /** Longest response excerpt quoted in an error message. */
        private static final int MAX_EXCERPT = 200;

        /** Rows produced by the most recent download; never null. */
        private List<DayTransact> dtList=new ArrayList<>();

        /**
         * Returns the rows collected by the most recent {@link #download} call.
         *
         * @return the collected rows; empty (never null) if nothing has been downloaded
         */
        public List<DayTransact> getDtList(){
            return dtList;
        }

        /**
         * Fetches the daily records of one stock for a date range and stores them
         * in this crawler, replacing any earlier result.
         *
         * <p>This is best-effort: any failure (network, unexpected payload, bad
         * number) is reported on standard error and the rows parsed before the
         * failure are kept.
         *
         * @param originalCode stock code, appended to the request as {@code cn_<code>}
         * @param name         stock name copied into every produced record
         * @param fromDate     start date as sent to the server (e.g. {@code 20200401})
         * @param toDate       end date as sent to the server (e.g. {@code 20200410})
         */
        public void download(String originalCode,String name,String fromDate,String toDate) {
            dtList=new ArrayList<>();
            try {
                // Read the raw response body: the payload is JSONP, not HTML, so it
                // must not be run through the HTML parser and its text normalisation.
                // NOTE(review): the "query" parameter and "auth" cookie look like
                // leftover sample values; kept so the request is unchanged - confirm
                // before removing.
                String body=Jsoup.connect(getReqUrl(originalCode,fromDate,toDate))
                        .ignoreContentType(true)
                        .data("query", "Java")
                        .userAgent("Mozilla")
                        .cookie("auth", "token")
                        .timeout(30000)
                        .execute()
                        .body();

                JsonNode root=MAPPER.readTree(extractJsonPayload(body));
                // The callback argument is a one-element array wrapping the result object.
                JsonNode result=root.isArray()?root.path(0):root;

                // path() yields a "missing" node when "hq" is absent, so the loop is simply skipped.
                for(JsonNode row:result.path("hq")) {
                    dtList.add(toDayTransact(row,originalCode,name));
                }
            }catch(Exception ex) {
                System.err.println("Failed to download day transactions for code "+originalCode
                        +" ("+fromDate+" - "+toDate+")");
                ex.printStackTrace();
            }
        }

        /**
         * Strips the JSONP wrapper {@code callback( ... )} and returns the text
         * between the first '(' and the last ')'.
         *
         * @param jsonp raw response body
         * @return the JSON text passed to the callback
         * @throws IllegalStateException if the body is not wrapped in parentheses
         */
        private static String extractJsonPayload(String jsonp) {
            int open=jsonp.indexOf('(');
            int close=jsonp.lastIndexOf(')');
            if(open<0 || close<=open) {
                String excerpt=jsonp.length()>MAX_EXCERPT?jsonp.substring(0,MAX_EXCERPT)+"...":jsonp;
                throw new IllegalStateException("Response is not JSONP: "+excerpt);
            }
            return jsonp.substring(open+1,close);
        }

        /**
         * Converts one row of the "hq" array into a record.
         * Columns read: 0 day, 1 open, 2 close, 3 change, 4 change %, 5 low,
         * 6 high, 7 volume, 8 amount, 9 turnover %.
         *
         * @throws IllegalArgumentException if the row has fewer than ten columns
         * @throws NumberFormatException    if a numeric column cannot be parsed
         */
        private static DayTransact toDayTransact(JsonNode row,String code,String name) {
            if(!row.isArray() || row.size()<ROW_COLUMNS) {
                throw new IllegalArgumentException("Unexpected hq row: "+row);
            }

            DayTransact dt=new DayTransact();
            dt.setCode(code);
            dt.setName(name);
            dt.setDay(row.get(0).asText());
            dt.setTopen(Double.parseDouble(row.get(1).asText()));
            dt.setTclose(Double.parseDouble(row.get(2).asText()));
            dt.setChg(Double.parseDouble(row.get(3).asText()));
            dt.setPchg(parsePercent(row.get(4).asText()));
            dt.setLow(Double.parseDouble(row.get(5).asText()));
            dt.setHigh(Double.parseDouble(row.get(6).asText()));
            dt.setVoturnover(Long.parseLong(row.get(7).asText()));
            dt.setVaturnover(Double.parseDouble(row.get(8).asText()));
            dt.setTurnover(parsePercent(row.get(9).asText()));
            return dt;
        }

        /** Parses a value such as {@code "-1.85%"} by dropping the percent sign. */
        private static double parsePercent(String text) {
            return Double.parseDouble(text.replace("%", ""));
        }

        /**
         * Builds the request URL for the given stock code and date range.
         * The fixed parameters request a JSONP response wrapped in
         * {@code historySearchHandler(...)}.
         */
        private String getReqUrl(String code,String startDate,String endDate) {
            return "http://q.stock.sohu.com/hisHq?code=cn_"+code+"&start="+startDate+"&end="+endDate+"&stat=1&order=D&period=d&callback=historySearchHandler&rt=jsonp";
        }

        /** Demo entry point: downloads a sample range for one stock and prints each record. */
        public static void main(String[] args) {
            SohuDTCrawler n=new SohuDTCrawler();
            n.download("002101","广东鸿图", "20200401", "20200410");

            for(DayTransact dt:n.getDtList()) {
                System.out.println(dt);
            }
        }
    }

    用到的实体类:

    package com.ufo.hy.agumaster.entity;
    
    import com.fasterxml.jackson.databind.JsonNode;
    
    /**
     * Entity holding one day's trading data for a single stock.
     *
     * @author ufo
     */
    public class DayTransact {
        /** Number of columns expected by {@link #DayTransact(String[])}. */
        private static final int EXPECTED_COLUMNS = 15;

        private long    id;            // ID
        private String  day;           // date
        private String  code;          // stock code
        private String  name;          // stock name
        private double  tclose;        // closing price
        private double  high;          // highest price
        private double  low;           // lowest price
        private double  topen;         // opening price
        private double  lclose;        // previous day's closing price
        private double  chg;           // price change (amount)
        private double  pchg;          // price change (percent)
        private double  turnover;      // turnover rate
        private long    voturnover;    // traded volume
        private double  vaturnover;    // traded amount
        private double  tcap;          // total market capitalisation
        private double  mcap;          // circulating market capitalisation

        /** Creates an empty record; populate it through the setters. */
        public DayTransact() {

        }

        /**
         * Placeholder constructor: the node is currently ignored and every field
         * keeps its default value.
         *
         * @param transNode JSON node (unused)
         */
        public DayTransact(JsonNode transNode) {

        }

        /**
         * Builds a record from a 15-column data line.
         *
         * <p>Columns read: 0 day, 3 tclose, 4 high, 5 low, 6 topen, 7 lclose,
         * 8 chg, 9 pchg, 10 turnover, 11 voturnover, 12 vaturnover, 13 tcap,
         * 14 mcap. Columns 1 and 2 are not read, so {@code code} and
         * {@code name} stay unset here.
         * NOTE(review): columns 1 and 2 presumably hold code and name - confirm
         * whether callers are expected to set them separately.
         *
         * @param arr the split data line; must contain exactly 15 elements
         * @throws ArrayIndexOutOfBoundsException if {@code arr} does not have 15 elements
         * @throws NumberFormatException          if a numeric column cannot be parsed; the
         *                                        message names the field and quotes the whole line
         */
        public DayTransact(String[] arr) {
            if(arr.length!=EXPECTED_COLUMNS) {
                throw new ArrayIndexOutOfBoundsException("Array size should be 15 but now it is "+arr.length);
            }

            // Kept for error messages so a bad value can be traced back to its line.
            String dataLine=String.join(",", arr);

            day=arr[0];
            tclose=parseDoubleField(arr,3,"tclose",dataLine);
            high=parseDoubleField(arr,4,"high",dataLine);
            low=parseDoubleField(arr,5,"low",dataLine);
            topen=parseDoubleField(arr,6,"topen",dataLine);
            lclose=parseDoubleField(arr,7,"lclose",dataLine);
            chg=parseDoubleField(arr,8,"chg",dataLine);
            pchg=parseDoubleField(arr,9,"pchg",dataLine);
            turnover=parseDoubleField(arr,10,"turnover",dataLine);
            voturnover=parseLongField(arr,11,"voturnover",dataLine);
            vaturnover=parseDoubleField(arr,12,"vaturnover",dataLine);
            tcap=parseDoubleField(arr,13,"tcap",dataLine);
            mcap=parseDoubleField(arr,14,"mcap",dataLine);
        }

        /** Parses column {@code index} as a double, rethrowing failures with field and line context. */
        private static double parseDoubleField(String[] arr,int index,String field,String dataLine) {
            try {
                return Double.parseDouble(arr[index]);
            }catch(NumberFormatException ex) {
                throw parseFailure(field,arr[index],dataLine,ex);
            }
        }

        /** Parses column {@code index} as a long, rethrowing failures with field and line context. */
        private static long parseLongField(String[] arr,int index,String field,String dataLine) {
            try {
                return Long.parseLong(arr[index]);
            }catch(NumberFormatException ex) {
                throw parseFailure(field,arr[index],dataLine,ex);
            }
        }

        /** Builds the contextual exception and attaches the original failure as its cause. */
        private static NumberFormatException parseFailure(String field,String text,String dataLine,NumberFormatException cause) {
            NumberFormatException failure=new NumberFormatException("Can not get "+field+" from string:"+text+" dataLine:"+dataLine);
            // NumberFormatException has no (message, cause) constructor, so attach the cause afterwards.
            failure.initCause(cause);
            return failure;
        }

        /**
         * Renders every field on a single line, each prefixed by its label.
         *
         * @return the single-line text form of this record
         */
        @Override
        public String toString() {
            StringBuilder sb=new StringBuilder();
            sb.append("id:").append(id);
            sb.append(" 日期day:").append(day);
            sb.append(" 代号code:").append(code);
            sb.append(" 名称name:").append(name);
            sb.append(" 收盘价tclose:").append(tclose);
            sb.append(" 最高价high:").append(high);
            sb.append(" 最低价low:").append(low);
            sb.append(" 开盘价topen:").append(topen);
            sb.append(" 前日收盘价lclose:").append(lclose);
            sb.append(" 涨跌额chg:").append(chg);
            sb.append(" 涨跌幅pchg:").append(pchg);
            sb.append(" 换手率turnover:").append(turnover);
            sb.append(" 成交量voturnover:").append(voturnover);
            sb.append(" 成交金额vaturnover:").append(vaturnover);
            sb.append(" 总市值tcap:").append(tcap);
            sb.append(" 流通市值mcap:").append(mcap);

            return sb.toString();
        }

        public long getId() {
            return id;
        }
        public void setId(long id) {
            this.id = id;
        }
        public String getDay() {
            return day;
        }
        public void setDay(String day) {
            this.day = day;
        }
        public String getCode() {
            return code;
        }
        public void setCode(String code) {
            this.code = code;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public double getTclose() {
            return tclose;
        }
        public void setTclose(double tclose) {
            this.tclose = tclose;
        }
        public double getHigh() {
            return high;
        }
        public void setHigh(double high) {
            this.high = high;
        }
        public double getLow() {
            return low;
        }
        public void setLow(double low) {
            this.low = low;
        }
        public double getTopen() {
            return topen;
        }
        public void setTopen(double topen) {
            this.topen = topen;
        }
        public double getLclose() {
            return lclose;
        }
        public void setLclose(double lclose) {
            this.lclose = lclose;
        }
        public double getChg() {
            return chg;
        }
        public void setChg(double chg) {
            this.chg = chg;
        }
        public double getPchg() {
            return pchg;
        }
        public void setPchg(double pchg) {
            this.pchg = pchg;
        }
        public double getTurnover() {
            return turnover;
        }
        public void setTurnover(double turnover) {
            this.turnover = turnover;
        }
        public long getVoturnover() {
            return voturnover;
        }
        public void setVoturnover(long voturnover) {
            this.voturnover = voturnover;
        }
        public double getVaturnover() {
            return vaturnover;
        }
        public void setVaturnover(double vaturnover) {
            this.vaturnover = vaturnover;
        }
        public double getTcap() {
            return tcap;
        }
        public void setTcap(double tcap) {
            this.tcap = tcap;
        }
        public double getMcap() {
            return mcap;
        }
        public void setMcap(double mcap) {
            this.mcap = mcap;
        }
    }

    执行情况:

    id:0 日期day:2020-04-10 代号code:002101 名称name:广东鸿图 收盘价tclose:7.95 最高价high:8.23 最低价low:7.81 开盘价topen:8.11 前日收盘价lclose:0.0 涨跌额chg:-0.15 涨跌幅pchg:-1.85 换手率turnover:2.2 成交量voturnover:93679 成交金额vaturnover:7500.99 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-09 代号code:002101 名称name:广东鸿图 收盘价tclose:8.1 最高价high:8.18 最低价low:7.94 开盘价topen:8.13 前日收盘价lclose:0.0 涨跌额chg:0.0 涨跌幅pchg:0.0 换手率turnover:2.75 成交量voturnover:116902 成交金额vaturnover:9441.65 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-08 代号code:002101 名称name:广东鸿图 收盘价tclose:8.1 最高价high:8.27 最低价low:8.03 开盘价topen:8.06 前日收盘价lclose:0.0 涨跌额chg:-0.13 涨跌幅pchg:-1.58 换手率turnover:2.75 成交量voturnover:116971 成交金额vaturnover:9499.67 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-07 代号code:002101 名称name:广东鸿图 收盘价tclose:8.23 最高价high:8.28 最低价low:7.9 开盘价topen:8.04 前日收盘价lclose:0.0 涨跌额chg:0.33 涨跌幅pchg:4.18 换手率turnover:3.76 成交量voturnover:159804 成交金额vaturnover:12937.74 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-03 代号code:002101 名称name:广东鸿图 收盘价tclose:7.9 最高价high:8.11 最低价low:7.82 开盘价topen:8.11 前日收盘价lclose:0.0 涨跌额chg:-0.3 涨跌幅pchg:-3.66 换手率turnover:3.24 成交量voturnover:138091 成交金额vaturnover:10978.95 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-02 代号code:002101 名称name:广东鸿图 收盘价tclose:8.2 最高价high:8.2 最低价low:7.58 开盘价topen:7.7 前日收盘价lclose:0.0 涨跌额chg:0.45 涨跌幅pchg:5.81 换手率turnover:4.54 成交量voturnover:193364 成交金额vaturnover:15326.84 总市值tcap:0.0 流通市值mcap:0.0
    id:0 日期day:2020-04-01 代号code:002101 名称name:广东鸿图 收盘价tclose:7.75 最高价high:8.07 最低价low:7.62 开盘价topen:7.62 前日收盘价lclose:0.0 涨跌额chg:0.31 涨跌幅pchg:4.17 换手率turnover:3.7 成交量voturnover:157608 成交金额vaturnover:12279.22 总市值tcap:0.0 流通市值mcap:0.0

    希望此程序对你也有用.

    --2020年5月7日--

  • 相关阅读:
    Begin Example with Override Encoded SOAP XML Serialization
    State Machine Terminology
    How to: Specify an Alternate Element Name for an XML Stream
    How to: Publish Metadata for a WCF Service.(What is the Metadata Exchange Endpoint purpose.)
    Beginning Guide With Controlling XML Serialization Using Attributes(XmlSerializaiton of Array)
    Workflow 4.0 Hosting Extensions
    What can we do in the CacheMetaData Method of Activity
    How and Why to use the System.servicemodel.MessageParameterAttribute in WCF
    How to: Begin Sample with Serialization and Deserialization an Object
    A Test WCF Service without anything of config.
  • 原文地址:https://www.cnblogs.com/heyang78/p/12844871.html
Copyright © 2011-2022 走看看