zoukankan      html  css  js  c++  java
  • 物流轨迹抓取

    /**
     * Created by aixiaofeng on 17/2/6.
     */
    public class FedroadSpider extends ExpressSpider {
    
        private static final SimpleDateFormat FMT_COL_DATE = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        private static final SimpleDateFormat FMT_DATE     = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    
        private static QueryTrackApi          queryTrackApi;
    
        @Override
        public Result<String> doQuery(String express, String expressNo, String attr) {
            String deliveryNo = null;
            if (StringUtils.isNotBlank(attr) || !StringUtils.lowerCase(expressNo).startsWith("ec")) {
                deliveryNo = queryDeliveryNoByApi(express, expressNo, attr);
            }
            return queryByPage(express, StringUtils.isNotBlank(deliveryNo) ? deliveryNo : expressNo, attr);
        }
    
        private String queryDeliveryNoByApi(String express, String expressNo, String attr) {
            if (queryTrackApi == null) {
                try {
                    queryTrackApi = ServiceFactory.getBean(QueryTrackApi.class);
                } catch (BeansException e) {
                    queryTrackApi = new QueryTrackApi();
                }
                if (queryTrackApi == null) {
                    queryTrackApi = new QueryTrackApi();
                }
            }
            QueryTrackReq reqTrack = new QueryTrackReq();
            reqTrack.getParameters().setPackageNo(expressNo);
            Result<QueryTrackRes> res = queryTrackApi.doRequest(reqTrack);
            if (res.isSuccess() && res.getData() != null && res.getData().getTrackList() != null
                    && CommonUtil.isNotEmpty(res.getData().getTrackList().getTrackList())) {
                return res.getData().getTrackList().getTrackList().get(0).getDeliveryNo();
            }
            return null;
        }
    
        private Result<String> queryByPage(String express, String expressNo, String attr) {
            Result<String> result = new Result<>();
            String res = "";
            String BOUNDARY = UUID.randomUUID().toString();
            String urlStr = "https://www.fedroad.com";//访问页面
            try {
                StringBuilder strBuilder = new StringBuilder();
                //请求链接,拿到document
                HttpURLConnection conn = null;
                Connection connection = HttpUtils.getConnection(urlStr);
                Connection.Response response = connection.method(Connection.Method.GET).execute();
                Document document = response.parse();
                //定位到form表单
                Elements formDocuments = document.select("#aspnetForm");
    
                //获取conn连接
                URL url = new URL(urlStr);
                conn = (HttpURLConnection) url.openConnection();
                conn.setConnectTimeout(5000);
                conn.setReadTimeout(30000);
                conn.setDoOutput(true);
                conn.setDoInput(true);
                conn.setUseCaches(false);
                conn.setRequestMethod("POST");
                conn.setRequestProperty("Connection", "Keep-Alive");
                conn.setRequestProperty("User-Agent",
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36");
                conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + BOUNDARY);
    
                OutputStream out = new DataOutputStream(conn.getOutputStream());
                //拼POST装请求参数
                for (int i = 0; i < formDocuments.select("input").size(); i++) {
                    if (formDocuments.select("input").get(i).attr("class").contains("user_loginout")) {
                        continue;
                    }
                    strBuilder.append("--" + BOUNDARY + "
    ");
                    strBuilder.append("Content-Disposition: form-data; name="" + formDocuments.select("input").get(i).attr("name") + """ + "
    
    ");
    
                    if (formDocuments.select("input").get(i).attr("name").contains("search_shippingorder")) {
                        strBuilder.append(expressNo + "
    ");
                    } else {
                        strBuilder.append(formDocuments.select("input").get(i).val() + "
    ");
                    }
                }
                strBuilder.append("--" + BOUNDARY + "--");
                out.write(strBuilder.toString().getBytes());
                byte[] endData = ("
    --" + BOUNDARY + "--
    ").getBytes();
                out.write(endData);
                out.flush();
                out.close();
    
                // 读取返回数据
                strBuilder = new StringBuilder();
                BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
                String line;
                while ((line = reader.readLine()) != null) {
                    strBuilder.append(line).append("
    ");
                }
                res = strBuilder.toString();
                //关闭
                reader.close();
                //获取返回的document(就是你需要的)
                document = Jsoup.parse(res);
                Elements trackinfo = document.select(".trackinfo tr");
                JSONObject json = new JSONObject();
                JSONArray arr = new JSONArray();
                 通过Jsoup 获取相应的字段 进行组装
                for (Element trElement : trackinfo) {
                    if (trElement.select("td").attr("class").contains("title")) {
                        continue;
                    }
                    Elements tdElement = trElement.getElementsByTag("td");
                    JSONObject item = new JSONObject();
                    if (tdElement.get(0).text().trim().isEmpty()) {
                        continue;
                    } else {
                        item.put("time", FMT_DATE.format(FMT_COL_DATE.parse(tdElement.get(0).text().trim())));
                    }
                    item.put("context", tdElement.get(1).text().trim());
                    arr.add(item);
                }
                json.put("data", arr);
                 //成功返回
                return result.setSuccess(true).setCode(ErrorConstants.SUCCESS).setData(json.toString());
            } catch (Exception e) {
                result.setCode(ErrorConstants.HTTP_ERR).setMessage(StackTraceUtil.getStackTrace(e));
                LOGGER.error(" - doQuery error,express = " + express + "," + expressNo, e);
                waitRandom();
            }
            //拿到抓取到的参数
            return result;
        }
    
        // 测试
        public static void main(String[] args) {
            FedroadSpider spider = new FedroadSpider();
            Result<String> ret = spider.doQuery(null, "EC000021436MY", null);
            System.out.print(ret);
        }
    }
    

      

  • 相关阅读:
    WCF 第八章 安全 确定替代身份(中)使用AzMan认证
    WCF 第八章 安全 总结
    WCF 第八章 安全 因特网上的安全服务(下) 其他认证模式
    WCF Membership Provider
    WCF 第八章 安全 确定替代身份(下)模仿用户
    WCF 第八章 安全 因特网上的安全服务(上)
    WCF 第九章 诊断
    HTTPS的七个误解(转载)
    WCF 第八章 安全 日志和审计
    基于比较的排序算法集
  • 原文地址:https://www.cnblogs.com/dreammyone/p/7071659.html
Copyright © 2011-2022 走看看