zoukankan      html  css  js  c++  java
  • 物流轨迹抓取

    /**
     * Created by aixiaofeng on 17/2/6.
     */
    public class FedroadSpider extends ExpressSpider {
    
        private static final SimpleDateFormat FMT_COL_DATE = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        private static final SimpleDateFormat FMT_DATE     = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    
        private static QueryTrackApi          queryTrackApi;
    
        @Override
        public Result<String> doQuery(String express, String expressNo, String attr) {
            String deliveryNo = null;
            if (StringUtils.isNotBlank(attr) || !StringUtils.lowerCase(expressNo).startsWith("ec")) {
                deliveryNo = queryDeliveryNoByApi(express, expressNo, attr);
            }
            return queryByPage(express, StringUtils.isNotBlank(deliveryNo) ? deliveryNo : expressNo, attr);
        }
    
        private String queryDeliveryNoByApi(String express, String expressNo, String attr) {
            if (queryTrackApi == null) {
                try {
                    queryTrackApi = ServiceFactory.getBean(QueryTrackApi.class);
                } catch (BeansException e) {
                    queryTrackApi = new QueryTrackApi();
                }
                if (queryTrackApi == null) {
                    queryTrackApi = new QueryTrackApi();
                }
            }
            QueryTrackReq reqTrack = new QueryTrackReq();
            reqTrack.getParameters().setPackageNo(expressNo);
            Result<QueryTrackRes> res = queryTrackApi.doRequest(reqTrack);
            if (res.isSuccess() && res.getData() != null && res.getData().getTrackList() != null
                    && CommonUtil.isNotEmpty(res.getData().getTrackList().getTrackList())) {
                return res.getData().getTrackList().getTrackList().get(0).getDeliveryNo();
            }
            return null;
        }
    
        private Result<String> queryByPage(String express, String expressNo, String attr) {
            Result<String> result = new Result<>();
            String res = "";
            String BOUNDARY = UUID.randomUUID().toString();
            String urlStr = "https://www.fedroad.com";//访问页面
            try {
                StringBuilder strBuilder = new StringBuilder();
                //请求链接,拿到document
                HttpURLConnection conn = null;
                Connection connection = HttpUtils.getConnection(urlStr);
                Connection.Response response = connection.method(Connection.Method.GET).execute();
                Document document = response.parse();
                //定位到form表单
                Elements formDocuments = document.select("#aspnetForm");
    
                //获取conn连接
                URL url = new URL(urlStr);
                conn = (HttpURLConnection) url.openConnection();
                conn.setConnectTimeout(5000);
                conn.setReadTimeout(30000);
                conn.setDoOutput(true);
                conn.setDoInput(true);
                conn.setUseCaches(false);
                conn.setRequestMethod("POST");
                conn.setRequestProperty("Connection", "Keep-Alive");
                conn.setRequestProperty("User-Agent",
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36");
                conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + BOUNDARY);
    
                OutputStream out = new DataOutputStream(conn.getOutputStream());
                //拼POST装请求参数
                for (int i = 0; i < formDocuments.select("input").size(); i++) {
                    if (formDocuments.select("input").get(i).attr("class").contains("user_loginout")) {
                        continue;
                    }
                    strBuilder.append("--" + BOUNDARY + "
    ");
                    strBuilder.append("Content-Disposition: form-data; name="" + formDocuments.select("input").get(i).attr("name") + """ + "
    
    ");
    
                    if (formDocuments.select("input").get(i).attr("name").contains("search_shippingorder")) {
                        strBuilder.append(expressNo + "
    ");
                    } else {
                        strBuilder.append(formDocuments.select("input").get(i).val() + "
    ");
                    }
                }
                strBuilder.append("--" + BOUNDARY + "--");
                out.write(strBuilder.toString().getBytes());
                byte[] endData = ("
    --" + BOUNDARY + "--
    ").getBytes();
                out.write(endData);
                out.flush();
                out.close();
    
                // 读取返回数据
                strBuilder = new StringBuilder();
                BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
                String line;
                while ((line = reader.readLine()) != null) {
                    strBuilder.append(line).append("
    ");
                }
                res = strBuilder.toString();
                //关闭
                reader.close();
                //获取返回的document(就是你需要的)
                document = Jsoup.parse(res);
                Elements trackinfo = document.select(".trackinfo tr");
                JSONObject json = new JSONObject();
                JSONArray arr = new JSONArray();
                 通过Jsoup 获取相应的字段 进行组装
                for (Element trElement : trackinfo) {
                    if (trElement.select("td").attr("class").contains("title")) {
                        continue;
                    }
                    Elements tdElement = trElement.getElementsByTag("td");
                    JSONObject item = new JSONObject();
                    if (tdElement.get(0).text().trim().isEmpty()) {
                        continue;
                    } else {
                        item.put("time", FMT_DATE.format(FMT_COL_DATE.parse(tdElement.get(0).text().trim())));
                    }
                    item.put("context", tdElement.get(1).text().trim());
                    arr.add(item);
                }
                json.put("data", arr);
                 //成功返回
                return result.setSuccess(true).setCode(ErrorConstants.SUCCESS).setData(json.toString());
            } catch (Exception e) {
                result.setCode(ErrorConstants.HTTP_ERR).setMessage(StackTraceUtil.getStackTrace(e));
                LOGGER.error(" - doQuery error,express = " + express + "," + expressNo, e);
                waitRandom();
            }
            //拿到抓取到的参数
            return result;
        }
    
        // 测试
        public static void main(String[] args) {
            FedroadSpider spider = new FedroadSpider();
            Result<String> ret = spider.doQuery(null, "EC000021436MY", null);
            System.out.print(ret);
        }
    }
    

      

  • 相关阅读:
    io流
    JDBC-java数据库连接
    list接口、set接口、map接口、异常
    集合、迭代器、增强for
    math类和biginteger类
    基本包装类和System类
    正则表达式
    API-Object-equals方法和toString方法 String字符串和StringBuffer类
    匿名对象 内部类 包 访问修饰符 代码块
    final 和 static 关键词
  • 原文地址:https://www.cnblogs.com/dreammyone/p/7071659.html
Copyright © 2011-2022 走看看