zoukankan      html  css  js  c++  java
  • 天猫、淘宝商品详情、库存、价格抓包

    如有侵权,请联系作者删除

    水平有限,还望大牛指点

    <dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.3</version>
    </dependency>

    import com.sun.tools.doclets.formats.html.SourceToHTMLConverter;
    import net.sf.json.JSONArray;
    import net.sf.json.JSONObject;
    import org.apache.commons.lang.StringUtils;
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import java.io.IOException;
    import java.io.UnsupportedEncodingException;
    import java.net.URLEncoder;
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.*;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Created with Chenquan.
     * Description: 淘宝抓包
     * Date: 2018-12-13
     * Time: 15:12
     */
    public class TaobaoCatch {
    
        public static void main(String[] args) {
            int i = 0;
    
    /*        String url = "https://acs.m.taobao.com/h5/mtop.taobao.wsearch.h5search/1.0/?jsv=2.3.16&appKey=12574478&t=1545023581359&sign=e3476c9041a75de0a9190da470204d93&api=mtop.taobao.wsearch.h5search&v=1.0&H5Request=true&ecode=1&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22q%22%3A%22%E4%BB%99%E6%B6%B5%E5%86%85%E8%A1%A3%22%2C%22search%22%3A%22%E6%8F%90%E4%BA%A4%22%2C%22tab%22%3A%22all%22%2C%22sst%22%3A%221%22%2C%22n%22%3A20%2C%22buying%22%3A%22buyitnow%22%2C%22m%22%3A%22api4h5%22%2C%22token4h5%22%3A%22%22%2C%22abtest%22%3A%221%22%2C%22wlsort%22%3A%221%22%2C%22page%22%3A1%7D";
    
            Connection con = Jsoup.connect(url);
            con.header("Cookie", "cna=TA+aFFGXQFUCAXQaRYGZVU8Q; t=efa81a9785cd86f885e13998b6d5f9cb; thw=cn; uc3=vt3=F8dByRzMU9X8Hvccr00%3D&id2=W8zLpWipxVFu&nk2=0PLo6GHZOM8%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; tracknick=%5Cu9648%5Cu94E81992; lgc=%5Cu9648%5Cu94E81992; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; enc=4rB%2FfKFx8DJKgPpoHlZjr824CEYw%2BlPaKBDWbFO4fnh6svGA97NoZNGERui4fOo2tXSnSVN1ygkfn5R5ekztTQ%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_1; _m_h5_tk=e501ac7690832934d663aef19ee36be5_1545033419107; _m_h5_tk_enc=5147579a652b4fb508dc886d59c37045; isg=BFVVgDOkpYNz64H7Z31pC9thZFHP-goqhI4h7tf6EUwbLnUgn6IZNGPv_DSYLiEc");
    //        con.header("referer", "https://item.taobao.com/item.htm ");
            Connection.Response resp = null;
            try {
                resp = con.method(Connection.Method.GET).ignoreContentType(true).execute();
            } catch (IOException e) {
                e.printStackTrace();
            }
            String body = resp.body();
    //        System.out.println(body);
            body = body.substring(12, body.length() - 1);
            JSONObject jb = JSONObject.fromObject(body);
            JSONArray jsonArray = jb.getJSONObject("data").getJSONArray("listItem");
    
    
    //        while(i<100){
                i++;
                for (int j = 0; j < jsonArray.size(); j++) {
                    JSONObject jsonObject = jsonArray.getJSONObject(j);
                    String item_id = jsonObject.getString("item_id");
                    System.out.println("item_id: "+item_id);
                    getAll(item_id);
                }
    //        }*/
    
            //传链接上的产品id
            getAll("577996531297");
    
    
        }
    
        public static void getAll(String item_id ) {
            try {
                Thread.sleep(2000);//一个休息5s,太快会被禁
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println("开始时间:" + new Date());
            Date dateStart = new Date();
            Document doc = null;
            String id = "";
            try {
    //            int i = 0;
    //            while(i < 10000){
    //            i++;
                String url = "https://item.taobao.com/item.htm?id="+item_id;
                id = getParamByUrl(url, "id");
                doc = Jsoup.connect(url).ignoreContentType(true).get();
    
               /* String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
                String enc = "{"itemNumId":"582061497975"}";
                String gbk = URLEncoder.encode(enc, "utf-8");
                String sds = url + gbk;
                System.out.println("库存、价格"+sds);
                doc = Jsoup.connect(sds).ignoreContentType(true).get();*/
    
                //设置请求头
    //                    Connection con = Jsoup.connect(url);
    //                    con.header("Cookie", " enc=1LWJWtPGgf6MF1NVsn2rbeRb3%2FU1%2Fk5ZiiedHbVedmxmfvUUWDPmFeyKeLYl7NVchBB19JCIVnX0eFv4otK9HA%3D%3D;" +
    //                            "x5sec=7b2264657461696c736b69703b32223a226235653133353933646637396131353230343663346139633633653038326465434c6a4e7a654146454e447739724732716644534b426f4c4f4455774d7a51304e7a4d794f7a453d227d;" );
    //                    con.header("referer", "https://item.taobao.com/item.htm ");
    //                    Connection.Response resp=con.method(Connection.Method.GET).execute();
    //                    Map<String,String> cookies = resp.cookies();
    //                    Connection.Request request = con.request();
    //                    String body = resp.body();
    
    
            } catch (IOException e) {
                e.printStackTrace();
            }
            if (doc.baseUri().contains("tmall")) {
                System.out.println("商品名称:"+ doc.select("h1[data-spm="1000983"]").text());
            }else {
                System.out.println("商品名称:" + doc.select("h3[class="tb-main-title"]").text());
            }
            Elements imgSrcElement = doc.select("#J_UlThumb > li");
            for (Element element : imgSrcElement) {
                String imgSrc = "";
                if (element.baseUri().contains("tmall")){
                    imgSrc = element.getElementsByTag("img").attr("src");
                }else{
                    imgSrc = element.getElementsByTag("img").attr("data-src");
                }
                imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/", "");
                imgSrc = imgSrc.substring(0, imgSrc.length() - 10);
    //            imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
                System.out.println("主图url:" + imgSrc);
            }
            // 规格参数
            Elements selectRules = doc.select(".J_TSaleProp");
    
            List<List<String>> liHashMap = new ArrayList<>();
            for (Element ulElement : selectRules) {
                String ul = ulElement.getElementsByTag("ul").attr("data-property");
                System.out.println("ul:" + ul);
    
                List<String> liString = new ArrayList<>();
    
                for (Element liElement : ulElement.getElementsByTag("li")) {
    
                    String liDataValue = liElement.getElementsByTag("li").attr("data-value");
                    System.out.println("liDataValue: " + liDataValue);
                    liString.add(liDataValue);
    
                    String aStyle = liElement.getElementsByTag("a").attr("style");
                    if (StringUtils.isNotBlank(aStyle)) {
                        aStyle = aStyle.replaceAll("background:url\(", "");
                        aStyle = aStyle.substring(0, aStyle.length() - 29);
    //                aStyle = aStyle.replaceAll("_40x40q90.jpg\) center no-repeat;", "");
                        System.out.println("aStyle: " + aStyle);
                    }
    
                    String spanText = liElement.getElementsByTag("span").text();
                    if (StringUtils.isNotBlank(spanText)) {
                        System.out.println("spanText: " + spanText);
                    }
                }
                liHashMap.add(liString);
            }
    
            List<String> combination = test.combination(liHashMap);
    
            //获取价格、库存
            Elements eles = doc.getElementsByTag("script");
            for (Element ele : eles) {
    
                    String s = ele.toString();
                if (!ele.baseUri().contains("tmall")) {//淘宝
                    String rgex = "";
                    String subUtilSimple = "";
                    if (s.contains("skuMap")) {
    
                        //获取sku的id
                        rgex = "skuMap(.*?)propertyMemoMap";
                        String skuId = s.replaceAll("\s*", "");
        //                System.out.println(s);
                        subUtilSimple = getSubUtilSimple(skuId, rgex);
                        subUtilSimple = subUtilSimple.substring(1, subUtilSimple.length() - 1);
        //
                        JSONObject jb = JSONObject.fromObject(subUtilSimple);
    
                        JSONObject finalJb = jb;
                        List<String> skuList = new ArrayList<>();
                        combination.forEach(p->{
    
                            JSONObject jsonObject = finalJb.getJSONObject(";" + p + ";");
                            if (!jsonObject.isNullObject()) {
    
                                String o = jsonObject.getString("skuId");
                                System.out.println("sku的id: " + o);
                                skuList.add(o);
    
                            }
                        });
    
    
                        String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
                        String enc = "{"itemNumId":"" + id + ""}";
                        String substore = "";
                        String store = "";
                        try {
                            String gbk = URLEncoder.encode(enc, "utf-8");
                            String sds = url + gbk;
                            System.out.println("库存、价格" + sds);
                            doc = Jsoup.connect(sds).ignoreContentType(true).get();
                            store = doc.toString();
                            rgex = "sku2info(.*?)skuItem";
                            substore = getSubUtilSimple(store, rgex);
                            substore = substore.substring(3, substore.length() - 3);
                            String sub = substore.replaceAll("\\", "").replaceAll("\s*", "");
                            JSONObject sb = JSONObject.fromObject(sub);
                            skuList.stream().forEach(p->{
                                if (sb.has(p)) {//判断是否有值,没值不取,不然会报错
                                    String string = sb.getString(p);
                                    System.out.println("淘宝的价格库存==============" + string);
                                }
                            });
    
    
                        } catch (Exception e) {
                            System.out.println("报错的地方store:" + store);
    //                        System.out.println("报错的地方substore:" + substore);
                            e.printStackTrace();
                            System.out.println("=====================================程序报错,提前结束===================================================" );
                            return;
                        }
    
    
                    }
                    if (s.contains("descUrl") && s.contains("counterApi")) {
        //                System.out.println(s);
                        //详情链接
                        rgex = "protocol(.*?)desc\.alicdn\.com";
                        subUtilSimple = getSubUtilSimple(s, rgex);
                        subUtilSimple = subUtilSimple.substring(14, subUtilSimple.length() - 7);
                        System.out.println("详情链接: " + subUtilSimple);
                        try {
                            doc = Jsoup.connect("http:" + subUtilSimple).get();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
    
                        Elements imgDetail = doc.getElementsByTag("img");
                        for (Element element : imgDetail) {
                            String imgSrc = element.getElementsByTag("img").attr("src");
        //                    imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/","");
        //                    imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
                            if (StringUtils.isNotBlank(imgSrc)) {
                                System.out.println("详情图url:" + imgSrc);
                            }
                        }
                    }
                }else {//天猫的
                    if (s.contains("TShop.Setup")) {
    
    //                String rgex = "<bdocid>(.*?)</bdocid>";
                        String rgex = "skuMap(.*?)salesProp";
    
                        String subUtilSimple = getSubUtilSimple(s, rgex);
                        subUtilSimple = subUtilSimple.substring(2, subUtilSimple.length() - 2);
    
                        JSONObject jb = JSONObject.fromObject(subUtilSimple);
                        List<String> skuList = new ArrayList<>();
    
                        combination.forEach(p->{
                            JSONObject jsonObject = jb.getJSONObject(";" + p + ";");
                            if (!jsonObject.isNullObject()) {
                                String skuId = jsonObject.getString("skuId");
                                System.out.println(skuId);
                                skuList.add(skuId);
                            }
    
                        });
    
    
    
                        //库存、价格
                        String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
                        String enc = "{"itemNumId":"" + id + ""}";
                        String substore = "";
                        String store = "";
                        try {
                            String gbk = URLEncoder.encode(enc, "utf-8");
                            String sds = url + gbk;
                            System.out.println("库存、价格" + sds);
                            doc = Jsoup.connect(sds).ignoreContentType(true).get();
                            store = doc.toString();
                            rgex = "sku2info(.*?)skuItem";
                            substore = getSubUtilSimple(store, rgex);
                            substore = substore.substring(3, substore.length() - 3);
                            String sub = substore.replaceAll("\\", "").replaceAll("\s*", "");
                            JSONObject sb = JSONObject.fromObject(sub);
                            skuList.stream().forEach(p->{
                                if (sb.has(p)) {//判断是否有值,没值不取,不然会报错
                                    String string = sb.getString(p);
                                    System.out.println("天猫的价格库存==============" + string);
                                }
                            });
    
    
                        } catch (Exception e) {
                            System.out.println("报错的地方store:" + store);
    //                        System.out.println("报错的地方substore:" + substore);
                            e.printStackTrace();
                            System.out.println("=====================================程序报错,提前结束===================================================" );
                            return;
                        }
    
    
    
    
    
    
    
    
                        //详情链接
                        rgex = "httpsDescUrl(.*?)fetchDcUrl";
                        subUtilSimple = getSubUtilSimple(s, rgex);
                        subUtilSimple = subUtilSimple.substring(3, subUtilSimple.length() - 3);
                        System.out.println(subUtilSimple);
    
                        try {
                            doc = Jsoup.connect("http:"+subUtilSimple).get();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
    
                        Elements imgDetail = doc.getElementsByTag("img");
                        for (Element element :imgDetail) {
                            String imgSrc = element.getElementsByTag("img").attr("src");
    //                    imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/","");
    //                    imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
                            System.out.println("详情图url:"+imgSrc);
                        }
    
                        break;
                    }
                }
    
    
    
            }
    
            System.out.println("结束时间:" + new Date());
            Date dateEnd = new Date();
            long number = dateEnd.getTime()-dateStart.getTime();
            //然后在将毫秒转换为date类型就可以了
            System.out.println("时间差为: "+number/1000);
        }
    
    
        /**
         * 返回单个字符串,若匹配到多个的话就返回第一个,方法与getSubUtil一样
         *
         * @param soap
         * @param rgex
         * @return
         */
        public static String getSubUtilSimple(String soap, String rgex) {
            Pattern pattern = Pattern.compile(rgex);// 匹配的模式
            Matcher m = pattern.matcher(soap);
            while (m.find()) {
                return m.group(1);
            }
            return "";
        }
    
    
        /**
         * 获取指定url中的某个参数
         *
         * @param url
         * @param name
         * @return
         */
        public static String getParamByUrl(String url, String name) {
            url += "&";
            String pattern = "(\?|&){1}#{0,1}" + name + "=[a-zA-Z0-9]*(&{1})";
    
            Pattern r = Pattern.compile(pattern);
    
            Matcher m = r.matcher(url);
            if (m.find()) {
    //            System.out.println(m.group(0));
                return m.group(0).split("=")[1].replace("&", "");
            } else {
                return null;
            }
        }
    
    
    }
    import com.google.gson.JsonObject;
    import net.sf.json.JSONObject;
    
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Builds every combination of the elements of several string lists (created by
     * Chenquan, 2018-12-16).
     *
     * <p>The elements of one combination are joined with {@code ";"}, e.g. the groups
     * {@code [a, b]} and {@code [1, 2]} give {@code [a;1, a;2, b;1, b;2]}.
     */
    public class test {

        /**
         * Demo: prints all 27 combinations of three groups of three strings.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            List<String> li = new ArrayList<>();
            li.add("aa");
            li.add("bb");
            li.add("cc");

            List<String> bi = new ArrayList<>();
            bi.add("ee");
            bi.add("rr");
            bi.add("tt");

            List<String> ci = new ArrayList<>();
            ci.add("yy");
            ci.add("uu");
            ci.add("ii");

            List<List<String>> list = new ArrayList<>();
            list.add(li);
            list.add(bi);
            list.add(ci);

            System.out.println(combination(list));
        }

        /**
         * Combines the elements of any number of groups.
         *
         * <p>Groups are combined left to right, so the leftmost group varies slowest. The result
         * is always a new list; the input lists are never returned or modified.
         *
         * @param groups the groups to combine
         * @return every combination, or an empty list when {@code groups} is {@code null} or
         *         empty or when any group is {@code null} or empty (the original returned
         *         {@code null} here, which callers dereferenced)
         */
        public static List<String> combination(List<List<String>> groups) {
            if (invalid(groups) || invalid(groups.get(0))) {
                return new ArrayList<>();
            }
            // Copy the first group so that a single-group result does not alias the caller's list.
            List<String> combine = new ArrayList<>(groups.get(0));
            for (int i = 1; i < groups.size(); i++) {
                combine = cartesianProduct(combine, groups.get(i));
                if (combine.isEmpty()) {
                    // An empty group leaves no combinations; later groups cannot change that.
                    return combine;
                }
            }
            return combine;
        }

        /**
         * Combines the elements of two groups.
         *
         * @param c1 first group; its elements form the left part of each result
         * @param c2 second group; its elements form the right part of each result
         * @return {@code "s;t"} for every {@code s} in {@code c1} and {@code t} in {@code c2},
         *         ordered by {@code c1} first; an empty list when either group is {@code null}
         *         or empty
         */
        public static List<String> cartesianProduct(List<String> c1, List<String> c2) {
            if (invalid(c1) || invalid(c2)) {
                return new ArrayList<>();
            }
            List<String> combine = new ArrayList<>(c1.size() * c2.size());
            for (String s : c1) {
                for (String t : c2) {
                    combine.add(String.format("%s;%s", s, t));
                }
            }
            return combine;
        }

        /**
         * Tells whether a list cannot take part in a combination.
         *
         * @param c list to check
         * @return {@code true} when {@code c} is {@code null} or empty
         */
        private static boolean invalid(List<?> c) {
            return c == null || c.isEmpty();
        }

    }
    

      

  • 相关阅读:
    原生js 实现 map
    前端安全学习
    前端性能优化
    前端的跨域请求方法使用场景及各自的局限性
    7-7 12-24小时制(15 分)
    7-4 BCD解密(10 分)
    7-2 然后是几点(15 分)
    厘米换算英尺英寸
    鸡兔同笼
    数组:经典计数
  • 原文地址:https://www.cnblogs.com/itchenfirst/p/10131526.html
Copyright © 2011-2022 走看看