zoukankan      html  css  js  c++  java
  • 一个爬喜马拉雅音频的例子

    不废话了,上代码

    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.URL;
    
    import org.apache.http.HttpResponse;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.util.EntityUtils;
    
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    import java.util.Map;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import java.net.URL;
    import java.net.URLConnection;
    
    import com.yeepay.g3.utils.common.json.JSONUtils;
    
    /**
     * Command-line scraper that walks the pages of one Ximalaya album, and for every
     * track link found on a page downloads the audio referenced by the track's JSON
     * metadata into a local folder.
     *
     * <p>Files are stored as {@code <OUTPUT_ROOT>/<first 7 chars of date text>/<date text> <title>.m4a}.
     * A track whose target file already exists is skipped, so the program can be re-run
     * to resume an interrupted download.
     */
    public class MyHttpClient {
        /** Album listing URL; the page number is appended to it. */
        private static final String ALBUM_PAGE_URL = "http://www.ximalaya.com/32160470/album/2881558?page=";
        /** Per-track metadata endpoint; {@code {id}} is replaced by the track id. */
        private static final String TRACK_JSON_URL = "http://www.ximalaya.com/tracks/{id}.json";
        /** Folder under which the per-period sub-folders are created. */
        private static final String OUTPUT_ROOT = "/Users/yp-tc-m-2777/Desktop/testNewP/";
        /** First album page to fetch (inclusive). */
        private static final int FIRST_PAGE = 1;
        /** Last album page to fetch (inclusive). */
        private static final int LAST_PAGE = 1;
        /**
         * Number of leading characters of the date text used as the sub-folder name.
         * NOTE(review): presumably the date text starts with "yyyy-MM" - confirm against the page.
         */
        private static final int FOLDER_PREFIX_LENGTH = 7;
        /** Size of the buffer used when copying streams. */
        private static final int BUFFER_SIZE = 8192;

        /**
         * Fetches each album page, selects the {@code a[class='title']} links on it and
         * downloads the track behind every link.
         *
         * @param args ignored
         * @throws ClientProtocolException if an HTTP request violates the protocol
         * @throws IOException if a page, a metadata document or an audio file cannot be
         *         read or written
         */
        public static void main(String[] args) throws ClientProtocolException, IOException {
            HttpClient hClient = new DefaultHttpClient();
            try {
                for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
                    String pageUrl = ALBUM_PAGE_URL + page;
                    System.out.println(pageUrl);

                    Document doc = Jsoup.parse(fetchText(hClient, pageUrl));
                    Elements elements = doc.select("a[class='title']");
                    for (Element ele : elements) {
                        downloadTrack(hClient, ele);
                    }
                }
            } finally {
                // Release pooled connections even when a download aborts the run.
                hClient.getConnectionManager().shutdown();
            }
        }

        /**
         * Reads the given stream until end-of-stream and returns everything that was read.
         * The stream is not closed by this method.
         *
         * @param inputStream stream to drain
         * @return all bytes read from the stream
         * @throws IOException if reading fails
         */
        public static byte[] readInputStream(InputStream inputStream) throws IOException {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            copy(inputStream, bos);
            return bos.toByteArray();
        }

        /** Performs a GET on {@code url} and returns the response body decoded as UTF-8. */
        private static String fetchText(HttpClient client, String url) throws IOException {
            HttpResponse response = client.execute(new HttpGet(url));
            // EntityUtils turns the response entity into a string and consumes it.
            return EntityUtils.toString(response.getEntity(), "utf-8");
        }

        /**
         * Downloads the track a title link points to, unless its target file already exists.
         * Entries that lack the data needed to build a target path or a download URL are
         * reported on stderr and skipped instead of aborting the whole run.
         */
        private static void downloadTrack(HttpClient client, Element link) throws IOException {
            Element dateElement = link.nextElementSibling();
            if (dateElement == null) {
                System.err.println("Skipping entry without a sibling element: " + link.text());
                return;
            }
            String dateStr = dateElement.text();
            System.out.println(dateStr);
            if (dateStr.length() < FOLDER_PREFIX_LENGTH) {
                System.err.println("Skipping entry with unexpected date text: " + dateStr);
                return;
            }

            String dirName = OUTPUT_ROOT + dateStr.substring(0, FOLDER_PREFIX_LENGTH);
            System.out.println(dirName);
            File dir = new File(dirName);
            // mkdirs() also creates missing parents; it returns false when the folder
            // already exists, hence the second check.
            if (!dir.mkdirs() && !dir.isDirectory()) {
                throw new IOException("Could not create directory " + dirName);
            }

            // href looks like /32160470/sound/68215809/ - the track id is the 4th segment.
            String[] hrefParts = link.attr("href").split("/");
            if (hrefParts.length <= 3) {
                System.err.println("Skipping entry with unexpected href: " + link.attr("href"));
                return;
            }
            String id = hrefParts[3];
            System.out.println(id);

            String url = TRACK_JSON_URL.replace("{id}", id);
            System.out.println(url);
            String fileName = link.text();
            System.out.println(fileName);
            System.out.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));

            File file = new File(dir, toFileName(dateStr + " " + fileName) + ".m4a");
            if (file.exists()) {
                return;
            }

            String json = fetchText(client, url);
            Map<String, String> map = JSONUtils.jsonToMap(json, String.class, String.class);
            String downUrl = (map == null) ? null : map.get("play_path_64");
            if (downUrl == null || downUrl.isEmpty()) {
                System.err.println("Skipping track " + id + ": no play_path_64 in " + url);
                return;
            }
            saveUrlToFile(new URL(downUrl), file);
        }

        /** Replaces path separators so the text can be used as a single file name. */
        private static String toFileName(String text) {
            return text.replace('/', '_').replace('\\', '_');
        }

        /**
         * Streams the content behind {@code source} into {@code target}.
         *
         * <p>The data is first written to a sibling {@code .part} file and renamed once
         * complete, so an interrupted download never leaves a truncated {@code target}
         * that a later run would mistake for a finished one.
         */
        private static void saveUrlToFile(URL source, File target) throws IOException {
            File partial = new File(target.getPath() + ".part");
            boolean completed = false;
            try {
                URLConnection connection = source.openConnection();
                try (InputStream in = connection.getInputStream();
                     FileOutputStream out = new FileOutputStream(partial)) {
                    copy(in, out);
                }
                if (!partial.renameTo(target)) {
                    throw new IOException("Could not rename " + partial + " to " + target);
                }
                completed = true;
            } finally {
                if (!completed) {
                    // Best-effort cleanup; the original failure is what gets reported.
                    partial.delete();
                }
            }
        }

        /** Copies everything from {@code in} to {@code out}; closes neither stream. */
        private static void copy(InputStream in, java.io.OutputStream out) throws IOException {
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
        }
    }
  • 相关阅读:
    kibana We couldn't activate monitoring
    学Redis这篇就够了!
    elasticsearch 官方监控文档 老版但很有用
    java dump 内存分析 elasticsearch Bulk异常引发的Elasticsearch内存泄漏
    Apache Beam实战指南 | 大数据管道(pipeline)设计及实践
    InnoDB一棵B+树可以存放多少行数据?
    函数编程真不好
    面向对象编程灾难
    可能是全网最好的MySQL重要知识点 | 面试必备
    终于有人把elasticsearch原理讲通了
  • 原文地址:https://www.cnblogs.com/coolgame/p/8795898.html
Copyright © 2011-2022 走看看