zoukankan      html  css  js  c++  java
  • htmlunit+fastjson抓取酷狗音乐 qq音乐链接及下载

    上次学了 jsoup 之后,发现一些由脚本动态生成的网页内容无法抓取,于是又学习了 HtmlUnit。下面是抓取酷狗音乐与 QQ 音乐的歌曲链接并下载的例子:

    酷狗音乐:

    import java.io.BufferedInputStream;
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.UUID;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.jsoup.nodes.Element;
    
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;
    import com.gargoylesoftware.htmlunit.BrowserVersion;
    import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
    import com.gargoylesoftware.htmlunit.Page;
    import com.gargoylesoftware.htmlunit.WebClient;
    
    public class worm7 {
    	 private static String name="离骚";
         public static WebClient getWebClient(boolean flag){
        	 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_45); 		
        	 webClient.getOptions().setUseInsecureSSL(true);
        	 webClient.getOptions().setCssEnabled(false);     
             webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
             webClient.getOptions().setThrowExceptionOnScriptError(false);
             webClient.getOptions().setRedirectEnabled(true);
             webClient.getOptions().setAppletEnabled(false);
             webClient.getOptions().setJavaScriptEnabled(flag);    
             webClient.getOptions().setTimeout(60000);
             webClient.getOptions().setPrintContentOnFailingStatusCode(false);
             webClient.setAjaxController(new NicelyResynchronizingAjaxController()); 
             return webClient;
         }
         public static String getMp3Url(WebClient webClient){
        	 FileOutputStream outputStream = null;
             InputStream inputStream = null;
             BufferedInputStream bis = null;
        	try {
    			Page page=webClient.getPage("http://songsearch.kugou.com/song_search_v2?"
    					+ "callback=jQuery112408395432201569397_1532930925600"
    					+ "&keyword="+URLEncoder.encode(name, "utf-8")
    					+ "&page=1"
    					+ "&pagesize=30"
    					+ "&userid=-1"
    					+ "&clientver="
    					+ "&platform=WebFilter"
    					+ "&tag=em"
    					+ "&filter=2"
    					+ "&iscorrection=1"
    					+ "&privilege_filter=0"
    					+ "&_="+System.currentTimeMillis());
    			//System.out.println(page.getWebResponse().getContentAsString());
    			//System.out.println(zzee(page.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))"));
    			JSONObject job=JSONObject.parseObject("{"+zzee(page.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))")+"}").getJSONObject("data");
    			System.out.println("job:"+job);
    			JSONArray list=job.getJSONArray("lists");
    			System.out.println("list"+list);
    			for(int i=0;i<list.size();i++){
    				String id1=list.getJSONObject(i).getString("FileHash");
    				String id2=list.getJSONObject(i).getString("AlbumID");
    				String detailUrl="http://www.kugou.com/yy/index.php?r=play/getdata"
    						+ "&hash="+id1
    						+ "&album_id="+id2
    						+ "&_="+System.currentTimeMillis();
    				Page page2=webClient.getPage(detailUrl);
    				JSONObject job2=JSONObject.parseObject(page2.getWebResponse().getContentAsString()).getJSONObject("data");				
    				System.out.println("标题:"+job2.getString("audio_name"));
    				//System.out.println("歌词:"+job2.getString("lyrics"));
    				System.out.println("mp3:"+job2.getString("play_url"));
    				
    			   
    	                String outImage = job2.getString("audio_name")+ ".mp3";
    	                URL imgUrl = new URL(job2.getString("play_url"));//获取输入流
    	                inputStream = imgUrl.openConnection().getInputStream();
    	                //将输入流信息放入缓冲流提升读写速度
    	                bis = new BufferedInputStream(inputStream);  
    	                //读取字节娄
    	                byte[] buf = new byte[1024];
    	                //生成文件
    	                outputStream = new FileOutputStream("f://"+ outImage);
    	                int size = 0;
    	                //边读边写
    	                while ((size = bis.read(buf)) != -1) {
    	                     outputStream.write(buf, 0, size);
    	                }
    	                //刷新文件流
    	                outputStream.flush();
    
    	            
    			}
    		} catch (Exception e) {
    			e.printStackTrace();
    		} 
    		return name;
        	 
         }
         private static String zzee(String str, String zz) {
     		String list = null;
     		Pattern p = Pattern.compile(zz);
     		Matcher m = p.matcher(str);
     		while (m.find()) {
     			list = m.group();
     		}
     		
     		return list;
     	}
    	public static void main(String[] args) {
    		WebClient webClient=getWebClient(false);	
    		getMp3Url(webClient);		
    	}
    }
    

      运行结果:

    QQ音乐抓取实例:

    import java.io.BufferedInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.UUID;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.jsoup.nodes.Element;
    
    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;
    import com.gargoylesoftware.htmlunit.BrowserVersion;
    import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
    import com.gargoylesoftware.htmlunit.Page;
    import com.gargoylesoftware.htmlunit.WebClient;
    
    public class worm6 {
    	 private static String name="离骚";
    	 static String id1=null;
    	 static String id2=null;
    	 static String id3=null;
    	 static String id4=null;
    	 static String name1=null;
    	 static String name2=null;
    	 static String url = null;
    	 static JSONObject  job2=null;
         public static WebClient getWebClient(boolean flag){
        	 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_45); 		
        	 webClient.getOptions().setUseInsecureSSL(true);
        	 webClient.getOptions().setCssEnabled(false);     
             webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
             webClient.getOptions().setThrowExceptionOnScriptError(false);
             webClient.getOptions().setRedirectEnabled(true);
             webClient.getOptions().setAppletEnabled(false);
             webClient.getOptions().setJavaScriptEnabled(flag);    
             webClient.getOptions().setTimeout(60000);
             webClient.getOptions().setPrintContentOnFailingStatusCode(false);
             webClient.setAjaxController(new NicelyResynchronizingAjaxController()); 
             return webClient;
         }
         public static String getMp3Url(WebClient webClient){
        	 
        	try {
    			Page page=webClient.getPage("https://c.y.qq.com/soso/fcgi-bin/client_search_cp?"
    					+ "ct=24"
    					+ "&qqmusic_ver=1298"
    					+ "&new_json=1"
    					+ "&remoteplace=txt.yqq.center"
    					+ "&searchid=36047978388657978"
    					+ "&t=0"
    					+ "&aggr=1"
    					+ "&cr=1"
    					+ "&catZhida=1"
    					+ "&lossless=0"
    					+ "&p=1"
    					+ "&n=20"
    					+ "&w="+URLEncoder.encode(name, "utf-8")
    					+ "&g_tk=5381"
    					+ "&jsonpCallback=MusicJsonCallback6176591962889693"
    					+ "&loginUin=0"
    					+ "&hostUin=0"
    					+ "&format=jsonp"
    					+ "&inCharset=utf8"
    					+ "&outCharset=utf-8"
    					+ "&notice=0"
    					+ "&platform=yqq"
    					+ "&needNewCode=0"
    					);
    			//System.out.println("page:"+page);
    			//System.out.println("------"+page.getWebResponse().getContentAsString());
    			//System.out.println("======"+zzee(page.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))"));
    			
    			JSONObject job=JSONObject.parseObject("{"+zzee(page.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))")+"}").getJSONObject("data");
    			//System.out.println("job:"+job);
    			String job0=job.getString("song");
    			//System.out.println("job0"+job0);
    			job=JSON.parseObject(job0);
    			JSONArray list=job.getJSONArray("list");
    			//System.out.println("list:"+list);
    			for(int i=0;i<list.size();i++){
    				id1=list.getJSONObject(i).getString("mid");
    				//System.out.println("id1"+id1);
    				id2=list.getJSONObject(i).getString("file");
    				//System.out.println("id"+id2);
    				id2="C400"+JSONObject.parseObject(id2).getString("media_mid")+".m4a";
    				//System.out.println("id"+id2);
    				name1=list.getJSONObject(i).getString("title");
    				name2=list.getJSONObject(i).getString("singer");
    				//System.out.println(name2);
    				JSONArray name=JSON.parseArray(name2);
    				//System.out.println("job4:"+name);
    				name2=name.getJSONObject(0).getString("name");
    				//System.out.println(name.getJSONObject(0).getString("name"));
    				
    
    				/*String detailUrl="https://c.y.qq.com/v8/fcg-bin/fcg_play_single_song.fcg?"
    						+ "songmid="+id1
    						+ "&tpl=yqq_song_detail&format=jsonp&callback=getOneSongInfoCallback&g_tk=5381&jsonpCallback=getOneSongInfoCallback&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0"
    						;
    				Page page2=webClient.getPage(detailUrl);
    				//System.out.println(page2);
    				String b="{"+zzee(page2.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))")+"}";
    				//System.out.println("b"+b);
    				JSONObject job1=JSONObject.parseObject("{"+zzee(page2.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))")+"}").getJSONObject("url");
    				System.out.println("job1:"+job1);
    				String job2=job1.getString(id2);
    				
    				System.out.println("job2"+job2);*/
    				String url1="https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg?g_tk=5381&jsonpCallback=MusicJsonCallback32651599216689386&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&cid=205361747&callback=MusicJsonCallback32651599216689386&uin=0"
    						+"&songmid="+id1
    						+"&filename="+id2
    						+"&guid=2241489759";
    						;
    						Page page2=webClient.getPage(url1);
    						//System.out.println("page2"+page2);
    						JSONObject job2=JSONObject.parseObject("{"+zzee(page2.getWebResponse().getContentAsString(),"(?<=\(\{).*?(?=\}\))")+"}").getJSONObject("data");				
    						//System.out.println("标题:"+job2.getString("items"));
    						String job3=job2.getString("items");
    						JSONArray job4=JSON.parseArray(job3);
    						//System.out.println("job4:"+job4);
    						//System.out.println(job4.getJSONObject(0).getString("vkey"));
    						url ="http://dl.stream.qqmusic.qq.com/"+id2+"?vkey="+job4.getJSONObject(0).getString("vkey")+"&guid=2241489759&uin=0&fromtag=66";
    						System.out.println("name:"+name1+"--"+name2);
    						System.out.println("url:"+url);
    						
    						download();
    			}
    	            
    			
    		} catch (Exception e) {
    			e.printStackTrace();
    		} 
    		return name;
        	 
         }
         private static String zzee(String str, String zz) {
     		String list = null;
     		Pattern p = Pattern.compile(zz);
     		Matcher m = p.matcher(str);
     		while (m.find()) {
     			list = m.group();
     		}
     		
     		return list;
     	}
         private static void download() throws IOException{
        	 FileOutputStream outputStream = null;
             InputStream inputStream = null;
             BufferedInputStream bis = null;
        	 String outImage = name1+"--"+name2+ ".mp3";
             URL imgUrl = new URL(url);//获取输入流
             inputStream = imgUrl.openConnection().getInputStream();
             //将输入流信息放入缓冲流提升读写速度
             bis = new BufferedInputStream(inputStream);  
             //读取字节娄
             byte[] buf = new byte[1024];
             //生成文件
             outputStream = new FileOutputStream("f://"+ outImage);
             int size = 0;
             //边读边写
             while ((size = bis.read(buf)) != -1) {
                  outputStream.write(buf, 0, size);
             }
             //刷新文件流
             outputStream.flush();
         }
    	public static void main(String[] args) {
    		WebClient webClient=getWebClient(false);	
    		getMp3Url(webClient);		
    	}
    }
    

      

    运行结果:

     

    相比之下,酷狗音乐相对好爬一些,QQ音乐的抓取步骤则要繁琐一些。

  • 相关阅读:
    JeeSite4.x 搭建并部署到服务器
    maven编译时出现There are test failures
    ecplise An incompatible version [1.2.14] of the APR based Apache Tomcat Native library is installed, while T
    maven "mvn不是内部或外部命令,也不是可运行的程序或批处理文件"
    rar自动压缩备份
    mysql 0x80004005 unable to connect to any of the specified mysql hosts
    mysql too many connections
    输出控制台信息到日志 并 通过cronolog对tomcat进行日志切分
    Node.js相关——package概念及NPM
    Node.js相关——CommonJS规范
  • 原文地址:https://www.cnblogs.com/xr210/p/9404325.html
Copyright © 2011-2022 走看看