zoukankan      html  css  js  c++  java
  • 遍历CSDN博客

    --http://blog.csdn.net/leixiaohua1020/article/list/14?viewmode=contents
    
    --- Write `data` to the file "temp.html", replacing any previous content.
    -- @param data value to write (passed straight to file:write)
    -- Raises an error describing the cause when the file cannot be opened
    -- or the write fails.
    function saveData(data)
    	local file, openErr = io.open("temp.html", "w")
    	if not file then
    		error("cannot open temp.html for writing: " .. tostring(openErr), 2)
    	end
    	local ok, writeErr = file:write(data)
    	-- Close before reporting a write failure so the handle is not leaked.
    	file:close()
    	if not ok then
    		error("cannot write temp.html: " .. tostring(writeErr), 2)
    	end
    end
    
    --- Fetch a blog listing page and extract its total page count.
    -- The count is taken from the "共N页" text found inside the
    -- <div id="papelist" class="pagelist"> element of the response.
    -- @param url URL of the listing page to request
    -- @return the page count as a string of decimal digits
    -- Raises an error when the request fails or the pager markup is missing.
    function getPageCount(url)
    	local http = require("socket.http")
    	local resp, reqErr = http.request(url)
    	if type(resp) ~= "string" then
    		error("request failed for " .. tostring(url) .. ": " .. tostring(reqErr), 2)
    	end
    	-- Single-quoted literal: the marker itself contains double quotes.
    	-- Plain search (4th argument true) keeps the marker from being read as a pattern.
    	local s = string.find(resp, '<div id="papelist" class="pagelist">', 1, true)
    	if not s then
    		error("pager <div> not found in " .. tostring(url), 2)
    	end
    	local e = string.find(resp, "</div>", s, true)
    	if not e then
    		error("pager <div> is not closed in " .. tostring(url), 2)
    	end
    	local divData = string.sub(resp, s, e + 5)
    	--print(divData)
    	-- A capture extracts the digits directly instead of slicing by byte
    	-- offsets that depend on how many bytes "共" and "页" occupy.
    	local pageCount = string.match(divData, "共(%d+)页")
    	if not pageCount then
    		error("page count text not found in " .. tostring(url), 2)
    	end
    	return pageCount
    end
    
    --- Print the title of every post listed for a CSDN blog user.
    -- Asks getPageCount for the number of listing pages, requests each
    -- "/article/list/<page>" page in turn and prints one title per line.
    -- @param username blog user name; when nil or empty a message is printed
    --        and the function returns without doing any request
    -- Raises an error when the page count is not numeric or a request fails.
    function getTitles(username)
    	if username == nil or username == "" then
    		print("username is nil")
    		return
    	end
    	local http = require("socket.http")
    	local preUrl = "http://blog.csdn.net/"
    	local endUrl = "?viewmode=contents"
    	-- getPageCount yields digits as a string; convert explicitly rather than
    	-- relying on implicit string-to-number coercion in the numeric for loop.
    	local pageCount = tonumber(getPageCount(preUrl .. username .. endUrl))
    	if not pageCount then
    		error("could not determine page count for " .. username, 2)
    	end
    	for page = 1, pageCount do
    		local blogUrl = preUrl .. username .. "/article/list/" .. page .. endUrl
    		local resp, reqErr = http.request(blogUrl)
    		if type(resp) ~= "string" then
    			error("request failed for " .. blogUrl .. ": " .. tostring(reqErr), 2)
    		end
    		local _, markerEnd = string.find(resp, "link_title", 1, true)
    		while markerEnd do
    			-- The trailing "()" capture yields the position just after the
    			-- closing '">' of the article link.
    			local titleStart = string.match(resp, 'details/%d+">()', markerEnd)
    			local closeStart = titleStart
    				and string.find(resp, "</a></span>", titleStart, true)
    			if not closeStart then
    				-- Marker without a complete link: nothing more to extract here.
    				break
    			end
    			-- Trim surrounding whitespace instead of skipping a fixed number
    			-- of characters, so layout changes do not cut into the title.
    			local rawTitle = string.sub(resp, titleStart, closeStart - 1)
    			print((string.gsub(rawTitle, "^%s*(.-)%s*$", "%1")))
    			_, markerEnd = string.find(resp, "link_title", closeStart, true)
    		end
    	end
    end
    
    -- Script entry: print the post titles of the blog user "leixiaohua1020".
    getTitles("leixiaohua1020")
    
    
    	



    如果想保存一篇指定的博文(只要正文),该怎么做呢?

    其实只要我们预先保存下博文的js、css和一些控制正文的html标记,然后把正文内容填充进去就可以了。

    下面的 Lua 脚本可以获取博文页面中的如下部分:

    <div id="article_content" class="article_content">
    正文
    </div>


    start.html

    <html>
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
        <script src="http://static.blog.csdn.net/scripts/jquery.js" type="text/javascript"></script>
        <link rel="Stylesheet" type="text/css" href="http://static.blog.csdn.net/skin/skin-blue/css/style.css?v=1.1" />
        <link rel="shortcut icon" href="http://c.csdnimg.cn/public/favicon.ico" />
        <link type="text/css" rel="stylesheet" href="http://static.blog.csdn.net/scripts/SyntaxHighlighter/styles/default.css" />
    </head>
    <body>
    <div id="container">
    <script type="text/javascript">
        var username = "x_iya";
        var _blogger = username;
        var blog_address = "http://blog.csdn.net/x_iya";
        var static_host = "http://static.blog.csdn.net";
        var currentUserName = "";  
    </script>
    <div id="body">
    <div id="main">
    <div class="main">
    <div id="article_details" class="details">
    
    
    end.html

    </div>                 
    </div>
    </div>
    <script type="text/javascript" src="http://static.blog.csdn.net/scripts/newblog.min.js"></script>
    </div>
    </body>
    </html> 


    lua代码:

    --- Download a page and return its article-content fragment.
    -- The fragment starts at <div id="article_content" class="article_content">
    -- and ends just before the first "<!--" that follows it.
    -- @param url URL of the article page to request
    -- @return the extracted HTML fragment as a string
    -- Raises an error when the request fails or either marker is missing.
    function GetHtml(url)
    	local http = require("socket.http")
    	local resp, reqErr = http.request(url)
    	if type(resp) ~= "string" then
    		error("request failed for " .. tostring(url) .. ": " .. tostring(reqErr), 2)
    	end
    	-- Single-quoted literal: the marker itself contains double quotes.
    	local s = string.find(resp, '<div id="article_content" class="article_content">', 1, true)
    	if not s then
    		error("article_content <div> not found in " .. tostring(url), 2)
    	end
    	-- Plain search is required here: as a Lua pattern, "-" is a lazy
    	-- quantifier, so "<!--" would also match "<-" inside the article text.
    	local e = string.find(resp, "<!--", s, true)
    	if not e then
    		error("end-of-article comment not found in " .. tostring(url), 2)
    	end
    	return string.sub(resp, s, e - 1)
    end
    
    --- Write `data` to the file "csdn.html", replacing any previous content.
    -- @param data value to write (passed straight to file:write)
    -- Raises an error describing the cause when the file cannot be opened
    -- or the write fails.
    function SaveData(data)
    	local file, openErr = io.open("csdn.html", "w")
    	if not file then
    		error("cannot open csdn.html for writing: " .. tostring(openErr), 2)
    	end
    	local ok, writeErr = file:write(data)
    	-- Close before reporting a write failure so the handle is not leaked.
    	file:close()
    	if not ok then
    		error("cannot write csdn.html: " .. tostring(writeErr), 2)
    	end
    end
    
    --- Read an entire file and return its content.
    -- @param filepath path of the file to read
    -- @return the whole file content as a string
    -- Raises an error naming the path when the file cannot be opened.
    function ReadData(filepath)
    	local file, openErr = io.open(filepath, "r")
    	if not file then
    		error("cannot open " .. tostring(filepath) .. ": " .. tostring(openErr), 2)
    	end
    	local data = file:read("*a")
    	file:close()
    	return data
    end
    
    --local url = "http://blog.csdn.net/x_iya/article/details/52327827"
    
    -- Command-line entry point: requires exactly one argument, the article URL.
    if #arg ~= 1 then
    	print("Usage: lua csdn.lua url")
    else
    	local articleUrl = arg[1]
    	-- Both template halves are read before the article is requested.
    	local header = ReadData("start.html")
    	local footer = ReadData("end.html")
    	local article = GetHtml(articleUrl)
    	-- Result layout: start.html content, article fragment, end.html content.
    	SaveData(header .. article .. footer)
    end
    




    Keep it simple!
    作者:N3verL4nd
    知识共享,欢迎转载。
  • 相关阅读:
    python列表--查找集合中重复元素的个数
    python3-打印一个进度条
    python3-sys模块
    python3-字符串操作
    python3-深浅复制
    python3-os模块
    接口和抽象类有什么区别
    集合知识
    面向对象的特征有哪些方面
    javadoc时候乱码-编码 GBK 的不可映射字符
  • 原文地址:https://www.cnblogs.com/lgh1992314/p/5834638.html
Copyright © 2011-2022 走看看