-- http://blog.csdn.net/leixiaohua1020/article/list/14?viewmode=contents

--- Write `data` to temp.html in the current directory (debugging aid).
-- @param data string to write
function saveData(data)
  -- assert surfaces the io.open error message instead of a nil-index error
  local file = assert(io.open("temp.html", "w"))
  file:write(data)
  file:close()
end

--- Read the number of list pages from the pager block of a blog index page.
-- Looks for the pager <div> and the text "共N页" ("N pages in total") inside it.
-- @param url address of the blog's article list
-- @return page count as a number; 1 when no pager or count is found
--   (presumably single-page blogs omit the pager -- confirm against the site)
function getPageCount(url)
  local http = require("socket.http")
  local resp, status = http.request(url)
  if not resp then
    error("request failed for " .. tostring(url) .. ": " .. tostring(status))
  end

  -- plain search (4th argument true): the marker is literal text, not a pattern
  local s = string.find(resp, '<div id="papelist" class="pagelist">', 1, true)
  if not s then
    return 1
  end
  local e = string.find(resp, "</div>", s, true)
  local divData = e and string.sub(resp, s, e + 5) or string.sub(resp, s)

  -- capture the digits directly instead of slicing by byte offsets, which
  -- only worked when both characters were encoded as 3 bytes each
  local count = string.match(divData, "共(%d+)页")
  return tonumber(count) or 1
end

--- Print the title of every article of a CSDN blog, one per line.
-- @param username blog owner's account name; prints a message and returns
--   when it is nil or empty
function getTitles(username)
  if username == nil or username == "" then
    print("username is nil")
    return
  end

  local http = require("socket.http")
  local preUrl = "http://blog.csdn.net/"
  local endUrl = "?viewmode=contents"
  local pageCount = getPageCount(preUrl .. username .. endUrl)

  for page = 1, pageCount do
    local blogUrl = preUrl .. username .. "/article/list/" .. page .. endUrl
    local resp = http.request(blogUrl)
    if resp then
      local pos = 1
      while true do
        -- each entry is located via the "link_title" marker that precedes its link
        local _, markEnd = string.find(resp, "link_title", pos, true)
        if not markEnd then break end
        local _, hrefEnd = string.find(resp, 'details/%d+">', markEnd)
        if not hrefEnd then break end
        local closeStart = string.find(resp, "</a></span>", hrefEnd, true)
        if not closeStart then break end

        -- the link text is padded with whitespace; trim it rather than
        -- relying on fixed byte offsets
        local title = string.sub(resp, hrefEnd + 1, closeStart - 1)
        print((string.match(title, "^%s*(.-)%s*$")))

        pos = closeStart
      end
    else
      print("failed to fetch " .. blogUrl)
    end
  end
end

getTitles("leixiaohua1020")
如果想保存一篇指定的博文(只要正文),该怎么做呢?
其实只要我们预先保存下博文的js、css和一些控制正文的html标记,然后把正文内容填充进去就可以了。
以下 Lua 脚本可以获取博文页面中的正文部分,即:
<div id="article_content" class="article_content"> 正文 </div>
start.html
<html> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <script src="http://static.blog.csdn.net/scripts/jquery.js" type="text/javascript"></script> <link rel="Stylesheet" type="text/css" href="http://static.blog.csdn.net/skin/skin-blue/css/style.css?v=1.1" /> <link rel="shortcut icon" href="http://c.csdnimg.cn/public/favicon.ico" /> <link type="text/css" rel="stylesheet" href="http://static.blog.csdn.net/scripts/SyntaxHighlighter/styles/default.css" /> </head> <body> <div id="container"> <script type="text/javascript"> var username = "x_iya"; var _blogger = username; var blog_address = "http://blog.csdn.net/x_iya"; var static_host = "http://static.blog.csdn.net"; var currentUserName = ""; </script> <div id="body"> <div id="main"> <div class="main"> <div id="article_details" class="details">
end.html
</div> </div> </div> <script type="text/javascript" src="http://static.blog.csdn.net/scripts/newblog.min.js"></script> </div> </body> </html>
--- Download a blog post and return its article-content fragment.
-- The fragment runs from the opening article_content <div> up to (not
-- including) the first HTML comment that follows it.
-- @param url address of the blog post
-- @return the extracted HTML fragment as a string
-- Raises an error when the request fails or the expected markers are missing.
function GetHtml(url)
  -- required lazily so the usage message below works without LuaSocket loaded
  local http = require("socket.http")
  local resp, status = http.request(url)
  if not resp then
    error("request failed for " .. tostring(url) .. ": " .. tostring(status))
  end

  -- plain search (4th argument true): both markers are literal text
  local s = string.find(resp, '<div id="article_content" class="article_content">', 1, true)
  if not s then
    error("article_content div not found in " .. tostring(url))
  end
  -- "<!--" must not be used as a pattern: "-" is a quantifier there, so it
  -- would also match "<-" inside the article body
  local e = string.find(resp, "<!--", s, true)
  if not e then
    error("end-of-article comment marker not found in " .. tostring(url))
  end
  return string.sub(resp, s, e - 1)
end

--- Write `data` to csdn.html in the current directory, replacing any old file.
-- @param data string to write
function SaveData(data)
  local file = assert(io.open("csdn.html", "w"))
  file:write(data)
  file:close()
end

--- Read a whole file into a string.
-- @param filepath path of the file to read
-- @return the file contents
function ReadData(filepath)
  local file = assert(io.open(filepath, "r"))
  local data = file:read("*a")
  file:close()
  return data
end

-- Example: lua csdn.lua http://blog.csdn.net/x_iya/article/details/52327827
-- `arg` is only set by the standalone interpreter; fall back to an empty list
local args = arg or {}
if #args == 1 then
  local url = args[1]
  local startData = ReadData("start.html")
  local endData = ReadData("end.html")
  -- wrap the extracted article between the saved header and footer templates
  SaveData(startData .. GetHtml(url) .. endData)
else
  print("Usage: lua csdn.lua url")
end