-
Splash Lua脚本http://localhost:8050
-
入口及返回值
function main(splash, args) splash:go("http://www.baidu.com") splash:wait(0.5) local title = splash:evaljs("document.title") return {title=title} end 通过 evaljs()方法传入 JavaScript 脚本, 而 document.title 的执行结果就是返回网页标题,执行完毕后将其赋值给一个 title 变量,随后将其返回。
# 运行
-
异步处理 ----按照不同步的程序处理问题
function main(splash, args) local example_urls = {"www.baidu.com", "www.taobao.com", "www.zhihu.com"} local urls = args.urls or example_urls local results = {} for index, url in ipairs(urls) do local ok, reason = splash:go("http://" .. url) if ok then splash:wait(2) results[url] = splash:png() end end return results end wait(2) 等待2秒 字符串拼接符使用的是..操作符 go()方法 返回加载页面的结果状态
# 运行结果:(如果页面出现 4xx 或5xx状态码, ok变量就为空,就不会返回加载后的图片。)
-
-
Splash对象属性
-
args属性
获取加载时配置的参数
运行:输出: -
js_enabled属性
js_enabled属性是Splash的JavaScript执行开关,可以将其配置为true或false来控制是否执行JavaScript代码,默认为true
。function main(splash, args) splash:go("https://www.baidu.com") splash.js_enabled = false local title = splash:evaljs("document.title") return {title=title} end go()方法,加载页面 js_enabled = false,禁止执行JavaScript代码 运行情况: HTTP Error 400 (Bad Request) Type: ScriptError -> JS_ERROR Error happened while executing Lua script [string "function main(splash, args) ..."]:4: unknown JS error: None { "type": "ScriptError", "info": { "splash_method": "evaljs", "line_number": 4, "js_error_message": null, "type": "JS_ERROR", "error": "unknown JS error: None", "message": "[string "function main(splash, args) ..."]:4: unknown JS error: None", "source": "[string "function main(splash, args) ..."]" }, "description": "Error happened while executing Lua script", "error": 400 }
-
resource_timeout属性
resource_timeout属性设置加载的超时时间,单位是秒。
function main(splash) splash.resource_timeout = 0.1 assert(splash:go('https://www.taobao.com')) return splash:png() end png()方法,返回页面截图 resource_timeout = 0.1 表示设置的加载超时时间为0.1s
-
images_enabled属性
images_enabled属性设置图片是否加载,默认情况下是加载的。不加载图片,加载的速度会快很多。
function main(splash, args) splash.images_enabled = false assert(splash:go('https://www.jd.com')) return {png=splash:png()} end 运行后请求加载的网页不加载图片
-
-
plugins_enabled属性
plugins_enabled属性可以控制浏览器插件(如Flash插件)是否开启。默认情况下,此属性是false,表示不开启。
splash.plugins_enabled = true/false
-
scroll_position属性
scroll_position属性可以控制页面上下或左右滚动
function main(splash, args) assert(splash:go('https://www.taobao.com')) splash.scroll_position = {y=400} return {png=splash:png()} end 向下滚动400像素
-
-
Splash对象的方法
-
go()方法----请求某个链接
ok, reason = splash:go{url, baseurl=nil, headers=nil, http_method="GET", body=nil, formdata=nil} baseurl----资源加载的相对路径 headers----请求头 http_method----请求方法,GET或POST body----发POST请求时的表单数据,使用的Content-type为application/json。 formdata----POST请求时的表单数据,使用的Content-type为application/x-www-form-urlencoded。
function main(splash, args) local ok, reason = splash:go{"http://httpbin.org/post", http_method="POST", body="name=Germey"} if ok then return splash:html() end end 输出: <html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{ "args": {}, "data": "", "files": {}, "form": { "name": "Germey" }, "headers": { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Encoding": "gzip, deflate", "Accept-Language": "en,*", "Content-Length": "11", "Content-Type": "application/x-www-form-urlencoded", "Host": "httpbin.org", "Origin": "null", "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/602.1 (KHTML, like Gecko) splash Version/9.0 Safari/602.1" }, "json": null, "origin": "120.239.195.171, 120.239.195.171", "url": "https://httpbin.org/post" } </pre></body></html>
-
wait()方法----控制页面的等待时间
ok, reason = splash:wait{time, cancel_on_redirect=false, cancel_on_error=true} time----等待时间/s cancel_on_redirect----如果发生重定向就停止等待,并返回重定向结果 cancel_on_error----如果发生了加载错误,就停止等待
function main(splash) splash:go("https://www.taobao.com") splash:wait(2) return {html=splash:html()} end 等待两秒
-
jsfunc()方法
jsfunc()方法,直接调用JavaScript定义的方法,即实现JavaScript方法到Lua脚本的转换
function main(splash, args) local get_div_count = splash:jsfunc([[ function () { var body = document.body; var divs = body.getElementsByTagName('div'); return divs.length; } ]]) splash:go("https://www.baidu.com") return ("There are %s DIVs"):format( get_div_count()) end 运行结果: "There are 22 DIVs"
-
evaljs()方法
执行JavaScript代码,并返回最后一条JavaScript语句的返回结果
result = splash:evaljs(js) -
runjs()方法
runjs()方法与evaljs()方法功能类似
function main(splash, args) splash:go("https://www.baidu.com") splash:runjs("foo = function() { return 'bar' }") local result = splash:evaljs("foo()") return result end 输出: "bar"
-
autoload()方法----autoload()设置每个页面访问时自动加载的对象
ok, reason = splash:autoload{source_or_url, source=nil, url=nil} source_or_url----JavaScript代码或者JavaScript库链接。 source----JavaScript代码。 url----JavaScript库链接。
function main(splash, args) splash:autoload([[ function get_document_title(){ return document.title; } ]]) splash:go("https://www.baidu.com") return splash:evaljs("get_document_title()") end Splash Response: "百度一下,你就知道"
-
call_later()方法----通过设置定时任务和延迟时间来实现任务延时执行,并且可以在执行前通过cancel()方法取消定时任务
function main(splash, args) local snapshots = {} local timer = splash:call_later(function() snapshots["a"] = splash:png() splash:wait(1.0) snapshots["b"] = splash:png() end, 0.2) splash:go("https://www.taobao.com") splash:wait(3.0) return snapshots end
-
http_get()方法----模拟发送HTTP的GET请求
response = splash:http_get{url, headers=nil, follow_redirects=true} url----请求URL。 headers----可选参数,默认为空,请求头。 follow_redirects----可选参数,表示是否启动自动重定向,默认为true。
function main(splash, args) local treat = require("treat") local response = splash:http_get("http://httpbin.org/get") return { html=treat.as_string(response.body), url=response.url, status=response.status } end 输出结果: html: String (length 347) { "args": {}, "headers": { "Accept-Encoding": "gzip, deflate", "Accept-Language": "en,*", "Host": "httpbin.org", "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/602.1 (KHTML, like Gecko) splash Version/9.0 Safari/602.1" }, "origin": "120.239.195.171, 120.239.195.171", "url": "https://httpbin.org/get" } status: 200 url: "http://httpbin.org/get"
-
http_post()方法----模拟发送HTTP的POST请求
response = splash:http_post{url, headers=nil, follow_redirects=true, body=nil} url----请求URL。 headers----可选参数,默认为空,请求头。 follow_redirects----可选参数,表示是否启动自动重定向,默认为true。 body----可选参数,即表单数据,默认为空。
function main(splash, args) local treat = require("treat") local json = require("json") local response = splash:http_post{"http://httpbin.org/post", body=json.encode({name="Germey"}), headers={["content-type"]="application/json"} } return { html=treat.as_string(response.body), url=response.url, status=response.status } end
-
set_content()方法----设置页面的内容
function main(splash) assert(splash:set_content("<html><body><h1>hello</h1></body></html>")) return splash:png() end
-
html()方法----获取网页源代码
function main(splash, args) splash:go("https://httpbin.org/get") return splash:html() end // 获取https://httpbin.org/get的源代码
-
png()方法----获取png格式的网页截图
function main(splash, args) splash:go("https://www.taobao.com") return splash:png() end
-
jpeg()方法----获取jpeg格式的网页截图
function main(splash, args) splash:go("https://www.taobao.com") return splash:jpeg() end
-
har()方法----获取页面的加载过程
function main(splash, args) splash:go("https://www.baidu.com") return splash:har() end
-
url()方法----获取当前页面正在访问的URL
function main(splash, args) splash:go("https://www.baidu.com") return splash:url() end // 输出: https://www.baidu.com/
-
get_cookies()方法----获取当前页面的Cookies
function main(splash, args) splash:go("https://www.baidu.com") return splash:get_cookies() end // 输出: 0: Object domain: ".baidu.com" expires: "2087-08-08T12:53:28Z" httpOnly: false name: "BAIDUID" path: "/" secure: false value: "B556658F0EAB497638556503063F6AEE:FG=1" 1: Object domain: ".baidu.com" expires: "2087-08-08T12:53:28Z" httpOnly: false name: "BIDUPSID" path: "/" secure: false value: "B556658F0EAB497638556503063F6AEE" 2: Object domain: ".baidu.com" expires: "2087-08-08T12:53:28Z" httpOnly: false name: "PSTM" path: "/" secure: false value: "1563701961" 3: Object domain: ".baidu.com" httpOnly: false name: "delPer" path: "/" secure: false value: "0" 4: Object domain: "www.baidu.com" httpOnly: false name: "BD_HOME" path: "/" secure: false value: "0" 5: Object domain: ".baidu.com" httpOnly: false name: "H_PS_PSSID" path: "/" secure: false value: "29547_1434_21089_18560_29522_29518_28518_29099_28833_29220_26350_29459" 6: Object domain: "www.baidu.com" expires: "2019-07-31T09:39:21Z" httpOnly: false name: "BD_UPN" path: "/" secure: false value: "143354"
-
add_cookie()方法----为当前页面添加Cookie
cookies = splash:add_cookie{name, value, path=nil, domain=nil, expires=nil, httpOnly=nil, secure=nil}
function main(splash) splash:add_cookie{"sessionid", "237465ghgfsd", "/", domain="http://example.com"} splash:go("http://example.com/") return splash:html() end // 输出: <!DOCTYPE html><html><head> <title>Example Domain</title> <meta charset="utf-8"> <meta http-equiv="Content-type" content="text/html; charset=utf-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <style type="text/css"> body { background-color: #f0f0f2; margin: 0; padding: 0; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; } div { 600px; margin: 5em auto; padding: 50px; background-color: #fff; border-radius: 1em; } a:link, a:visited { color: #38488f; text-decoration: none; } @media (max- 700px) { body { background-color: #fff; } div { auto; margin: 0 auto; border-radius: 0; padding: 1em; } } </style> </head> <body> <div> <h1>Example Domain</h1> <p>This domain is established to be used for illustrative examples in documents. You may use this domain in examples without prior coordination or asking for permission.</p> <p><a href="http://www.iana.org/domains/example">More information...</a></p> </div> </body></html>
-
clear_cookies()方法----清除所有Cookies
function main(splash) splash:go("https://www.baidu.com/") splash:clear_cookies() return splash:get_cookies() end // 输出: Array[0]
-
get_viewport_size()方法----获取页面的宽高
function main(splash) splash:go("https://www.baidu.com/") return splash:get_viewport_size() end
-
set_viewport_size()方法----设置页面的宽高
splash:set_viewport_size(width, height)
function main(splash) splash:set_viewport_size(400, 700) assert(splash:go("http://cuiqingcai.com")) return splash:png() end
-
set_viewport_full()方法----浏览器全屏显示
function main(splash) splash:set_viewport_full() assert(splash:go("http://cuiqingcai.com")) return splash:png() end
-
set_user_agent()方法----设置浏览器的User-Agent
function main(splash) splash:set_user_agent('Splash') splash:go("http://httpbin.org/get") return splash:html() end // 这里我们将浏览器的User-Agent设置为Splash
-
set_custom_headers()方法----设置请求头
function main(splash) splash:set_custom_headers({ ["User-Agent"] = "Splash", ["Site"] = "Splash", }) splash:go("http://httpbin.org/get") return splash:html() end // 输出: <html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{ "args": {}, "headers": { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Encoding": "gzip, deflate", "Accept-Language": "en,*", "Host": "httpbin.org", "Site": "Splash", "User-Agent": "Splash" }, "origin": "120.239.195.171, 120.239.195.171", "url": "https://httpbin.org/get" } </pre></body></html>
-
select()方法----选中符合条件的第一个节点----参数为CSS选择器
function main(splash) splash:go("https://www.baidu.com/") input = splash:select("#kw") input:send_text('Splash') splash:wait(3) return splash:png() end // 首先访问了百度,然后选中了搜索框,随后调用了send_text()方法填写了文本,然后返回网页截图。
-
select_all()方法----选中符合条件的所有节点----参数为CSS选择器
function main(splash) local treat = require('treat') assert(splash:go("http://quotes.toscrape.com/")) assert(splash:wait(0.5)) local texts = splash:select_all('.quote .text') local results = {} for index, text in ipairs(texts) do results[index] = text.node.innerHTML end return treat.as_array(results) end // 输出: Array[10] 0: "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”" 1: "“It is our choices, Harry, that show what we truly are, far more than our abilities.”" 2: “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.” 3: "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”" 4: "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”" 5: "“Try not to become a man of success. Rather become a man of value.”" 6: "“It is better to be hated for what you are than to be loved for what you are not.”" 7: "“I have not failed. I've just found 10,000 ways that won't work.”" 8: "“A woman is like a tea bag; you never know how strong it is until it's in hot water.”" 9: "“A day without sunshine is like, you know, night.”"
-
mouse_click()方法----模拟鼠标点击操作,传入的参数为坐标值x和y
。此外,也可以直接选中某个节点,然后调用此方法function main(splash) splash:go("https://www.baidu.com/") input = splash:select("#kw") input:send_text('Splash') submit = splash:select('#su') submit:mouse_click() splash:wait(3) return splash:png() end // 首先选中页面的输入框,输入了文本,然后选中“提交”按钮,调用了mouse_click()方法提交查询,然后页面等待三秒,返回截图。
-