zoukankan      html  css  js  c++  java
  • 测试网站链接是否可用(wget和curl)

    一、wget用法案例

    系统给的命令参数如下:

    [root@litong_centos mysql3307]# wget --help
    GNU Wget 1.14, a non-interactive network retriever.
    Usage: wget [OPTION]... [URL]...
    
    Mandatory arguments to long options are mandatory for short options too.
    
    Startup:
      -V,  --version           display the version of Wget and exit.
      -h,  --help              print this help.
      -b,  --background        go to background after startup.
      -e,  --execute=COMMAND   execute a `.wgetrc'-style command.
    
    Logging and input file:
      -o,  --output-file=FILE    log messages to FILE.
      -a,  --append-output=FILE  append messages to FILE.
      -d,  --debug               print lots of debugging information.
      -q,  --quiet               quiet (no output).
      -v,  --verbose             be verbose (this is the default).
      -nv, --no-verbose          turn off verboseness, without being quiet.
           --report-speed=TYPE   Output bandwidth as TYPE.  TYPE can be bits.
      -i,  --input-file=FILE     download URLs found in local or external FILE.
      -F,  --force-html          treat input file as HTML.
      -B,  --base=URL            resolves HTML input-file links (-i -F)
                                 relative to URL.
           --config=FILE         Specify config file to use.
    
    Download:
      -t,  --tries=NUMBER            set number of retries to NUMBER (0 unlimits).
           --retry-connrefused       retry even if connection is refused.
      -O,  --output-document=FILE    write documents to FILE.
      -nc, --no-clobber              skip downloads that would download to
                                     existing files (overwriting them).
      -c,  --continue                resume getting a partially-downloaded file.
           --progress=TYPE           select progress gauge type.
      -N,  --timestamping            don't re-retrieve files unless newer than
                                     local.
      --no-use-server-timestamps     don't set the local file's timestamp by
                                     the one on the server.
      -S,  --server-response         print server response.
           --spider                  don't download anything.
      -T,  --timeout=SECONDS         set all timeout values to SECONDS.
           --dns-timeout=SECS        set the DNS lookup timeout to SECS.
           --connect-timeout=SECS    set the connect timeout to SECS.
           --read-timeout=SECS       set the read timeout to SECS.
      -w,  --wait=SECONDS            wait SECONDS between retrievals.
           --waitretry=SECONDS       wait 1..SECONDS between retries of a retrieval.
           --random-wait             wait from 0.5*WAIT...1.5*WAIT secs between retrievals.
           --no-proxy                explicitly turn off proxy.
      -Q,  --quota=NUMBER            set retrieval quota to NUMBER.
           --bind-address=ADDRESS    bind to ADDRESS (hostname or IP) on local host.
           --limit-rate=RATE         limit download rate to RATE.
           --no-dns-cache            disable caching DNS lookups.
           --restrict-file-names=OS  restrict chars in file names to ones OS allows.
           --ignore-case             ignore case when matching files/directories.
      -4,  --inet4-only              connect only to IPv4 addresses.
      -6,  --inet6-only              connect only to IPv6 addresses.
           --prefer-family=FAMILY    connect first to addresses of specified family,
                                     one of IPv6, IPv4, or none.
           --user=USER               set both ftp and http user to USER.
           --password=PASS           set both ftp and http password to PASS.
           --ask-password            prompt for passwords.
           --no-iri                  turn off IRI support.
           --local-encoding=ENC      use ENC as the local encoding for IRIs.
           --remote-encoding=ENC     use ENC as the default remote encoding.
           --unlink                  remove file before clobber.
    
    Directories:
      -nd, --no-directories           don't create directories.
      -x,  --force-directories        force creation of directories.
      -nH, --no-host-directories      don't create host directories.
           --protocol-directories     use protocol name in directories.
      -P,  --directory-prefix=PREFIX  save files to PREFIX/...
           --cut-dirs=NUMBER          ignore NUMBER remote directory components.
    
    HTTP options:
           --http-user=USER        set http user to USER.
           --http-password=PASS    set http password to PASS.
           --no-cache              disallow server-cached data.
           --default-page=NAME     Change the default page name (normally
                                   this is `index.html'.).
      -E,  --adjust-extension      save HTML/CSS documents with proper extensions.
           --ignore-length         ignore `Content-Length' header field.
           --header=STRING         insert STRING among the headers.
           --max-redirect          maximum redirections allowed per page.
           --proxy-user=USER       set USER as proxy username.
           --proxy-password=PASS   set PASS as proxy password.
           --referer=URL           include `Referer: URL' header in HTTP request.
           --save-headers          save the HTTP headers to file.
      -U,  --user-agent=AGENT      identify as AGENT instead of Wget/VERSION.
           --no-http-keep-alive    disable HTTP keep-alive (persistent connections).
           --no-cookies            don't use cookies.
           --load-cookies=FILE     load cookies from FILE before session.
           --save-cookies=FILE     save cookies to FILE after session.
           --keep-session-cookies  load and save session (non-permanent) cookies.
           --post-data=STRING      use the POST method; send STRING as the data.
           --post-file=FILE        use the POST method; send contents of FILE.
           --content-disposition   honor the Content-Disposition header when
                                   choosing local file names (EXPERIMENTAL).
           --content-on-error      output the received content on server errors.
           --auth-no-challenge     send Basic HTTP authentication information
                                   without first waiting for the server's
                                   challenge.
    
    HTTPS (SSL/TLS) options:
           --secure-protocol=PR     choose secure protocol, one of auto, SSLv2,
                                    SSLv3, TLSv1, TLSv1_1 and TLSv1_2.
           --no-check-certificate   don't validate the server's certificate.
           --certificate=FILE       client certificate file.
           --certificate-type=TYPE  client certificate type, PEM or DER.
           --private-key=FILE       private key file.
           --private-key-type=TYPE  private key type, PEM or DER.
           --ca-certificate=FILE    file with the bundle of CA's.
           --ca-directory=DIR       directory where hash list of CA's is stored.
           --random-file=FILE       file with random data for seeding the SSL PRNG.
           --egd-file=FILE          file naming the EGD socket with random data.
    
    FTP options:
           --ftp-user=USER         set ftp user to USER.
           --ftp-password=PASS     set ftp password to PASS.
           --no-remove-listing     don't remove `.listing' files.
           --no-glob               turn off FTP file name globbing.
           --no-passive-ftp        disable the "passive" transfer mode.
           --preserve-permissions  preserve remote file permissions.
           --retr-symlinks         when recursing, get linked-to files (not dir).
    
    WARC options:
           --warc-file=FILENAME      save request/response data to a .warc.gz file.
           --warc-header=STRING      insert STRING into the warcinfo record.
           --warc-max-size=NUMBER    set maximum size of WARC files to NUMBER.
           --warc-cdx                write CDX index files.
           --warc-dedup=FILENAME     do not store records listed in this CDX file.
           --no-warc-compression     do not compress WARC files with GZIP.
           --no-warc-digests         do not calculate SHA1 digests.
           --no-warc-keep-log        do not store the log file in a WARC record.
           --warc-tempdir=DIRECTORY  location for temporary files created by the
                                     WARC writer.
    
    Recursive download:
      -r,  --recursive          specify recursive download.
      -l,  --level=NUMBER       maximum recursion depth (inf or 0 for infinite).
           --delete-after       delete files locally after downloading them.
      -k,  --convert-links      make links in downloaded HTML or CSS point to
                                local files.
      --backups=N   before writing file X, rotate up to N backup files.
      -K,  --backup-converted   before converting file X, back up as X.orig.
      -m,  --mirror             shortcut for -N -r -l inf --no-remove-listing.
      -p,  --page-requisites    get all images, etc. needed to display HTML page.
           --strict-comments    turn on strict (SGML) handling of HTML comments.
    
    Recursive accept/reject:
      -A,  --accept=LIST               comma-separated list of accepted extensions.
      -R,  --reject=LIST               comma-separated list of rejected extensions.
           --accept-regex=REGEX        regex matching accepted URLs.
           --reject-regex=REGEX        regex matching rejected URLs.
           --regex-type=TYPE           regex type (posix|pcre).
      -D,  --domains=LIST              comma-separated list of accepted domains.
           --exclude-domains=LIST      comma-separated list of rejected domains.
           --follow-ftp                follow FTP links from HTML documents.
           --follow-tags=LIST          comma-separated list of followed HTML tags.
           --ignore-tags=LIST          comma-separated list of ignored HTML tags.
      -H,  --span-hosts                go to foreign hosts when recursive.
      -L,  --relative                  follow relative links only.
      -I,  --include-directories=LIST  list of allowed directories.
      --trust-server-names             use the name specified by the redirection
                                       url last component.
      -X,  --exclude-directories=LIST  list of excluded directories.
      -np, --no-parent                 don't ascend to the parent directory.
    
    Mail bug reports and suggestions to <bug-wget@gnu.org>.
    

    通常我们用到的参数很少,如下所示:

    --spider                   模拟爬虫的行为去访问网站,但不会下载网页
    -q,    --quiet             安静的访问,禁止输出,类似-o /dev/null功能
    -o,    --output-file=FILE  记录输出到文件
    -T,    --timeout=SECONDS   访问网站的超时时间
    -t,    --tries=NUMBER      当网站异常时重试网站的次数

    测试命令:

    wget --spider -T 5 -q -t 2 www.baidu.com; echo $?
    若返回值为0代表正常

    二、curl用法案例

    系统给的命令参数如下:

    [root@litong_centos mysql3307]# curl --help
    Usage: curl [options...] <url>
    Options: (H) means HTTP/HTTPS only, (F) means FTP only
         --anyauth       Pick "any" authentication method (H)
     -a, --append        Append to target file when uploading (F/SFTP)
         --basic         Use HTTP Basic Authentication (H)
         --cacert FILE   CA certificate to verify peer against (SSL)
         --capath DIR    CA directory to verify peer against (SSL)
     -E, --cert CERT[:PASSWD] Client certificate file and password (SSL)
         --cert-type TYPE Certificate file type (DER/PEM/ENG) (SSL)
         --ciphers LIST  SSL ciphers to use (SSL)
         --compressed    Request compressed response (using deflate or gzip)
     -K, --config FILE   Specify which config file to read
         --connect-timeout SECONDS  Maximum time allowed for connection
     -C, --continue-at OFFSET  Resumed transfer offset
     -b, --cookie STRING/FILE  String or file to read cookies from (H)
     -c, --cookie-jar FILE  Write cookies to this file after operation (H)
         --create-dirs   Create necessary local directory hierarchy
         --crlf          Convert LF to CRLF in upload
         --crlfile FILE  Get a CRL list in PEM format from the given file
     -d, --data DATA     HTTP POST data (H)
         --data-ascii DATA  HTTP POST ASCII data (H)
         --data-binary DATA  HTTP POST binary data (H)
         --data-urlencode DATA  HTTP POST data url encoded (H)
         --delegation STRING GSS-API delegation permission
         --digest        Use HTTP Digest Authentication (H)
         --disable-eprt  Inhibit using EPRT or LPRT (F)
         --disable-epsv  Inhibit using EPSV (F)
     -D, --dump-header FILE  Write the headers to this file
         --egd-file FILE  EGD socket path for random data (SSL)
         --engine ENGINGE  Crypto engine (SSL). "--engine list" for list
     -f, --fail          Fail silently (no output at all) on HTTP errors (H)
     -F, --form CONTENT  Specify HTTP multipart POST data (H)
         --form-string STRING  Specify HTTP multipart POST data (H)
         --ftp-account DATA  Account data string (F)
         --ftp-alternative-to-user COMMAND  String to replace "USER [name]" (F)
         --ftp-create-dirs  Create the remote dirs if not present (F)
         --ftp-method [MULTICWD/NOCWD/SINGLECWD] Control CWD usage (F)
         --ftp-pasv      Use PASV/EPSV instead of PORT (F)
     -P, --ftp-port ADR  Use PORT with given address instead of PASV (F)
         --ftp-skip-pasv-ip Skip the IP address for PASV (F)
         --ftp-pret      Send PRET before PASV (for drftpd) (F)
         --ftp-ssl-ccc   Send CCC after authenticating (F)
         --ftp-ssl-ccc-mode ACTIVE/PASSIVE  Set CCC mode (F)
         --ftp-ssl-control Require SSL/TLS for ftp login, clear for transfer (F)
     -G, --get           Send the -d data with a HTTP GET (H)
     -g, --globoff       Disable URL sequences and ranges using {} and []
     -H, --header LINE   Custom header to pass to server (H)
     -I, --head          Show document info only
     -h, --help          This help text
         --hostpubmd5 MD5  Hex encoded MD5 string of the host public key. (SSH)
     -0, --http1.0       Use HTTP 1.0 (H)
         --ignore-content-length  Ignore the HTTP Content-Length header
     -i, --include       Include protocol headers in the output (H/F)
     -k, --insecure      Allow connections to SSL sites without certs (H)
         --interface INTERFACE  Specify network interface/address to use
     -4, --ipv4          Resolve name to IPv4 address
     -6, --ipv6          Resolve name to IPv6 address
     -j, --junk-session-cookies Ignore session cookies read from file (H)
         --keepalive-time SECONDS  Interval between keepalive probes
         --key KEY       Private key file name (SSL/SSH)
         --key-type TYPE Private key file type (DER/PEM/ENG) (SSL)
         --krb LEVEL     Enable Kerberos with specified security level (F)
         --libcurl FILE  Dump libcurl equivalent code of this command line
         --limit-rate RATE  Limit transfer speed to this rate
     -l, --list-only     List only names of an FTP directory (F)
         --local-port RANGE  Force use of these local port numbers
     -L, --location      Follow redirects (H)
         --location-trusted like --location and send auth to other hosts (H)
     -M, --manual        Display the full manual
         --mail-from FROM  Mail from this address
         --mail-rcpt TO  Mail to this receiver(s)
         --mail-auth AUTH  Originator address of the original email
         --max-filesize BYTES  Maximum file size to download (H/F)
         --max-redirs NUM  Maximum number of redirects allowed (H)
     -m, --max-time SECONDS  Maximum time allowed for the transfer
         --metalink      Process given URLs as metalink XML file
         --negotiate     Use HTTP Negotiate Authentication (H)
     -n, --netrc         Must read .netrc for user name and password
         --netrc-optional Use either .netrc or URL; overrides -n
         --netrc-file FILE  Set up the netrc filename to use
     -N, --no-buffer     Disable buffering of the output stream
         --no-keepalive  Disable keepalive use on the connection
         --no-sessionid  Disable SSL session-ID reusing (SSL)
         --noproxy       List of hosts which do not use proxy
         --ntlm          Use HTTP NTLM authentication (H)
     -o, --output FILE   Write output to <file> instead of stdout
         --pass PASS     Pass phrase for the private key (SSL/SSH)
         --post301       Do not switch to GET after following a 301 redirect (H)
         --post302       Do not switch to GET after following a 302 redirect (H)
         --post303       Do not switch to GET after following a 303 redirect (H)
     -#, --progress-bar  Display transfer progress as a progress bar
         --proto PROTOCOLS  Enable/disable specified protocols
         --proto-redir PROTOCOLS  Enable/disable specified protocols on redirect
     -x, --proxy [PROTOCOL://]HOST[:PORT] Use proxy on given port
         --proxy-anyauth Pick "any" proxy authentication method (H)
         --proxy-basic   Use Basic authentication on the proxy (H)
         --proxy-digest  Use Digest authentication on the proxy (H)
         --proxy-negotiate Use Negotiate authentication on the proxy (H)
         --proxy-ntlm    Use NTLM authentication on the proxy (H)
     -U, --proxy-user USER[:PASSWORD]  Proxy user and password
         --proxy1.0 HOST[:PORT]  Use HTTP/1.0 proxy on given port
     -p, --proxytunnel   Operate through a HTTP proxy tunnel (using CONNECT)
         --pubkey KEY    Public key file name (SSH)
     -Q, --quote CMD     Send command(s) to server before transfer (F/SFTP)
         --random-file FILE  File for reading random data from (SSL)
     -r, --range RANGE   Retrieve only the bytes within a range
         --raw           Do HTTP "raw", without any transfer decoding (H)
     -e, --referer       Referer URL (H)
     -J, --remote-header-name Use the header-provided filename (H)
     -O, --remote-name   Write output to a file named as the remote file
         --remote-name-all Use the remote file name for all URLs
     -R, --remote-time   Set the remote file's time on the local output
     -X, --request COMMAND  Specify request command to use
         --resolve HOST:PORT:ADDRESS  Force resolve of HOST:PORT to ADDRESS
         --retry NUM   Retry request NUM times if transient problems occur
         --retry-delay SECONDS When retrying, wait this many seconds between each
         --retry-max-time SECONDS  Retry only within this period
     -S, --show-error    Show error. With -s, make curl show errors when they occur
     -s, --silent        Silent mode. Don't output anything
         --socks4 HOST[:PORT]  SOCKS4 proxy on given host + port
         --socks4a HOST[:PORT]  SOCKS4a proxy on given host + port
         --socks5 HOST[:PORT]  SOCKS5 proxy on given host + port
         --socks5-basic  Enable username/password auth for SOCKS5 proxies
         --socks5-gssapi Enable GSS-API auth for SOCKS5 proxies
         --socks5-hostname HOST[:PORT] SOCKS5 proxy, pass host name to proxy
         --socks5-gssapi-service NAME  SOCKS5 proxy service name for gssapi
         --socks5-gssapi-nec  Compatibility with NEC SOCKS5 server
     -Y, --speed-limit RATE  Stop transfers below speed-limit for 'speed-time' secs
     -y, --speed-time SECONDS  Time for trig speed-limit abort. Defaults to 30
         --ssl           Try SSL/TLS (FTP, IMAP, POP3, SMTP)
         --ssl-reqd      Require SSL/TLS (FTP, IMAP, POP3, SMTP)
     -2, --sslv2         Use SSLv2 (SSL)
     -3, --sslv3         Use SSLv3 (SSL)
         --ssl-allow-beast Allow security flaw to improve interop (SSL)
         --stderr FILE   Where to redirect stderr. - means stdout
         --tcp-nodelay   Use the TCP_NODELAY option
     -t, --telnet-option OPT=VAL  Set telnet option
         --tftp-blksize VALUE  Set TFTP BLKSIZE option (must be >512)
     -z, --time-cond TIME  Transfer based on a time condition
     -1, --tlsv1         Use => TLSv1 (SSL)
         --tlsv1.0       Use TLSv1.0 (SSL)
         --tlsv1.1       Use TLSv1.1 (SSL)
         --tlsv1.2       Use TLSv1.2 (SSL)
         --trace FILE    Write a debug trace to the given file
         --trace-ascii FILE  Like --trace but without the hex output
         --trace-time    Add time stamps to trace/verbose output
         --tr-encoding   Request compressed transfer encoding (H)
     -T, --upload-file FILE  Transfer FILE to destination
         --url URL       URL to work with
     -B, --use-ascii     Use ASCII/text transfer
     -u, --user USER[:PASSWORD]  Server user and password
         --tlsuser USER  TLS username
         --tlspassword STRING TLS password
         --tlsauthtype STRING  TLS authentication type (default SRP)
         --unix-socket FILE    Connect through this UNIX domain socket
     -A, --user-agent STRING  User-Agent to send to server (H)
     -v, --verbose       Make the operation more talkative
     -V, --version       Show version number and quit
     -w, --write-out FORMAT  What to output after completion
         --xattr        Store metadata in extended file attributes
     -q                 If used as the first parameter disables .curlrc
    

    跟wget相同,我们平常能用到的参数很少,如下所示:

    -I/--head                    显示响应头信息
    -m/--max-time <seconds>      访问超时的时间
    -o/--output <file>           记录访问信息到文件
    -s/--silent                  沉默模式访问,就是不输出信息
    -w/--write-out <format>      以固定特殊的格式输出,例如:%{http_code},输出状态码

    测试命令:

    curl -s -o /dev/null www.baidu.com; echo $?                      若返回值为0,则代表正常
    curl -I -m 5 -s -w "%{http_code}\n" -o /dev/null www.baidu.com   根据返回状态码判断,若为200,则代表正常

    三、脚本

    #!/bin/sh
    # Check whether a URL is reachable; takes exactly one argument (the URL).
    # Exits 0 and prints "<url> is ok." on success, exits 1 otherwise.
    #
    # Special shell parameters used in this script:
    #   $0  name of the script as invoked
    #   $#  number of positional arguments
    #   $@  all positional arguments, each kept as a separate word when quoted
    #   $1  the first positional argument
    usage() {     # print usage to stderr and abort
        echo "usage: $0 url" >&2
        exit 1
    }
    
    check_url() {  # probe the URL without downloading anything
        # --spider : check existence only, download nothing
        # -q       : quiet (no log output; makes a separate log file unnecessary)
        # --tries=1: do not retry on failure
        # -T 5     : 5-second timeout for DNS/connect/read
        # --       : stop option parsing so a URL starting with '-' is safe
        if wget --spider -q --tries=1 -T 5 -- "$1"
        then
            echo "$1 is ok."
            exit 0
        else
            echo "$1 is fail." >&2
            exit 1
        fi
    }
    
    main() {
         if [ "$#" -ne 1 ]   # exactly one argument required; otherwise show usage
         then
             usage
         fi
         check_url "$1"
    }
    main "$@"
  • 相关阅读:
    requestAnimationFrame
    react 面试题
    useState使用和原理
    常用网址记录
    .net 学习官网
    centos nginx 设置开启启动
    Asp.Net Core 发布和部署 Linux + Nginx
    SQL Server 执行计划缓存
    SQL Server之索引解析(二)
    SQL Server之索引解析(一)
  • 原文地址:https://www.cnblogs.com/ltlinux/p/10214083.html
Copyright © 2011-2022 走看看