zoukankan      html  css  js  c++  java
  • 123

    # -*- coding: utf-8 -*-
    # @Author  : xuchunlin
    # @Time    : 2020/7/20 10:41
    # @License : Copyright(C),Drcnet

    # from common.contest import *
    from selenium import webdriver
    import time
    from selenium.webdriver import ChromeOptions
    from selenium.webdriver.common.keys import Keys
    # from common.contest import logger
    from bs4 import BeautifulSoup

    # Drive a real Firefox instance through Selenium and load the listing page.
    driver = webdriver.Firefox()
    try:
        driver.get('http://www.customs.gov.cn/customs/302249/302274/302277/index.html')
        # Alternative URL (disabled):
        # driver.get('http://www.customs.gov.cn/customs/302249/302274/302277/302276/310398/index.html')

        # Fixed 20-second pause before the DOM snapshot is taken.
        # NOTE(review): presumably this gives the page's scripts time to finish
        # rendering -- confirm before shortening it.
        time.sleep(20)
        result = driver.page_source

        # Parse the snapshot and keep only the first <div id="mainBox">.
        soup = BeautifulSoup(result, 'html.parser')
        main_boxes = soup.select('div#mainBox')
        if not main_boxes:
            # Same exception type as the bare [0] lookup would raise, but with
            # a message that says what was actually missing.
            raise IndexError("no <div id='mainBox'> element in the rendered page")
        result_mainBox = main_boxes[0]
    except BaseException:
        # Any failure here (including Ctrl-C during the wait) would otherwise
        # skip the cleanup further down and leave the browser running.
        driver.quit()
        raise

    import re

    # Every whitespace character except the ordinary space (U+0020): line
    # breaks, tabs, no-break spaces, ideographic spaces, ...
    _NON_SPACE_WHITESPACE = re.compile(r'[^\S ]+')
    # A complete HTML comment, whatever it contains (including '>' and tags).
    _HTML_COMMENT = re.compile(r'<!--.*?-->', re.DOTALL)


    def replace_br(newline):
        """Flatten an HTML fragment: drop line breaks and HTML comments.

        The value is converted with ``str()`` and then cleaned in this order:

        1. whitespace other than the plain space is removed (line breaks,
           tabs, no-break spaces); plain spaces are kept so that attribute
           lists such as ``<tr align="center" height="25"`` stay intact;
        2. the literal ``amp;`` is removed (``&amp;`` becomes ``&``);
        3. ``<br/>`` and ``<br>`` tags are removed;
        4. complete ``<!-- ... -->`` comments are removed together with their
           content, even when the content contains ``>``;
        5. any comment marker left over (an unterminated ``<!--`` or a stray
           ``-->``) is removed.

        :param newline: a string, or any object whose ``str()`` is HTML
            (for example a BeautifulSoup tag)
        :return: the cleaned markup as a ``str``
        """
        text = str(newline)
        # NOTE(review): the source listing showed six identical replace(' ', '')
        # calls here; the characters they targeted appear to have been lost.
        # Stripping plain spaces would defeat space-containing patterns applied
        # to the result, so only non-space whitespace is removed.
        text = _NON_SPACE_WHITESPACE.sub('', text)
        text = text.replace('amp;', '').replace('<br/>', '').replace('<br>', '')
        # Non-greedy match up to the first '-->' so a comment holding tags is
        # removed as a whole instead of leaking its content into the output.
        text = _HTML_COMMENT.sub('', text)
        return text.replace('<!--', '').replace('-->', '')

    # Reduce the extracted container to one cleaned HTML string.
    result_mainBox_replace = replace_br(result_mainBox)

    # The page has been captured, so the browser is no longer needed.
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()

    # Each match is the remainder of a table row that opens with
    # <tbody><tr align="center" height="25", up to its closing </tr></tbody>.
    result_list = re.findall(
        '<tbody><tr align="center" height="25"(.*?)</tr></tbody>',
        result_mainBox_replace,
    )
    print(len(result_list))

    # NOTE(review): the loop's indentation was lost in the source listing; both
    # prints are assumed to belong to the `if` -- confirm against the intended
    # output.
    for item in result_list:
        if 'TEXT-INDENT: 5px' in item:
            print(1111111111)
            # Print the matching row with its spaces stripped.
            print(item.replace(" ", ''))

  • 相关阅读:
    [极客大挑战 2019]EasySQL CTF复现
    [极客大挑战 2019]Havefun (一起来撸猫) CTF复现
    一个简单漂亮的登录页面(前端)
    Python XPath的使用
    Python Requests的基本用法
    Linux配置jdk环境变量
    高性能 Java RPC 框架 Dubbo
    Zookeeper的配置文件及命令
    zookeeper怎么实现分布式锁
    Zookeeper-集群崩溃恢复
  • 原文地址:https://www.cnblogs.com/xuchunlin/p/13344655.html
Copyright © 2011-2022 走看看