zoukankan      html  css  js  c++  java
  • Python实现将固定格式的字符串调整为字典格式,用于爬虫爬取数据时快速添加请求数据

    #!/usr/local/bin/python3.7

    """
    @File    :   str_replace.py
    @Time    :   2020/06/03
    @Author  :   Mozili

    Convert request data copied from a browser into a Python dict literal so
    it can be pasted straight into crawler code.

    Three input layouts are recognised; the layout is detected from the first
    non-blank line:

    * ``key: value``   -- one pair per line, key terminated by a colon
    * ``key value``    -- one pair per line, separated by whitespace
    * ``k1=v1&k2=v2``  -- a URL query string

    When run as a script, lines are read from stdin until an empty line (or
    end of input) and the resulting ``data = {...}`` literal is printed.
    """
    from typing import Callable, Iterable, Iterator, List, Tuple, Union

    # Prompt shown before the first input line (user-facing text).
    PROMPT = '输入要调整的字符串:\n'

    Pair = Tuple[str, str]


    def _is_colon_line(line: str) -> bool:
        """Return True when *line* looks like ``key: value``.

        The text before the colon must not contain whitespace, ``&`` or ``=``;
        otherwise a URL inside a whitespace- or query-formatted line
        (``origURL http://...``, ``url=http://...``) would be mistaken for the
        key/value separator.
        """
        # Search from index 1 so a leading colon (HTTP/2 pseudo-headers such
        # as ":authority") stays part of the key.
        idx = line.find(':', 1)
        if idx == -1:
            return False
        return not any(ch.isspace() or ch in '&=' for ch in line[:idx])


    def _split_colon(line: str) -> Iterator[Pair]:
        """Yield the single ``(key, value)`` pair of a ``key: value`` line."""
        idx = line.find(':', 1)
        if idx == -1:
            # No separator at all: treat the whole line as a key without value.
            yield line, ''
        else:
            # Only the first colon separates; later ones belong to the value.
            yield line[:idx].strip(), line[idx + 1:].strip()


    def _split_whitespace(line: str) -> Iterator[Pair]:
        """Yield the single ``(key, value)`` pair of a ``key value`` line."""
        parts = line.split(None, 1)
        # A key may come without a value (e.g. an empty form field).
        yield parts[0], parts[1] if len(parts) == 2 else ''


    def _split_query(line: str) -> Iterator[Pair]:
        """Yield every ``(key, value)`` pair of a ``k1=v1&k2=v2`` string."""
        for chunk in line.split('&'):
            if not chunk:
                continue  # stray or trailing '&'
            key, _, value = chunk.partition('=')
            yield key, value


    def _pick_splitter(first_line: str) -> Callable[[str], Iterator[Pair]]:
        """Choose the line splitter matching the layout of *first_line*."""
        if _is_colon_line(first_line):
            return _split_colon
        head = first_line.split(None, 1)[0]
        if '=' in head or '&' in head:
            return _split_query
        return _split_whitespace


    def convert(lines: Union[str, Iterable[str]], name: str = 'data') -> str:
        """Return ``name = {...}`` source text built from *lines*.

        Args:
            lines: The copied text, either as one string (split on newlines)
                or as an iterable of lines. Blank lines are ignored.
            name: Variable name used on the left-hand side of the assignment.

        Returns:
            A dict literal with one ``'key':'value'`` entry per line and the
            closing brace on its own line, or ``name = {}`` when there is
            nothing to convert. Keys and values are rendered with ``repr`` so
            quotes and backslashes in the input still give valid Python.
        """
        if isinstance(lines, str):
            lines = lines.splitlines()
        rows = [row for row in (raw.strip() for raw in lines) if row]
        if not rows:
            return f'{name} = {{}}'

        split = _pick_splitter(rows[0])
        pairs: List[Pair] = []
        for row in rows:
            pairs.extend(split(row))
        if not pairs:
            return f'{name} = {{}}'

        body = ',\n'.join(f' {key!r}:{value!r}' for key, value in pairs)
        return f'{name} = {{\n{body}\n}}'


    def _read_lines() -> List[str]:
        """Read input lines from stdin until an empty line or end of input."""
        collected: List[str] = []
        try:
            collected.append(input(PROMPT))
            # iter(callable, sentinel) keeps calling input() until it returns
            # the sentinel, i.e. until the user enters an empty line.
            for line in iter(input, ''):
                collected.append(line)
        except EOFError:
            # Piped input ended without a terminating blank line; convert
            # whatever was read so far.
            pass
        return collected


    def main() -> None:
        """Prompt for the text to convert and print the dict literal."""
        print(convert(_read_lines()))


    if __name__ == '__main__':
        main()
       
        
    # The script supports converting strings in the following three formats.
    # Sample 1: whitespace-separated "key value" pairs, one per line; a key
    # may have no value (see "icode").
    str1 = """
    email 18827441xxxx
    icode 
    origURL http://www.renren.com/home
    domain renren.com
    key_id 1
    captcha_type web_login
    password 32904758c29a901f87064f3aa151f9c373112ed66370fe9b28152f0ce63d3796
    rkey d6b3acc434f19c92fad1f33176e506f0
    f https%3A%2F%2Fbaidu.com%2F
    """
    # Sample 2: colon-separated "key: value" pairs, one per line.
    str2 = """
    from: en
    to: zh-Hans
    text: cat
    """
    # Sample 3: a URL query string of "key=value" pairs joined by "&".
    str3 = "ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=python中正则替换"
  • 相关阅读:
    elasticsearch的cross_fields查询
    搭建elk集群 disabled in libcurl elasticsearch-6.2.2 更新license 版本
    elastic mapping not_analyzed 简单理解 + analysis-ik分词器安装
    ElasticsearchException: java.io.IOException: failed to read [id:0, file:/data/elasticsearch/nodes/0/_state/global-0.st]
    elastic 查询案例Query与Filter + 增删改查简单理解 + dynamic mapping + keyword
    kibana 查询例子
    用grok拆分java日志
    logstash 处理信息规律研究
    docker-compose 部署elk+解决时间不对导致kibana找不到logstash定义的index + docker-compose安装
    JavaScript(6):Number对象
  • 原文地址:https://www.cnblogs.com/lxmtx/p/13038619.html
Copyright © 2011-2022 走看看