zoukankan      html  css  js  c++  java
  • 字符串操作、文件操作,英文词频统计预处理

    作业来源:https://edu.cnblogs.com/campus/gzcc/GZCC-16SE1/homework/2684

    1.字符串操作:

    解析身份证号:生日、性别、出生地等

    import requests
    import chardet
    from bs4 import BeautifulSoup
    from fake_useragent import UserAgent
    
    def get_ID_Info(Id):
        """Print the region, birth date and sex encoded in an ID number.

        The region name is looked up by scraping a blog page, so this function
        needs network access. After normalising whitespace, the page text is
        treated as a flat token list in which the token following an area code
        is that area's name.

        Args:
            Id: The ID number as a string. Characters 0-5 are used as the area
                code, 6-13 as the birth date, and the second-to-last character
                as the sex digit.
        """
        url = "http://blog.sina.com.cn/s/blog_55a319c701015pjt.html"
        ua = UserAgent()
        # The HTTP header is spelled "User-Agent"; with an underscore the
        # randomised agent string is sent under a name servers do not read.
        headers = {'User-Agent': ua.random}
        html = requests.get(url, headers=headers)
        charset = chardet.detect(html.content)
        html.encoding = charset['encoding']
        soup = BeautifulSoup(html.text, 'lxml')
        areaText = soup.select('#sina_keyword_ad_area2')[0].get_text(' ', '<br/>')
        # Replace ideographic spaces (U+3000) before splitting on whitespace.
        areaInfo = areaText.replace("\u3000", ' ').split()
        areaCode = Id[0:6]
        # Stop at the first match and never look past the end of the list.
        for position, token in enumerate(areaInfo[:-1]):
            if token == areaCode:
                print(u"地区:{}".format(areaInfo[position + 1]))
                break
        print(u"出生:%s" % Id[6:14])
        # The 17th digit encodes sex: even means female, odd means male.
        sex = u"女" if int(Id[-2]) % 2 == 0 else u"男"
        print(u"性别:%s" % sex)
    
    def check_ID_Number(Id):
        """Validate the check character of an 18-character ID number.

        Each of the first 17 digits is multiplied by ``2 ** (17 - index) % 11``;
        the sum modulo 11 selects the expected final character from a lookup
        table. The outcome is printed as well as returned.

        Args:
            Id: The ID number as a string.

        Returns:
            True when the last character equals the computed check character,
            False otherwise.

        Raises:
            TypeError: If ``Id`` is not 18 characters long, or if any of the
                first 17 characters is not an ASCII decimal digit.
        """
        check_dict = {0: '1', 1: '0', 2: 'X', 3: '9', 4: '8', 5: '7',
                      6: '6', 7: '5', 8: '4', 9: '3', 10: '2'}
        if len(Id) != 18:
            raise TypeError(u'请输入标准的第二代身份证号码')
        body, last = Id[:17], Id[17]
        # Only the final position may hold 'X'. Reject anything else up front
        # rather than failing later on an unsupported-operand error or
        # silently counting a stray 'X' as 10.
        if any(ch not in '0123456789' for ch in body):
            raise TypeError(u'请输入标准的第二代身份证号码')
        check_num = sum(int(ch) * (2 ** (17 - index) % 11)
                        for index, ch in enumerate(body))
        right_code = check_dict[check_num % 11]
        if last == right_code:
            print(u"身份证号: %s 校验通过" % Id)
            return True
        print(u"身份证号: %s 校验不通过, 正确尾号应该为:%s" % (Id, right_code))
        return False
    if __name__ == '__main__':
        # Ask for an ID number and only look up its details once the
        # check character has been verified.
        Id = str(input(u'请输入标准的第二代身份证号码:'))
        checksum_ok = check_ID_Number(Id)
        if checksum_ok:
            get_ID_Info(Id)

    凯撒密码编码与解码

    # Upper bound for the key accepted by getKey() and for the brute-force range.
    MAX_KEY_SIZE = 26
    def getMode():
        """Prompt repeatedly until a valid mode is typed, then return it.

        Returns:
            The lower-cased input, one of ``encrypt``, ``e``, ``decrypt``,
            ``d``, ``brute`` or ``b``.
        """
        accepted = 'encrypt e decrypt d brute b'.split()
        menu = (
            '请选择加密或解密模式,或者选择暴力破解:',
            '加密:encrypt(e)',
            '解密:decrypt(d)',
            '暴力破解:brute(b)',
        )
        while True:
            for line in menu:
                print(line)
            choice = input().lower()
            if choice not in accepted:
                print('请输入"encrypt"或"e"或"decrypt"或"d"或"brute"或"b"!')
                continue
            return choice
    
    def getMessage():
        """Prompt for the message text and return the line the user typed."""
        print('请输入你的信息:')
        typed = input()
        return typed
    
    def getKey():
        """Prompt until the user enters an integer key in 1..MAX_KEY_SIZE.

        Returns:
            The key as an int.
        """
        while True:
            print('请输入密钥数字(1-%s)' % (MAX_KEY_SIZE))
            try:
                key = int(input())
            except ValueError:
                # Non-numeric input: ask again instead of crashing.
                continue
            if 1 <= key <= MAX_KEY_SIZE:
                return key
    
    def getTranslatedMessage(mode, message, key):
        """Apply a Caesar shift to the ASCII letters of ``message``.

        Args:
            mode: Mode string; when its first character is ``'d'`` the shift
                is reversed (decryption), otherwise it is applied forwards.
            message: Text to translate.
            key: Number of alphabet positions to shift. Any integer is
                accepted; it is reduced modulo 26.

        Returns:
            The translated text. Case is preserved, and every character that
            is not an ASCII letter is copied through unchanged.
        """
        if mode[0] == 'd':
            key = -key
        # Python's % always yields 0..25 here, so negative and oversized keys
        # wrap correctly.
        shift = key % 26
        pieces = []
        for symbol in message:
            # str.isalpha() is also true for non-ASCII letters (accented or
            # CJK characters), which must not be shifted; compare against the
            # ASCII ranges explicitly.
            if 'A' <= symbol <= 'Z':
                base = ord('A')
            elif 'a' <= symbol <= 'z':
                base = ord('a')
            else:
                pieces.append(symbol)
                continue
            pieces.append(chr(base + (ord(symbol) - base + shift) % 26))
        return ''.join(pieces)
    
    if __name__ == '__main__':
        mode = getMode()
        message = getMessage()
        # Brute force tries every key, so it is the only mode that skips the
        # key prompt.
        brute_force = mode[0] == 'b'
        if not brute_force:
            key = getKey()
        print('你要翻译的信息是:')
        if brute_force:
            for key in range(1, MAX_KEY_SIZE + 1):
                print(key, getTranslatedMessage('decrypt', message, key))
        else:
            print(getTranslatedMessage(mode, message, key))

    网址观察与批量生成

    # Print the listing-page URLs for page numbers 2 through 14.
    for i in range(2, 15):
        page_url = 'http://news.gzcc.cn/html/xiaoyuanxinwen/{}.html'.format(i)
        print(page_url)

    2.英文词频统计预处理

    下载一首英文的歌词或文章或小说

    将所有大写转换为小写

    将所有其他做分隔符(,.?!)替换为空格

    分隔出一个一个的单词

    并统计单词出现的次数

    import requests
    import chardet
    from bs4 import BeautifulSoup
    from fake_useragent import UserAgent
    
    def Info():
        """Download an English article and print its word frequencies.

        The first ``<p>`` inside the article container is fetched and printed
        after each step: raw text, lower-cased text, text with ``,.?!``
        replaced by spaces, the word list, and finally a dict mapping each
        word to its count. Requires network access.
        """
        url = "http://www.duwenzhang.com/wenzhang/yingyuwenzhang/20130519/255870.html"
        ua = UserAgent()
        # The HTTP header is spelled "User-Agent"; with an underscore the
        # randomised agent string is sent under a name servers do not read.
        headers = {'User-Agent': ua.random}
        html = requests.get(url, headers=headers)
        charset = chardet.detect(html.content)
        html.encoding = charset['encoding']
        soup = BeautifulSoup(html.text, 'lxml')
        article = soup.find('div', {'class': 'article 255870'})
        # Fetch the text and turn ideographic spaces (U+3000) into plain ones.
        engInfo = article.find_all('p')[0].get_text(' ', '<br/>').replace("\u3000", ' ')
        print(engInfo)
        engInfo = engInfo.lower()  # convert all upper case to lower case
        print(engInfo)
        for mark in ',.?!':
            engInfo = engInfo.replace(mark, ' ')  # punctuation becomes a space
        print(engInfo)
        engInfo = engInfo.split()  # split into individual words
        print(engInfo)
        # Single pass instead of list.count() per distinct word (was O(n^2)).
        Count = {}
        for word in engInfo:
            Count[word] = Count.get(word, 0) + 1
        print(Count)
    # Run the word-frequency demo only when executed as a script.
    if __name__ == '__main__':
        Info()

    3.文件操作

    同一目录、绝对路径、相对路径

    凯撒密码:从文件读入密函,进行加密或解密,保存到文件。

    def getMima():
        """Read plaintext from the user, save it, then save and print its cipher.

        The plaintext is written to ``massage.txt``. Each character is then
        replaced by the character three code points later; the result is
        printed and written to ``secret.txt``.
        """
        Massage = str(input("输入明文并保存文本:"))
        # The with-block closes the file; no explicit close() is needed.
        with open('massage.txt', 'w') as f:
            f.write(Massage)
        Mima = ''.join(chr(ord(ch) + 3) for ch in Massage)
        # The trailing newline must be written as an escape; a literal line
        # break inside a single-quoted string is a syntax error.
        print('加密结果:' + Mima + '\n')
        with open('secret.txt', 'w') as f:
            f.write(Mima)
    
    def getMassageFromTXT():
        """Decode ``secret.txt`` and print the recovered text.

        Every character read from the file is replaced by the character three
        code points earlier. When the file is missing or empty, a "nothing to
        decode" message is printed instead.
        """
        print("对文本内容解码..")
        try:
            with open('secret.txt', 'r') as f:
                s = f.read()
        except FileNotFoundError:
            # Nothing has been encrypted yet; treat it like an empty file.
            s = ''
        # read() returns '' (never None) for an empty file, so test truthiness.
        if not s:
            print('没有可解码的文本\n')
            return
        Massage = ''.join(chr(ord(ch) - 3) for ch in s)
        print('解码结果:' + Massage + '\n')
    
    if __name__ == '__main__':
        # Menu loop: 1 encrypts, 2 decodes, 0 exits; anything else re-prompts.
        while True:
            try:
                a = int(input('加密(1)解码(2)退出(0):'))
            except ValueError:
                # Non-numeric input: show the menu again instead of crashing.
                continue
            if a == 0:
                break
            elif a == 1:
                getMima()
            elif a == 2:
                getMassageFromTXT()

    词频统计:下载一首英文的歌词或文章或小说,保存为utf8文件。从文件读入文本进行处理。

    # Sample English text that is appended to EngTxt.txt as UTF-8.
    engInfo = '''My father was a self-taught mandolin player.
     He was one of the best string instrument players in our town.
      He could not read music, but if he heard a tune a few times, 
      he could play it. When he was younger, he was a member of a small country music band. 
      They would play at local dances and on a few occasions would play for the local radio station. 
      He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer.
       He told the family that after he was hired he never went back. Dad was a very religious man. 
       He stated that there was a lot of drinking and cursing the day of his audition and he did not want to be around that type of environment.
    '''
    # The with-block closes the file even if write() raises.
    with open('EngTxt.txt', 'a', encoding='utf-8') as f:
        f.write(engInfo)

    4.函数定义

    加密函数、解密函数

    def getTranslatedMessage(mode, message, key):
        """Apply a Caesar shift to the ASCII letters of ``message``.

        Args:
            mode: Mode string; when its first character is ``'d'`` the shift
                is reversed (decryption), otherwise it is applied forwards.
            message: Text to translate.
            key: Number of alphabet positions to shift. Any integer is
                accepted; it is reduced modulo 26.

        Returns:
            The translated text. Case is preserved, and every character that
            is not an ASCII letter is copied through unchanged.
        """
        if mode[0] == 'd':
            key = -key
        # Python's % always yields 0..25 here, so negative and oversized keys
        # wrap correctly.
        shift = key % 26
        pieces = []
        for symbol in message:
            # str.isalpha() is also true for non-ASCII letters (accented or
            # CJK characters), which must not be shifted; compare against the
            # ASCII ranges explicitly.
            if 'A' <= symbol <= 'Z':
                base = ord('A')
            elif 'a' <= symbol <= 'z':
                base = ord('a')
            else:
                pieces.append(symbol)
                continue
            pieces.append(chr(base + (ord(symbol) - base + shift) % 26))
        return ''.join(pieces)

    读文本函数

    def getMima():
        """Read plaintext from the user, save it, then save and print its cipher.

        The plaintext is written to ``massage.txt``. Each character is then
        replaced by the character three code points later; the result is
        printed and written to ``secret.txt``.
        """
        Massage = str(input("输入明文并保存文本:"))
        # The with-block closes the file; no explicit close() is needed.
        with open('massage.txt', 'w') as f:
            f.write(Massage)
        Mima = ''.join(chr(ord(ch) + 3) for ch in Massage)
        # The trailing newline must be written as an escape; a literal line
        # break inside a single-quoted string is a syntax error.
        print('加密结果:' + Mima + '\n')
        with open('secret.txt', 'w') as f:
            f.write(Mima)

     

  • 相关阅读:
    费马小定理
    Big Number阶乘位数计算(斯特林公式)
    V
    矩阵快速幂求斐波那契
    奇迹
    缘分
    求导
    拓扑排序
    线段树
    单调栈
  • 原文地址:https://www.cnblogs.com/Mram/p/10505210.html
Copyright © 2011-2022 走看看