# NOTE: Everything in this file is commented out; nothing here executes.
# The disabled script below fetches a Ziroom listing page, reads the CSS
# `style` attributes of the <i> elements selected by the XPath, sends the
# referenced image URL to Baidu OCR (AipOcr), and maps each
# background-position offset to a character of the recognised text.
#
# NOTE(review): APP_ID / API_KEY / SECRET_KEY below look like real
# credentials committed in source -- confirm, rotate them, and load them from
# configuration or the environment before re-enabling this code.
#
# NOTE(review): the original line breaks and loop indentation were lost; the
# layout below is reconstructed and should be checked before un-commenting.
#
# from aip import AipOcr
# import requests
# import re
# import os
# from decimal import Decimal  # floating-point precision arithmetic
# from lxml import html  # importing etree directly raised an error (version issue), so import it this way
# etree = html.etree  # bring etree into scope
# from string import punctuation
#
# """ Your APPID / AK / SK """
# APP_ID = '23597797'
# API_KEY = 'Va3onwymweV9htshK13GiNUs'
# SECRET_KEY = 'FiAd8gWb489uDD2yUI7Y1iKaxQUOwqwM'
# client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
#
# headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
# source= requests.get('http://tj.ziroom.com/x/777120723.html',headers=headers).text
# # print(source)
# base=etree.HTML(source).xpath('/html/body/section/aside/div[1]/i/@style')
# px=[]
# urls1=''
# for i in base:
#     NOTE(review): the parentheses after `url` are not escaped, so they act as
#     regex groups; lists[0][1] would then include the literal "(" and ")" from
#     the style text. Possibly backslashes were lost -- confirm the intended
#     pattern (e.g. r'url\((.*?)\)').
#     a='background-position:-(.*?)px;background-image: url((.*?));'
#     demo = re.compile(a)  # compile the regular-expression pattern string
#     lists = demo.findall(i)
#     px.append(lists[0][0])
#     urls1='http:'+lists[0][1]  # URL of the verification image
# print(px,urls1)
# # Run text recognition on the image behind the URL
# url = urls1
# """ Call general text recognition; the image argument is a remote image URL """
# client.basicGeneralUrl(url);
# """ If there are optional parameters """
# options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
# """ Call general text recognition with options; the image argument is a remote image URL """
# base=client.basicGeneralUrl(url, options)
# print(base)
# dicts={}
# n=0
# b=31.24
# for i in range(10):  # build key/value pairs mapping image position offsets to digit indices
#     NOTE(review): keys are str() of a rounded float ('0', '31.24', ...), and
#     the lookup further down uses the raw strings captured from the page; a
#     differently formatted offset would raise KeyError -- verify.
#     dicts[str(n)]=i
#     n=round(n+b,2)  # keep two decimal places on the float
# print(dicts)
# index=[dicts[x] for x in px]  # find the indices that correspond to the offsets taken from the page
# print(index)
# yzm=base['words_result'][0]['words']  # the recognised text
# jg=[yzm[x] for x in index ]  # pick the characters of the recognised text at those indices
# jg=''.join(jg)  # join into a single string
# print(yzm)
# print(jg)