geetest拼图破解:
1、考虑到每个网站的拼图数量很少,可以先把它们下载下来,后面再通过RGB比较差异,找出滑块需要移动到的位置
2、通过 Python + Selenium 来操控页面元素,使得拼图可以被移动
3、这里先直接贴上一段代码:
鸣谢:https://blog.csdn.net/qq_38685503/article/details/81187105
# Solve the geetest slider captcha on the National Enterprise Credit
# Information Publicity System (gsxt.gov.cn) with Selenium and Pillow.
#
# Flow: open the site, search for a keyword, drag the slider to the right and
# take a screenshot, compare that screenshot against pre-downloaded captcha
# backgrounds to find the x coordinate of the gap, then drag the slider there
# and screenshot the first search result.
#
# NOTE(review): every pixel coordinate below is hard-coded; presumably they
# only hold for the author's screen resolution with a maximized Chrome
# window -- confirm before reuse.
import os
import random
import time

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

base_url = "http://www.gsxt.gov.cn"
# Directory holding the pre-downloaded candidate background screenshots.
img_paths = "E:/DateSet/geetest/gsxt"
# NOTE(review): the pasted listing had lost the separators in the next three
# paths (e.g. 'E:DateSetgeetestwaitquekou.png'); they are reconstructed here
# after the pattern of the two intact paths -- confirm the directory names.
img_path = "E:/DateSet/geetest/wait/quekou.png"
img_check_now = "E:/DateSet/geetest/enter/check_now.png"
img_check = "E:/DateSet/geetest/enter/check.png"
img_result = "E:/DateSet/geetest/result/result.png"
js = "var q=document.documentElement.scrollTop=180"

# Pixel sampled in the gap screenshot and in each candidate source image to
# pair them up ("pingtu 557 * 176" / "source 557 * 290" in the original notes).
PUZZLE_PROBE = (557, 176)
SOURCE_PROBE = (557, 290)
# Vertical shift applied when reading the source image; equals the difference
# between the two probe rows above (290 - 176).
SOURCE_Y_OFFSET = 114
# Screenshot region scanned for the gap: x in [536, 803), y in [164, 333).
SCAN_X_RANGE = (536, 803)
SCAN_Y_RANGE = (164, 333)
# Subtracted from the gap's x coordinate to get the drag distance.
# presumably the x position the slider piece starts from -- TODO confirm.
DRAG_ORIGIN_X = 552
# Per-channel difference at or above which a pixel counts as "different".
CHANNEL_THRESHOLD = 70
# Max red-channel difference for a candidate image to count as the source.
MATCH_TOLERANCE = 5


def _element_exists(by, value):
    """Return True if ``driver`` can locate an element, False if it cannot.

    Only ``NoSuchElementException`` is treated as "absent"; any other
    WebDriver failure propagates instead of being silently swallowed.
    """
    try:
        driver.find_element(by, value)
    except NoSuchElementException:
        return False
    return True


def is_exist_by_id(element):
    """Return whether an element with the given id is present."""
    return _element_exists(By.ID, element)


def is_exist_by_class(element):
    """Return whether an element with the given class name is present."""
    return _element_exists(By.CLASS_NAME, element)


def is_exist_by_xpath(element):
    """Return whether an element matching the given XPath is present."""
    return _element_exists(By.XPATH, element)


def _wait_until(condition, interval=0.5):
    """Poll ``condition()`` every ``interval`` seconds until it is truthy.

    There is no timeout, matching the original polling loops.
    """
    while not condition():
        time.sleep(interval)


def match_source(image):
    """Find the stored source image that corresponds to ``image``.

    Files in ``img_paths`` are opened one at a time in ``os.listdir`` order.
    The first whose red channel at ``SOURCE_PROBE`` is within
    ``MATCH_TOLERANCE`` of ``image``'s red channel at ``PUZZLE_PROBE`` is
    returned; rejected candidates are closed. If nothing matches, ``image``
    itself is returned.
    """
    probe_red = image.getpixel(PUZZLE_PROBE)[0]
    for file_name in os.listdir(img_paths):
        candidate = Image.open(os.path.join(img_paths, file_name))
        if abs(probe_red - candidate.getpixel(SOURCE_PROBE)[0]) < MATCH_TOLERANCE:
            return candidate
        candidate.close()
    return image


def is_similar(source_img, quekou_img, x, y):
    """Return False only when R, G and B all differ by >= CHANNEL_THRESHOLD.

    ``quekou_img`` is read at ``(x, y)`` and ``source_img`` at
    ``(x, y + SOURCE_Y_OFFSET)``.
    """
    pixel1 = source_img.getpixel((x, y + SOURCE_Y_OFFSET))
    pixel2 = quekou_img.getpixel((x, y))
    all_channels_differ = all(
        abs(pixel1[channel] - pixel2[channel]) >= CHANNEL_THRESHOLD
        for channel in range(3)
    )
    return not all_channels_differ


def get_diff_location(source_img, quekou_img):
    """Return the x of the first differing pixel in the scan region, or -1.

    The region is scanned column by column (x outer, y inner), so the result
    is the leftmost column that contains a differing pixel.
    """
    for i in range(*SCAN_X_RANGE):
        for j in range(*SCAN_Y_RANGE):
            if not is_similar(source_img, quekou_img, i, j):
                return i
    return -1


def get_track(distance):
    """Return the list of horizontal steps for the drag.

    Currently a single step covering the whole distance. The author's
    disabled accelerate/decelerate draft is kept below for reference.
    """
    track = [distance]
    # current = 0
    # mid = distance * 7 / 8
    # t = random.randint(2, 3) / 10
    # v = 0
    # while current < distance:
    #     if current < mid:
    #         a = 2
    #     else:
    #         a = -3
    #     v0 = v
    #     v = v0 + a * t
    #     move = v0 * t + 1 / 2 * a * t * t
    #     current += move
    #     track.append(round(move))
    return track


def imitate(distance):
    """Drag the slider by ``distance`` pixels with small pauses and jitter.

    Uses the module-level ``driver`` and ``slide_block``.
    """
    track_list = get_track(distance)
    time.sleep(2)
    ActionChains(driver).click_and_hold(slide_block).perform()
    time.sleep(0.02)
    for track in track_list:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    # One chain holding a 1px move to the left, performed three times below
    # with irregular pauses in between. (Named `imitate` in the original,
    # which shadowed this function.)
    nudge_left = ActionChains(driver).move_by_offset(xoffset=-1, yoffset=0)
    time.sleep(0.015)
    nudge_left.perform()
    time.sleep(random.randint(5, 6) / 10)
    nudge_left.perform()
    time.sleep(0.04)
    nudge_left.perform()
    time.sleep(0.012)
    ActionChains(driver).move_by_offset(xoffset=1, yoffset=0).perform()
    ActionChains(driver).pause(random.randint(1, 2) / 10).release(slide_block).perform()
    time.sleep(2)


# Open the National Enterprise Credit Information Publicity System.
driver = webdriver.Chrome()
# driver.implicitly_wait(40)
driver.get(base_url)
driver.maximize_window()

# Wait for the search box and the query button. The original tested "keyword"
# twice; "btn_query" is the other element used right below.
_wait_until(lambda: is_exist_by_id("keyword") and is_exist_by_id("btn_query"))

search_box = driver.find_element(By.ID, "keyword")
search_box.clear()
search_box.send_keys("腾讯")
time.sleep(1)
driver.find_element(By.ID, "btn_query").click()
time.sleep(1)

# Take a screenshot with the slider piece dragged 200px to the right.
_wait_until(lambda: is_exist_by_class("geetest_slider_button"))
slide_block = driver.find_element(By.CLASS_NAME, "geetest_slider_button")
ActionChains(driver).click_and_hold(slide_block).perform()
ActionChains(driver).move_by_offset(xoffset=200, yoffset=0).perform()
driver.save_screenshot(img_path)

ActionChains(driver).release(slide_block).perform()
quekou_img = Image.open(img_path)
source_img = match_source(quekou_img)

# h_dis: x coordinate of the gap, measured from the left of the screenshot.
h_dis = get_diff_location(source_img, quekou_img)
if h_dis < 0:
    # Previously the -1 sentinel flowed straight into the drag distance
    # (-1 - 552), dragging the slider far to the left.
    raise RuntimeError("could not locate the captcha gap in the screenshot")

imitate(h_dis - DRAG_ORIGIN_X)
_wait_until(lambda: is_exist_by_xpath("//*[@id='advs']/div/div[2]/a[1]"))

# Open the first search result.
driver.find_element(By.XPATH, "//*[@id='advs']/div/div[2]/a[1]").click()
# time.sleep(7)

# Switch to the most recently opened window, scroll down and capture it.
windows = driver.window_handles
driver.switch_to.window(windows[-1])
driver.execute_script(js)
driver.save_screenshot(img_result)

# driver.close()