python爬虫--------处理极验验证(滑块拼图验证)bilibili模式

软件发布|下载排行|最新软件

当前位置:首页IT学院IT技术

python爬虫--------处理极验验证(滑块拼图验证)bilibili模式

King~~~   2019-12-23 我要评论
from selenium import webdriver
from PIL import Image
import time
import random
from selenium.webdriver import ActionChains
import cv2
from matplotlib import pyplot as plt





class slide():
    """Log in to bilibili with Selenium and solve the GeeTest slider captcha.

    The flow driven by :meth:`run` is:

    1. screenshot the captcha with the complete background visible,
    2. screenshot it again with the gap visible,
    3. compare the two crops column by column to locate the gap,
    4. drag the slider button by the (rescaled) distance.

    NOTE(review): the ``find_element_by_*`` helpers and the
    ``executable_path`` argument used here belong to the Selenium 3 API;
    confirm the installed Selenium version still provides them.
    """

    # Pixel offsets added to the ``geetest_widget`` location/size to obtain
    # the crop box of the captcha picture inside the full-page screenshot.
    # They depend on the page layout and window size; tune them until the
    # crop contains exactly the captcha image.
    CROP_LEFT = 220
    CROP_TOP = 57
    CROP_RIGHT = 295
    CROP_BOTTOM = 113

    def __init__(self):
        """Start Chrome, open the login page, fill in the form and submit it."""
        # Path to your own chromedriver binary.
        self.driver = webdriver.Chrome(executable_path='..')
        self.driver.maximize_window()
        self.trance = 0
        self.driver.get("http://passport.bilibili.com/login")
        self.driver.find_element_by_id('login-username').send_keys('....')  # user name
        self.driver.find_element_by_id('login-passwd').send_keys('....')  # password
        self.driver.find_element_by_class_name('btn-login').click()
        print('login..........')
        # Presumably gives the captcha widget time to show up after the click.
        time.sleep(3)

    def _capture_widget(self, screenshot_path, crop_path):
        """Screenshot the whole page and save the captcha crop to *crop_path*.

        Reads ``self.location`` / ``self.size`` (set by
        :meth:`before_deal_image`) and stores the crop box in ``self.left``,
        ``self.top``, ``self.right``, ``self.bottom`` and the cropped image in
        ``self.im``.
        """
        self.driver.save_screenshot(screenshot_path)

        self.left = self.location['x'] + self.CROP_LEFT
        self.top = self.location['y'] + self.CROP_TOP
        self.right = self.location['x'] + self.size['width'] + self.CROP_RIGHT
        self.bottom = self.location['y'] + self.size['height'] + self.CROP_BOTTOM

        # Close the full-page screenshot file as soon as the crop is written.
        with Image.open(screenshot_path) as page:
            self.im = page.crop((self.left, self.top, self.right, self.bottom))
            self.im.save(crop_path)

    def before_deal_image(self):
        """Capture the captcha with the complete background shown.

        Writes ``capture1.png`` (full page) and ``ele_capture1.png`` (crop),
        and records the widget geometry in ``self.location`` / ``self.size``.
        """
        # Force the canvas holding the complete background to be displayed.
        self.js1 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="block"'
        self.driver.execute_script(self.js1)
        time.sleep(2)

        widget = self.driver.find_element_by_class_name('geetest_widget')
        self.location = widget.location
        self.size = widget.size

        time.sleep(2)
        self._capture_widget('capture1.png', 'ele_capture1.png')

    @staticmethod
    def _split_distance(distance):
        """Split *distance* into three integer drag segments.

        Returns ``(first, second, third)`` where ``first`` is three quarters
        of the rounded distance, ``third`` is a final 3 px nudge and
        ``second`` is whatever remains, so the three always add up to
        ``round(distance)`` exactly.
        """
        total = int(round(distance))
        first = total * 3 // 4
        third = 3
        second = total - first - third
        return first, second, third

    def slide(self, num):
        """Drag the slider button *num* pixels to the right in three steps.

        The button is pressed once, moved in three segments with pauses in
        between, then released.
        """
        self.num = num
        print('滑块应该移动距离------------->', self.num)

        self.button = self.driver.find_element_by_class_name('geetest_slider_button')

        # Integer segments guarantee the total travelled equals the rounded
        # target, independent of how fractional offsets would be rounded.
        first_distance, second_distance, third_distance = self._split_distance(self.num)

        # Press the button a single time; the following moves happen while
        # it is still held down.
        ActionChains(self.driver).click_and_hold(self.button).perform()
        ActionChains(self.driver).move_by_offset(first_distance, random.random()).perform()
        time.sleep(0.5)
        ActionChains(self.driver).move_by_offset(second_distance, random.random()).perform()
        time.sleep(0.9)
        ActionChains(self.driver).move_by_offset(third_distance, random.random()).release().perform()

    def after_deal_image(self):
        """Capture the captcha with the gap shown.

        Writes ``capture2.png`` (full page) and ``ele_capture2.png`` (crop).
        Must be called after :meth:`before_deal_image`, which records the
        widget geometry this method reuses.
        """
        # Hide the complete background again so the gap becomes visible.
        self.js2 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="none"'
        self.driver.execute_script(self.js2)

        self._capture_widget('capture2.png', 'ele_capture2.png')

    # --------------------- distance computation ---------------------

    def slide_distance(self, image1, image2, threshold=86, start_x=75, offset=7):
        """Return the x position of the gap, or ``None`` when none is found.

        Args:
            image1: path of the crop showing the complete background.
            image2: path of the crop showing the background with the gap.
            threshold: a pixel counts as "gap" when each of its R, G and B
                values differs from the complete background by more than this.
            start_x: first column scanned; columns to its left are skipped
                (the article describes 75 as the right edge of the slider
                piece's shadow in the crop).
            offset: value subtracted from the first differing column.

        The scan goes column by column, top to bottom, and stops at the first
        differing pixel. Only the area common to both images is scanned.
        """
        with Image.open(image2) as cut_image, Image.open(image1) as full_image:
            width = min(cut_image.size[0], full_image.size[0])
            height = min(cut_image.size[1], full_image.size[1])

            for i in range(start_x, width):
                for j in range(height):
                    pixel1 = cut_image.getpixel((i, j))
                    pixel2 = full_image.getpixel((i, j))
                    # Compare the R, G and B channels only.
                    if all(abs(a - b) > threshold
                           for a, b in zip(pixel1[:3], pixel2[:3])):
                        print(i - offset)
                        return i - offset
        return None

    # --------------------- similarity ---------------------

    @staticmethod
    def _histogram_overlap(hist1, hist2, start=0):
        """Return the mean per-bin overlap of two histograms, in ``[0, 1]``.

        *hist1* and *hist2* are flat sequences of non-negative bin counts.
        Bins before index *start* are ignored. An identical bin contributes 1;
        a differing bin contributes ``1 - |a - b| / max(a, b)``.

        Raises:
            ValueError: if no bins are left to compare.
        """
        total = 0.0
        compared = 0
        for a, b in zip(hist1[start:], hist2[start:]):
            compared += 1
            if a != b:
                total += 1 - abs(a - b) / max(a, b)
            else:
                total += 1
        if compared == 0:
            raise ValueError('no histogram bins to compare')
        return total / compared

    def classify_gray_hist(self, image1, image2, size=(328, 211), start_bin=75):
        """Return the histogram similarity of two images.

        Both images are resized to *size* (the crop size), a 256-bin
        histogram of channel 0 is computed for each, and the bins from
        *start_bin* upwards are compared with :meth:`_histogram_overlap`.

        Returns:
            A one-element float32 array holding the similarity in ``[0, 1]``,
            so callers can keep reading it as ``degree[0]``.
        """
        import numpy as np  # local import: only needed to shape the result

        image1 = cv2.resize(image1, size)
        image2 = cv2.resize(image2, size)
        # The upper bound of the range is exclusive, so 256 is required for
        # pixel value 255 to be counted and for bins to be one value wide.
        hist1 = cv2.calcHist([image1], [0], None, [256], [0, 256])
        hist2 = cv2.calcHist([image2], [0], None, [256], [0, 256])

        overlap = self._histogram_overlap(
            [float(row[0]) for row in hist1],
            [float(row[0]) for row in hist2],
            start=start_bin,
        )
        return np.array([overlap], dtype=np.float32)

    def run0(self, image1, image2):
        """Print and return the similarity (0-100) of two image files.

        Raises:
            FileNotFoundError: if either file cannot be read by OpenCV.
        """
        img1 = cv2.imread(image1)
        img2 = cv2.imread(image2)
        # cv2.imread signals failure by returning None instead of raising.
        if img1 is None or img2 is None:
            raise FileNotFoundError(
                'could not read image(s): %r, %r' % (image1, image2))

        degree = self.classify_gray_hist(img1, img2)
        percent = int(100 * degree[0])
        print('两张图片相似度为。。。。。', percent)
        return percent

    def run(self):
        """Run the whole captcha flow and always close the browser afterwards."""
        try:
            self.before_deal_image()
            self.after_deal_image()
            self.num = self.slide_distance('ele_capture1.png', 'ele_capture2.png')
            print('原始距离--------->', self.num)
            if self.num is None:
                # Fail with a clear reason instead of a TypeError further down.
                raise RuntimeError('no gap found between the two captcha crops')

            self.image1 = 'ele_capture1.png'
            self.image2 = 'ele_capture2.png'
            self.result = self.run0(self.image1, self.image2)
            # Ratio between the real captcha picture width and the crop
            # width; adjust both numbers to your own screenshots.
            real_distance = self.num * (262 / 328)
            print('按照图像大小比列计算实际移动距离', real_distance)
            self.slide(real_distance)
            time.sleep(6)
        except Exception as exc:
            print('login_out..............')
            # Surface the cause so a failed run can be diagnosed.
            print('reason:', repr(exc))
            time.sleep(7)
        finally:
            self.driver.quit()
            print('程序运行结束')


if __name__ == "__main__":
    # Run the login/captcha flow only when executed as a script, so that
    # importing this module does not launch a browser.
    slide().run()

  

 

 

   

  透明度为 0 和 1 时的色差在 85、86 左右徘徊,且 R、G、B 三个通道的差值相等,因此阈值(即上面函数中的 threshold)取 85 或 86 均可。主要的难点是缺口位置的查找,因此需要下面这个函数来单独测试所选阈值是否合适。

import cv2
from PIL import Image



def get_distance(cut_image, full_image, threshold=86, start_x=75, offset=7):
    """Return the x position of the captcha gap, or ``None`` if not found.

    Args:
        cut_image: path of the crop showing the background with the gap.
        full_image: path of the crop showing the complete background.
        threshold: a pixel counts as "gap" when each of its R, G and B
            values differs from the complete background by more than this.
        start_x: first column scanned; 75 is the distance from the left edge
            of the crop to the right edge of the slider piece's shadow.
        offset: value subtracted from the first differing column.

    The scan goes column by column, top to bottom, and stops at the first
    differing pixel. Only the area common to both images is scanned. The
    result is also printed.
    """
    # Context managers make sure both image files are closed again.
    with Image.open(cut_image) as cut, Image.open(full_image) as full:
        width = min(cut.size[0], full.size[0])
        height = min(cut.size[1], full.size[1])

        for x in range(start_x, width):
            for y in range(height):
                cut_pixel = cut.getpixel((x, y))
                full_pixel = full.getpixel((x, y))
                # Compare the R, G and B channels only.
                if all(abs(c - f) > threshold
                       for c, f in zip(cut_pixel[:3], full_pixel[:3])):
                    print(x - offset)
                    return x - offset
    return None

if __name__ == "__main__":
    # Arguments are (crop with the gap, crop with the complete background).
    get_distance('ele_capture2.png','ele_capture1.png')

  

 

  执行完第一个类后,如果没有登录成功,就单独执行上面这个函数,调整 threshold,并用它替换第一个类中的 slide_distance() 函数。上面函数中的 75 是滑块截图最右边阴影到图片最左端的长度,需要根据自己的截图修改。

 

 

  

  还有另一个方法:每次移动滑块后截图,此时 86 左右的色差会变小;每移动一次就与完整背景图做一次相似度比较,当相似度达到 90% 以上时记录已移动的距离,再按模拟人手的方式移动滑块,同样可以通过验证。这需要用到图像算法,我自己不会写,但找到资料后测试过,也能通过验证。这种方法速度慢,但准确度 100%。

  

  如果都能成功,就可以将driver设置为无头模式,让selenium不在界面上显示。

 

 

  后续会出更好的爬虫博文。

  喜欢就点个赞,萌萌哒。

Copyright 2022 版权所有 软件发布 访问手机版

声明:所有软件和文章来自软件开发商或者作者 如有异议 请与本站联系 联系我们