滑动验证码

思路

通过逐像素对比完整背景图和带缺口的背景图,找出缺口的位置,得到滑块需要移动的 x 坐标

  1. 获取两张图片
  2. 计算缺口的位置,得到 x 坐标
  3. 模拟滑动的轨迹
  4. 通过selenium模拟滑动的过程

案例一

# !/usr/bin/env python
# _*_ coding:utf-8 _*_
# author:满怀心 2019/8/20 12:26
"""
# code is far away from bugs with the god animal protecting
    I love animals. They taste delicious.
              ┏┓      ┏┓
            ┏┛┻━━━┛┻┓
            ┃      ☃      ┃
            ┃  ┳┛  ┗┳  ┃
            ┃      ┻      ┃
            ┗━┓      ┏━┛
                ┃      ┗━━━┓
                ┃  神兽保佑    ┣┓
                ┃ 永无BUG!   ┏┛
                ┗┓┓┏━┳┓┏┛
                  ┃┫┫  ┃┫┫
                  ┗┻┛  ┗┻┛
"""
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import requests
from PIL import Image
from io import BytesIO
import re
import time
import random


class HuaDonYanZhengMa(object):
    """Solve the slide captcha on a GeeTest demo page with a selenium-driven Chrome.

    Workflow: rebuild the complete and the gapped background images from the
    sliced source pictures, locate the gap by comparing pixels, then drag the
    slider knob along a track that accelerates and then decelerates.
    """
    binary_location = r"D:\All_Work_App\Google\Google\Chrome\Application\chrome.exe"
    # Raw string: the previous non-raw literal only worked because Python keeps
    # unknown escape sequences verbatim (and warns about them). Same value.
    chromedriver_path = r'D:\Python\CREATE_PYTHON_ENV\Spider_env\chromedriver.exe'

    # Geometry of the captcha picture: two rows of 10px-wide slices.
    SLICE_WIDTH = 10
    ROW_HEIGHT = 58
    IMAGE_SIZE = (260, 116)
    # Minimum per-channel difference for two pixels to count as different.
    PIXEL_THRESHOLD = 50
    # Hand-tuned jitter appended after the overshoot; it nets out to -24px.
    # The original list contained "-1 -2" (missing comma, i.e. a single -3 step).
    BACK_TRACKS = (
        -1, 2, -1, 1, 2, -1, -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1,
        -2, 2, -1, -1, -2, -2, -2, -2, -1, -1, -1, -2, -1, -1, -1, -5, 1, 1,
        -2, 1, 2, 1, 1, 1, -1, 8, -2, -1, 1, -1, -1, -2, -1, -1,
    )

    _URL_RE = re.compile(r'url\("(.*)"\);')
    _POSITION_RE = re.compile(r'background-position: (.*?)px (.*?)px;')

    def __init__(self, url):
        """Start Chrome, maximize the window and open ``url``."""
        self.url = url
        self.opt = webdriver.ChromeOptions()
        self.opt.binary_location = self.binary_location
        # NOTE(review): executable_path / chrome_options are deprecated keyword
        # names in newer Selenium releases - confirm against the installed version.
        self.driver = webdriver.Chrome(executable_path=self.chromedriver_path, chrome_options=self.opt)
        self.wait = WebDriverWait(self.driver, timeout=10)
        self.driver.maximize_window()
        self.driver.get(self.url)

    def download_and_parse_img(self):
        """Return the x coordinate of the gap.

        Waits for the slider knob, rebuilds the complete picture
        (``gt_cut_fullbg_slice``) and the gapped picture (``gt_cut_bg_slice``)
        and compares them.

        :return: x coordinate of the first differing column, or None
        """
        self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'gt_slider_knob')))
        img1 = self._fetch_sliced_image('gt_cut_fullbg_slice')
        img2 = self._fetch_sliced_image('gt_cut_bg_slice')
        return self.get_shadow_distance(img1, img2)

    def _fetch_sliced_image(self, slice_class):
        """Read the slice offsets of ``slice_class`` divs, download the source picture and reassemble it."""
        slices = self.driver.find_elements(By.XPATH, '//div[@class="{}"]'.format(slice_class))
        if not slices:
            raise ValueError('no elements with class {!r} found on the page'.format(slice_class))

        img_url = None
        locations = []
        for slice_elem in slices:
            style = slice_elem.get_attribute('style')
            # Every slice's style carries a picture URL; the last one seen is downloaded.
            img_url = self._URL_RE.findall(style)[0]
            pos_x, pos_y = self._POSITION_RE.findall(style)[0]
            locations.append({'x': pos_x, 'y': pos_y})

        response = requests.get(img_url, timeout=10)
        response.raise_for_status()
        return self.join_img(BytesIO(response.content), locations, slice_class)

    def join_img(self, img_content, x_y_axis, img_name):
        """Reassemble the picture from its slices.

        (0, 0) is the top-left corner; x grows to the right and y downwards.
        Slices whose y offset is -58 are cut from the lower half of the source
        and form the top row of the result; slices with y offset 0 are cut
        from the upper half and form the bottom row. The result is also saved
        as ``<img_name>.jpg`` in the working directory.

        :param img_content: file-like object holding the downloaded picture
        :param x_y_axis: list of ``{'x': ..., 'y': ...}`` background offsets, in page order
        :param img_name: base name of the saved file
        :return: the reassembled ``Image``
        """
        img = Image.open(img_content)
        width, height = self.IMAGE_SIZE
        row = self.ROW_HEIGHT
        step = self.SLICE_WIDTH

        img_list_up = []
        img_list_down = []
        for x_y in x_y_axis:
            left = abs(int(x_y['x']))
            y_offset = int(x_y['y'])
            if y_offset == -row:
                img_list_up.append(img.crop((left, row, left + step, height)))
            elif y_offset == 0:
                img_list_down.append(img.crop((left, 0, left + step, row)))

        new_img = Image.new('RGB', (width, height))
        for row_top, pieces in ((0, img_list_up), (row, img_list_down)):
            for index, piece in enumerate(pieces):
                new_img.paste(piece, (index * step, row_top))

        new_img.save('{}.jpg'.format(img_name))
        return new_img

    def get_shadow_distance(self, img1, img2):
        """Find the gap by comparing the two pictures pixel by pixel.

        Columns are scanned left to right (skipping the 1px border).

        :param img1: complete picture
        :param img2: picture with the gap
        :return: x of the first column containing a differing pixel, or None
                 when the pictures are similar everywhere
        """
        width, height = self.IMAGE_SIZE
        for x in range(1, width - 1):
            for y in range(1, height - 1):
                if not self.is_similar(img1, img2, x, y):
                    return x
        return None

    def is_similar(self, img1, img2, x, y):
        """Tell whether the pixel at (x, y) is similar in both pictures.

        All three RGB channels are compared (the previous code returned after
        checking only the first channel).

        :param img1: first picture
        :param img2: second picture
        :param x: x coordinate of the pixel
        :param y: y coordinate of the pixel
        :return: False if any channel differs by PIXEL_THRESHOLD or more, else True
        """
        pixel1 = img1.getpixel((x, y))
        pixel2 = img2.getpixel((x, y))
        return all(
            abs(pixel1[i] - pixel2[i]) < self.PIXEL_THRESHOLD
            for i in range(3)
        )

    def x_drag_and_drop(self, x):
        """Press the slider knob, move it along the track for distance ``x`` and release it."""
        elem = self.driver.find_element(By.CLASS_NAME, 'gt_slider_knob')
        ActionChains(self.driver).click_and_hold(elem).perform()

        for step in self.get_tracks(x):
            ActionChains(self.driver).move_by_offset(xoffset=step, yoffset=0).perform()
        ActionChains(self.driver).release(elem).perform()

    def get_tracks(self, x):
        """Build the list of per-step x offsets for the drag.

        The pointer accelerates uniformly, then decelerates:
            v = v0 + a*t
            s = v0*t + 0.5*a*t^2
        The target is overshot by 20px and BACK_TRACKS is appended to slide back.

        :param x: distance to the gap
        :return: list of integer offsets, one per 0.3 time unit plus the back track
        """
        x += 20  # overshoot first, then slide back
        v = 0  # current velocity
        t = 0.3  # duration of one step
        tracks = []
        current = 0  # distance covered so far (unrounded)
        mid = x * 5 / 8  # start decelerating after this point

        while current < x:
            # A smaller acceleration gives shorter steps and therefore more of them.
            a = 2 if current < mid else -3
            v0 = v
            s = v0 * t + 0.5 * a * (t ** 2)
            current += s
            # Each step is rounded to a whole pixel offset.
            tracks.append(round(s))
            v = v0 + a * t

        tracks.extend(self.BACK_TRACKS)
        return tracks

    def shutdown(self):
        """Wait five seconds, then quit the browser."""
        time.sleep(5)
        self.driver.quit()

def main():
    """Run the demo: try the captcha up to six times, then close the browser.

    The browser is shut down even when an attempt raises, so no Chrome
    process is left behind.
    """
    HD = HuaDonYanZhengMa('http://www.cnbaowen.net/api/geetest/')
    try:
        # `remaining` is the number of attempts left after the current one.
        for remaining in range(5, -1, -1):
            x = HD.download_and_parse_img()
            HD.x_drag_and_drop(x)
            # find_elements returns an empty list instead of raising when the
            # success marker is absent.
            if HD.driver.find_elements(By.CLASS_NAME, 'gt_success'):
                print('匹配成功,即将关闭浏览器')
                break
            if remaining == 0:
                print('匹配失败,即将关闭浏览器')
                break
            print('匹配失败,还有{}次机会'.format(remaining))
            time.sleep(1.5)
    finally:
        HD.shutdown()

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


"""
'''gt_cut_bg gt_show'''  有缺口
'''gt_cut_fullbg gt_show''' 没缺口
"""

案例二(bilibili)

# !/usr/bin/env python
# _*_ coding:utf-8 _*_
# author:满怀心 2019/8/20 12:26
"""
# code is far away from bugs with the god animal protecting
    I love animals. They taste delicious.
              ┏┓      ┏┓
            ┏┛┻━━━┛┻┓
            ┃      ☃      ┃
            ┃  ┳┛  ┗┳  ┃
            ┃      ┻      ┃
            ┗━┓      ┏━┛
                ┃      ┗━━━┓
                ┃  神兽保佑    ┣┓
                ┃ 永无BUG!   ┏┛
                ┗┓┓┏━┳┓┏┛
                  ┃┫┫  ┃┫┫
                  ┗┻┛  ┗┻┛
"""
import re
import time
import random
import requests
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC


class BiliBiliLogin(object):
    """Log in to bilibili through selenium, solving the GeeTest slide captcha."""
    binary_location = r"D:\All_Work_App\Google\Google\Chrome\Application\chrome.exe"
    # Raw string: the previous non-raw literal only worked because Python keeps
    # unknown escape sequences verbatim (and warns about them). Same value.
    chromedriver_path = r'D:\Python\CREATE_PYTHON_ENV\Spider_env\chromedriver.exe'

    # Minimum per-channel difference for two pixels to count as different.
    PIXEL_THRESHOLD = 50

    def __init__(self):
        """Start Chrome; the login page is opened later by input_info_click."""
        self.opt = webdriver.ChromeOptions()
        self.opt.binary_location = self.binary_location
        # NOTE(review): executable_path / chrome_options are deprecated keyword
        # names in newer Selenium releases - confirm against the installed version.
        self.driver = webdriver.Chrome(executable_path=self.chromedriver_path, chrome_options=self.opt)
        self.wait = WebDriverWait(self.driver, timeout=10)
        self.url = 'https://passport.bilibili.com/login'
        self.username = None
        self.password = None

    def input_info_click(self, username=None, password=None):
        """Open the login page, type the credentials and submit the form.

        The previous version read ``username``/``password`` from module
        globals; they are now explicit (optional) arguments that default to
        the values stored on the instance by ``main``.

        :param username: account name; defaults to ``self.username``
        :param password: account password; defaults to ``self.password``
        :raises ValueError: if no credentials are available
        """
        if username is None:
            username = getattr(self, 'username', None)
        if password is None:
            password = getattr(self, 'password', None)
        if username is None or password is None:
            raise ValueError('username and password are required')

        self.driver.maximize_window()
        self.driver.get(self.url)
        username_input = self.wait.until(EC.presence_of_element_located((By.ID, 'login-username')))
        password_input = self.wait.until(EC.presence_of_element_located((By.ID, 'login-passwd')))
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'btn-login')))
        username_input.send_keys(username)
        password_input.send_keys(password)

        submit.click()
        time.sleep(1)
        # If the captcha canvas has not shown up after one second, click again.
        if not self.driver.find_elements(By.CLASS_NAME, 'geetest_canvas_slice'):
            submit.click()

    def _set_display(self, class_name, value):
        """Set the inline style of the first element with ``class_name`` to ``display: <value>``."""
        js = 'document.getElementsByClassName("{}")[0].style="display: {}";'.format(class_name, value)
        self.driver.execute_script(js)

    def _screenshot_background(self):
        """Return a screenshot of the ``geetest_canvas_bg`` element as an ``Image``."""
        png = self.driver.find_element(By.CLASS_NAME, 'geetest_canvas_bg').screenshot_as_png
        return Image.open(BytesIO(png))

    def get_verify_img(self):
        """Capture the captcha picture with and without the gap.

        The element styles are changed through JavaScript and the
        ``geetest_canvas_bg`` element is screenshotted twice: once with the
        slider piece hidden (gapped picture) and once after
        ``geetest_canvas_fullbg`` has been made visible (presumably it is
        drawn over the background canvas - confirm). Both pictures are also
        saved as PNG files in the working directory.

        :return: tuple ``(img_full, img_shadow)``
        """
        self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_slice')))

        # Picture with the gap: hide the movable piece first.
        self._set_display('geetest_canvas_slice', 'none')
        time.sleep(1.5)
        img_shadow = self._screenshot_background()
        img_shadow.save('img_shadow.png')

        # Complete picture: reveal the full-background canvas.
        self._set_display('geetest_canvas_fullbg', 'block')
        time.sleep(1.5)
        img_full = self._screenshot_background()
        img_full.save('img_full.png')

        return (img_full, img_shadow)

    def get_x_offset(self, img_full, img_shadow):
        """Return the x coordinate of the first column where the pictures differ.

        :param img_full: picture without the gap
        :param img_shadow: picture with the gap
        :return: distance the slider has to travel, or None if no pixel differs
        """
        width, height = img_full.size
        for x in range(1, width):
            for y in range(1, height):
                if self.px_is_not_samilar(img_full, img_shadow, x, y):
                    return x
        return None

    def px_is_not_samilar(self, img_full, img_shadow, x, y):
        """Tell whether the pixel at (x, y) differs between the two pictures.

        All three colour channels are compared (the previous code returned
        after checking only the first channel).

        :param img_full: picture without the gap
        :param img_shadow: picture with the gap
        :param x: x coordinate of the pixel
        :param y: y coordinate of the pixel
        :return: True if any channel differs by PIXEL_THRESHOLD or more, else False
        """
        img_full_px = img_full.getpixel((x, y))
        img_shadow_px = img_shadow.getpixel((x, y))
        return any(
            abs(img_full_px[i] - img_shadow_px[i]) >= self.PIXEL_THRESHOLD
            for i in range(3)
        )

    def move_x_offset(self, x, change):
        """Press the slider button, move it along the track and release it.

        :param x: distance to the gap
        :param change: number of extra -1 steps appended to the track
        """
        elem = self.driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
        ActionChains(self.driver).click_and_hold(elem).perform()

        for step in self.get_tracks(x, change):
            ActionChains(self.driver).move_by_offset(xoffset=step, yoffset=0).perform()
        ActionChains(self.driver).release(elem).perform()

    def get_tracks(self, x, change):
        """Restore the captcha display, then compute the slider track.

        :param x: distance to the gap
        :param change: number of extra -1 steps appended to the track
        :return: list of integer x offsets
        """
        # Undo the style changes made for the screenshots.
        self._set_display('geetest_canvas_slice', 'block')
        time.sleep(1.5)
        self._set_display('geetest_canvas_fullbg', 'none')
        return self._build_tracks(x, change)

    @staticmethod
    def _build_tracks(x, change):
        """Compute the track: uniform acceleration, then deceleration, then a slide back.

            v = v0 + a*t
            s = v0*t + 0.5*a*t^2
        """
        v = 0  # current velocity
        t = 0.1  # duration of one step
        tracks = []
        current = 0  # distance covered so far (unrounded)
        mid = x * 4 / 5  # start decelerating after this point

        x += 10  # overshoot first, then slide back

        while current < x:
            # A smaller acceleration gives shorter steps and therefore more of them.
            a = 2 if current < mid else -3
            v0 = v
            s = v0 * t + 0.5 * a * (t ** 2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t

        # Slide back towards the approximate target position.
        tracks.extend([-2] * 3)
        tracks.extend([-1] * 4)
        tracks.extend([-1] * change)
        return tracks

    def main(self, username, password):
        """Log in and solve the captcha, trying up to four slides.

        :param username: account name
        :param password: account password
        """
        self.username = username
        self.password = password
        self.input_info_click(username, password)
        img_full, img_shadow = self.get_verify_img()
        print(img_full.size)
        print(img_shadow.size)
        x_offset = self.get_x_offset(img_full, img_shadow)
        print(x_offset)
        if x_offset is None:
            # Nothing to slide to; previously this surfaced as a swallowed TypeError.
            print('gap not found, giving up')
            return

        # Each retry appends a few more backward steps to the track.
        for change in range(3, 10, 2):
            try:
                self.move_x_offset(x_offset, change)
                time.sleep(3)
            except Exception as exc:
                # Best effort: a failed slide must not abort the remaining attempts.
                print('slide attempt failed: {!r}'.format(exc))

            if self.driver.find_elements(By.XPATH, '//a[@class="link-ranking"]'):
                print('登录成功')
                # Stop here instead of sliding again on the logged-in page.
                break


# Script entry point: log in to bilibili with the credentials below.
if __name__ == '__main__':
    # Placeholder credentials; replace them with a real account before running.
    username = 'xxxxxxxxxx'
    password = 'xxxxxxxxxx'
    bilibili = BiliBiliLogin()
    bilibili.main(username, password)

升级版本(获取cookies)

# !/usr/bin/env python
# _*_ coding:utf-8 _*_
# author:满怀心 2019/8/20 12:26
"""
# code is far away from bugs with the god animal protecting
    I love animals. They taste delicious.
              ┏┓      ┏┓
            ┏┛┻━━━┛┻┓
            ┃      ☃      ┃
            ┃  ┳┛  ┗┳  ┃
            ┃      ┻      ┃
            ┗━┓      ┏━┛
                ┃      ┗━━━┓
                ┃  神兽保佑    ┣┓
                ┃ 永无BUG!   ┏┛
                ┗┓┓┏━┳┓┏┛
                  ┃┫┫  ┃┫┫
                  ┗┻┛  ┗┻┛
"""
import re
import time
import random
import requests
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC


class BiliBiliLogin(object):
    """Log in to bilibili through selenium, solve the slide captcha and return the cookies."""
    binary_location = r"D:\All_Work_App\Google\Google\Chrome\Application\chrome.exe"
    # Raw string: the previous non-raw literal only worked because Python keeps
    # unknown escape sequences verbatim (and warns about them). Same value.
    chromedriver_path = r'D:\Python\CREATE_PYTHON_ENV\Spider_env\chromedriver.exe'

    # Minimum per-channel difference for two pixels to count as different.
    PIXEL_THRESHOLD = 50

    def __init__(self):
        """Start Chrome; the login page is opened later by input_info_click."""
        self.opt = webdriver.ChromeOptions()
        self.opt.binary_location = self.binary_location
        # NOTE(review): executable_path / chrome_options are deprecated keyword
        # names in newer Selenium releases - confirm against the installed version.
        self.driver = webdriver.Chrome(executable_path=self.chromedriver_path, chrome_options=self.opt)
        self.wait = WebDriverWait(self.driver, timeout=10)
        self.url = 'https://passport.bilibili.com/login'
        self.username = None
        self.password = None

    def input_info_click(self, username=None, password=None):
        """Open the login page, type the credentials and submit the form.

        The previous version read ``username``/``password`` from module
        globals; they are now explicit (optional) arguments that default to
        the values stored on the instance by ``main``.

        :param username: account name; defaults to ``self.username``
        :param password: account password; defaults to ``self.password``
        :raises ValueError: if no credentials are available
        """
        if username is None:
            username = getattr(self, 'username', None)
        if password is None:
            password = getattr(self, 'password', None)
        if username is None or password is None:
            raise ValueError('username and password are required')

        self.driver.maximize_window()
        self.driver.get(self.url)
        username_input = self.wait.until(EC.presence_of_element_located((By.ID, 'login-username')))
        password_input = self.wait.until(EC.presence_of_element_located((By.ID, 'login-passwd')))
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'btn-login')))
        username_input.send_keys(username)
        password_input.send_keys(password)

        submit.click()
        time.sleep(1)
        # If the captcha canvas has not shown up after one second, click again.
        if not self.driver.find_elements(By.CLASS_NAME, 'geetest_canvas_slice'):
            submit.click()

    def _set_display(self, class_name, value):
        """Set the inline style of the first element with ``class_name`` to ``display: <value>``."""
        js = 'document.getElementsByClassName("{}")[0].style="display: {}";'.format(class_name, value)
        self.driver.execute_script(js)

    def _screenshot_background(self):
        """Return a screenshot of the ``geetest_canvas_bg`` element as an ``Image``."""
        png = self.driver.find_element(By.CLASS_NAME, 'geetest_canvas_bg').screenshot_as_png
        return Image.open(BytesIO(png))

    def get_verify_img(self):
        """Capture the captcha picture with and without the gap.

        The element styles are changed through JavaScript and the
        ``geetest_canvas_bg`` element is screenshotted twice: once with the
        slider piece hidden (gapped picture) and once after
        ``geetest_canvas_fullbg`` has been made visible (presumably it is
        drawn over the background canvas - confirm). Both pictures are also
        saved as PNG files in the working directory.

        :return: tuple ``(img_full, img_shadow)``
        """
        self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_slice')))

        # Picture with the gap: hide the movable piece first.
        self._set_display('geetest_canvas_slice', 'none')
        time.sleep(1.5)
        img_shadow = self._screenshot_background()
        img_shadow.save('img_shadow.png')

        # Complete picture: reveal the full-background canvas.
        self._set_display('geetest_canvas_fullbg', 'block')
        time.sleep(1.5)
        img_full = self._screenshot_background()
        img_full.save('img_full.png')

        return (img_full, img_shadow)

    def get_x_offset(self, img_full, img_shadow):
        """Return the x coordinate of the first column where the pictures differ.

        :param img_full: picture without the gap
        :param img_shadow: picture with the gap
        :return: distance the slider has to travel, or None if no pixel differs
        """
        width, height = img_full.size
        for x in range(1, width):
            for y in range(1, height):
                if self.px_is_not_samilar(img_full, img_shadow, x, y):
                    return x
        return None

    def px_is_not_samilar(self, img_full, img_shadow, x, y):
        """Tell whether the pixel at (x, y) differs between the two pictures.

        All three colour channels are compared (the previous code returned
        after checking only the first channel).

        :param img_full: picture without the gap
        :param img_shadow: picture with the gap
        :param x: x coordinate of the pixel
        :param y: y coordinate of the pixel
        :return: True if any channel differs by PIXEL_THRESHOLD or more, else False
        """
        img_full_px = img_full.getpixel((x, y))
        img_shadow_px = img_shadow.getpixel((x, y))
        return any(
            abs(img_full_px[i] - img_shadow_px[i]) >= self.PIXEL_THRESHOLD
            for i in range(3)
        )

    def move_x_offset(self, x, change):
        """Press the slider button, move it along the track and release it.

        :param x: distance to travel along the x axis
        :param change: number of extra -1 steps appended to the track
        """
        elem = self.driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
        ActionChains(self.driver).click_and_hold(elem).perform()

        for step in self.get_tracks(x, change):
            ActionChains(self.driver).move_by_offset(xoffset=step, yoffset=0).perform()
        ActionChains(self.driver).release(elem).perform()

    def get_tracks(self, x, change):
        """Restore the captcha display, then compute the slider track.

        :param x: distance to travel along the x axis
        :param change: fine-tuning value passed on each of the four attempts;
                       that many extra -1 steps are appended to the track
        :return: list of integer x offsets
        """
        # Undo the style changes made for the screenshots.
        self._set_display('geetest_canvas_slice', 'block')
        time.sleep(1.5)
        self._set_display('geetest_canvas_fullbg', 'none')
        return self._build_tracks(x, change)

    @staticmethod
    def _build_tracks(x, change):
        """Compute the track: uniform acceleration, then deceleration, then a slide back.

            v = v0 + a*t
            s = v0*t + 0.5*a*t^2
        """
        v = 0  # current velocity
        t = 0.1  # duration of one step
        tracks = []
        current = 0  # distance covered so far (unrounded)
        mid = x * 4 / 5  # start decelerating after this point

        x += 10  # overshoot first, then slide back

        while current < x:
            # A smaller acceleration gives shorter steps and therefore more of them.
            a = 2 if current < mid else -3
            v0 = v
            s = v0 * t + 0.5 * a * (t ** 2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t

        # Slide back towards the approximate target position.
        tracks.extend([-2] * 3)
        tracks.extend([-1] * 4)
        tracks.extend([-1] * change)
        return tracks

    def get_cookies(self):
        """Return the browser's cookies as a ``{name: value}`` dict."""
        return {cookie['name']: cookie['value'] for cookie in self.driver.get_cookies()}

    def main(self, username, password):
        """Log in, solve the captcha (up to four slides) and return the cookies.

        :param username: account name
        :param password: account password
        :return: cookie dict after a successful login, otherwise None
        """
        self.username = username
        self.password = password
        self.input_info_click(username, password)
        img_full, img_shadow = self.get_verify_img()
        print(img_full.size)    # size of the complete picture
        print(img_shadow.size)  # size of the gapped picture
        x_offset = self.get_x_offset(img_full, img_shadow)  # distance to slide
        print(x_offset)
        if x_offset is None:
            # Nothing to slide to; previously this surfaced as a swallowed TypeError.
            print('gap not found, giving up')
            return None

        # Four attempts; each retry appends a few more backward steps to the track.
        for change in range(3, 10, 2):
            try:
                self.move_x_offset(x_offset, change)
                time.sleep(3)
            except Exception as exc:
                # Best effort: a failed slide must not abort the remaining attempts.
                print('slide attempt failed: {!r}'.format(exc))

            if self.driver.find_elements(By.XPATH, '//a[@class="link-ranking"]'):
                print('登录成功')
                return self.get_cookies()
        return None


# Script entry point: log in to bilibili and print the resulting cookies.
if __name__ == '__main__':
    # Placeholder credentials; replace them with a real account before running.
    username = 'xxxxxxxxxx'
    password = 'xxxxxxxxxx'
    bilibili = BiliBiliLogin()
    cookies = bilibili.main(username, password)
    print(cookies)

案例三(豆瓣)

import re
import time
import random
import requests
from io import BytesIO
from lxml import etree
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC


class DouBanSpider:
    """Log in to douban with a username and password through selenium and collect the cookies."""
    binary_location = r"D:\All_Work_App\Google\Google\Chrome\Application\chrome.exe"
    # Raw string: the previous non-raw literal only worked because Python keeps
    # unknown escape sequences verbatim (and warns about them). Same value.
    chromedriver_path = r'D:\Python\CREATE_PYTHON_ENV\Spider_env\chromedriver.exe'

    def __init__(self, username, password):
        """Store the credentials and start Chrome.

        :param username: account name
        :param password: account password
        """
        self.username = username
        self.password = password
        self.opt = webdriver.ChromeOptions()
        self.opt.binary_location = self.binary_location
        # NOTE(review): executable_path / chrome_options are deprecated keyword
        # names in newer Selenium releases - confirm against the installed version.
        self.driver = webdriver.Chrome(executable_path=self.chromedriver_path, chrome_options=self.opt)
        self.wait = WebDriverWait(self.driver, timeout=10)
        self.url = 'https://www.douban.com/'

    def get_login_form(self, page_sourse):
        """Navigate the browser to the login form embedded as an iframe in the home page.

        :param page_sourse: HTML source of the home page
        :raises IndexError: if the login iframe is not present in the page
        """
        tree = etree.HTML(page_sourse)
        frame_srcs = tree.xpath('//div[@class="login"]/iframe/@src')
        if not frame_srcs:
            raise IndexError('login iframe not found in the page source')

        log_form_url = frame_srcs[0]
        # Only a protocol-relative src ("//host/...") needs a scheme; an src
        # that is already absolute is used unchanged.
        if log_form_url.startswith('//'):
            log_form_url = 'https:' + log_form_url
        self.driver.get(log_form_url)

    def find_username_password(self):
        """Fill in the password-login form, submit it and return the cookies.

        Switches to the account/password tab, types the stored credentials,
        submits, then clicks the link inside ``div.aside`` once it becomes
        clickable before reading the cookies.

        :return: cookies as a ``{name: value}`` dict
        """
        self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'account-tab-account'))).click()
        username_field = self.wait.until(EC.element_to_be_clickable((By.ID, 'username')))
        password_field = self.wait.until(EC.element_to_be_clickable((By.ID, 'password')))

        username_field.send_keys(self.username)
        password_field.send_keys(self.password)

        submit = self.wait.until(EC.element_to_be_clickable((By.XPATH, '//a[@class="btn btn-account btn-active"]')))
        submit.click()

        self.wait.until(EC.element_to_be_clickable((By.XPATH, '//div[@class="aside"]/a'))).click()

        return self.get_cookies()

    def get_cookies(self):
        """Return the browser's cookies as a ``{name: value}`` dict."""
        return {cookie['name']: cookie['value'] for cookie in self.driver.get_cookies()}

    def main(self):
        """Open the home page, go to the login form, log in and print the cookies."""
        self.driver.get(self.url)
        self.get_login_form(self.driver.page_source)
        cookies = self.find_username_password()
        print(cookies)


# Script entry point: log in to douban with the credentials below.
if __name__ == '__main__':
    # Placeholder credentials; replace them with a real account before running.
    username = 'xxxxxxxxxx'
    password = 'xxxxxxxxxx'
    douban = DouBanSpider(username, password)
    douban.main()
Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐