Beginner's crawler notes: configuring a proxy for Selenium + Chrome

Weibo login limits the number of failed attempts. On top of that, many of the accounts in the cookie pool get blocked and have to be removed from the pool, so a proxy is needed. I spent most of a day searching Baidu without getting anywhere, and it turned out that Google solved the problem in minutes. What else can Baidu do besides sell fake medicine?

With Selenium + Chrome, a proxy that requires authentication cannot be configured through Chrome options alone. The workaround is to load a Chrome extension that sets the proxy and supplies the credentials.

Original answer: https://stackoverflow.com/questions/29983106/how-can-i-set-proxy-with-authentication-in-selenium-chrome-web-driver-using-pyth#answer-30953780 (Stack Overflow is a good place)

# -*- coding: utf-8 -*-
# @Time    : 2017/11/15 9:50
# @Author  : Ouch, lying trough
# @Site    : 
# @File    : pubilc.py
# @Software: PyCharm

import json
import string
import zipfile

def create_proxyauth_extension(proxy_host, proxy_port,
                               proxy_username, proxy_password,
                               scheme='http', plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    background script sets a fixed proxy server through
    ``chrome.proxy.settings`` and answers ``onAuthRequired`` events with the
    given credentials.

    Note that the credentials are stored in plain text inside the zip file.

    Args:
        proxy_host (str): Proxy address or domain name.
        proxy_port (int): Proxy port number.
        proxy_username (str): User name for proxy authentication.
        proxy_password (str): Password for proxy authentication.
        scheme (str): Proxy scheme; defaults to ``'http'``.
        plugin_path (str): Path of the zip file to write. Defaults to
            ``'vimm_chrome_proxyauth_plugin.zip'`` (relative to the current
            working directory).

    Returns:
        str: The path of the written extension (``plugin_path``).
    """
    if plugin_path is None:
        plugin_path = 'vimm_chrome_proxyauth_plugin.zip'

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # The placeholders are NOT wrapped in quotes here: every value is encoded
    # with json.dumps below, which adds the quotes and escapes characters such
    # as '"' and '\\' that would otherwise break (or alter) the generated
    # JavaScript. For ordinary values the output is unchanged.
    background_js = string.Template(
    """
    var config = {
            mode: "fixed_servers",
            rules: {
              singleProxy: {
                scheme: ${scheme},
                host: ${host},
                port: parseInt(${port})
              },
              bypassList: ["foobar.com"]
            }
          };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: ${username},
                password: ${password}
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """
    ).substitute(
        host=json.dumps(proxy_host),
        port=json.dumps(proxy_port),
        username=json.dumps(proxy_username),
        password=json.dumps(proxy_password),
        scheme=json.dumps(scheme),
    )
    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path

Usage:

from selenium import webdriver
from common.pubilc import create_proxyauth_extension

# Build the extension zip that configures the proxy and supplies its
# credentials; the returned value is the path of the zip file.
proxyauth_plugin_path = create_proxyauth_extension(
    proxy_host="XXXXX.com",
    proxy_port=9020,
    proxy_username="XXXXXXX",
    proxy_password="XXXXXXX"
)

# Load the generated extension into Chrome.
co = webdriver.ChromeOptions()
# co.add_argument("--start-maximized")
co.add_extension(proxyauth_plugin_path)

# Raw string: "\c" is not a valid escape sequence, so the non-raw form
# triggers an invalid-escape warning on current Python versions.
# NOTE(review): newer Selenium releases replace executable_path/chrome_options
# with Service/options -- confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=r"C:\chromedriver.exe", chrome_options=co)
# Fetch an IP-lookup page and print its source.
driver.get("http://ip138.com/")
print(driver.page_source)

Proxy without authentication:

from selenium import webdriver

# A proxy without authentication can be set directly with a Chrome
# command-line switch; replace ip:port with the real proxy address.
options = webdriver.ChromeOptions()
options.add_argument('--proxy-server=http://ip:port')
# Fixed: the keyword value was spelled "0ptions" (leading zero), which is a
# SyntaxError. The path is a raw string because "\c" is an invalid escape.
# NOTE(review): newer Selenium releases replace executable_path/chrome_options
# with Service/options -- confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=r"C:\chromedriver.exe", chrome_options=options)
driver.get("http://ip138.com/")
print(driver.page_source)

Tags: Python Selenium Google Pycharm JSON

Posted on Sat, 09 May 2020 11:39:48 -0400 by gethinw