Selenium
:::warning[Selenium程序运行经常出现如下问题,需要重点关注如下:]
1、提示弹窗要求手动输入用户名和密码的问题:
注意选择浏览器版本对应的demo,例如Chrome版本>=92,注意看demo的标题进行选择。
注意浏览器版本和浏览器驱动要一致,例如:100版本的需要下载对于100版本的driver.
2、临时文件存放目录权限错误的问题:
注意设置临时文件存放目录权限,例如 plugin_path = r'/tmp/{}_{}@t.16yun.zip',该文件用于临时存放用户名和密码,如果目录不存在运行也会提示错误,或者提示弹窗要求手动输入用户名和密码。
3、程序运行被目标网站识别的问题:
设置运行模式(防止被网站反爬),如果浏览器正常运行下,navigator.webdriver的值应该是undefined或者false,如果为true目标网站能检测到selenium,设置为开发者模式,可以防止目标网站识别。
:::
from selenium import webdriver
import string
import zipfile
# 代理服务器(产品官网 www.duoip.cn)
proxyHost = "tunnel.hahado.cn"
proxyPort = "31111"
# 代理验证信息
proxyUser = "username"
proxyPass = "password"
def create_proxy_auth_extension(proxy_host, proxy_port,
proxy_username, proxy_password,
scheme='http', plugin_path=None):
if plugin_path is None:
plugin_path = r'D:/{}_{}@tunnel.hahado.zip'.format(proxy_username, proxy_password)
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "16YUN Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""
background_js = string.Template(
"""
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "${scheme}",
host: "${host}",
port: parseInt(${port})
},
bypassList: ["foobar.com"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "${username}",
password: "${password}"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: [""]},
['blocking']
);
"""
).substitute(
host=proxy_host,
port=proxy_port,
username=proxy_username,
password=proxy_password,
scheme=scheme,
)
with zipfile.ZipFile(plugin_path, 'w') as zp:
zp.writestr("manifest.json", manifest_json)
zp.writestr("background.js", background_js)
return plugin_path
proxy_auth_plugin_path = create_proxy_auth_extension(
proxy_host=proxyHost,
proxy_port=proxyPort,
proxy_username=proxyUser,
proxy_password=proxyPass)
option = webdriver.ChromeOptions()
option.add_argument("--start-maximized")
# 如报错 chrome-extensions
# option.add_argument("--disable-extensions")
option.add_extension(proxy_auth_plugin_path)
# 关闭webdriver的一些标志
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(chrome_options=option)
# 修改webdriver get属性
# script = '''
# Object.defineProperty(navigator, 'webdriver', {
# get: () => undefined
# })
# '''
# driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script})
driver.get("http://httpbin.org/ip")
from selenium import webdriver
import string
import zipfile
# 代理服务器(产品官网 www.duoip.cn)
proxyHost = "tunnel.hahado.cn"
proxyPort = "3111"
# 代理验证信息
proxyUser = "username"
proxyPass = "password"
def create_proxy_auth_extension(proxy_host, proxy_port,
proxy_username, proxy_password,
scheme='http', plugin_path=None):
if plugin_path is None:
plugin_path = r'/tmp/{}_{}@tunnel.hahado.zip'.format(proxy_username, proxy_password)
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "16YUN Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""
background_js = string.Template(
"""
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "${scheme}",
host: "${host}",
port: parseInt(${port})
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "${username}",
password: "${password}"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
"""
).substitute(
host=proxy_host,
port=proxy_port,
username=proxy_username,
password=proxy_password,
scheme=scheme,
)
print(background_js)
with zipfile.ZipFile(plugin_path, 'w') as zp:
zp.writestr("manifest.json", manifest_json)
zp.writestr("background.js", background_js)
return plugin_path
proxy_auth_plugin_path = create_proxy_auth_extension(
proxy_host=proxyHost,
proxy_port=proxyPort,
proxy_username=proxyUser,
proxy_password=proxyPass)
option = webdriver.ChromeOptions()
option.add_argument("--start-maximized")
# 如报错 chrome-extensions
# option.add_argument("--disable-extensions")
option.add_extension(proxy_auth_plugin_path)
# 关闭webdriver的一些标志
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(
chrome_options=option,
executable_path="./chromdriver"
)
# 修改webdriver get属性
# script = '''
# Object.defineProperty(navigator, 'webdriver', {
# get: () => undefined
# })
# '''
# driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script})
driver.get("https://httpbin.org/ip")
import os
import time
import zipfile
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class GenCookies(object):
# 随机useragent
USER_AGENT = open('useragents.txt').readlines()
# 代理服务器(产品官网 www.duoip.cn)
PROXY_HOST = 'tunnel.hahado.cn' # proxy or host
PROXY_PORT = 31111 # port
PROXY_USER = 'USERNAME' # username
PROXY_PASS = 'PASSWORD' # password
@classmethod
def get_chromedriver(cls, use_proxy=False, user_agent=None):
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""
background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: "%s",
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "%s",
password: "%s"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
""" % (cls.PROXY_HOST, cls.PROXY_PORT, cls.PROXY_USER, cls.PROXY_PASS)
path = os.path.dirname(os.path.abspath(__file__))
chrome_options = webdriver.ChromeOptions()
# 关闭webdriver的一些标志
# chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
if use_proxy:
pluginfile = 'proxy_auth_plugin.zip'
with zipfile.ZipFile(pluginfile, 'w') as zp:
zp.writestr("manifest.json", manifest_json)
zp.writestr("background.js", background_js)
chrome_options.add_extension(pluginfile)
if user_agent:
chrome_options.add_argument('--user-agent=%s' % user_agent)
driver = webdriver.Chrome(
os.path.join(path, 'chromedriver'),
chrome_options=chrome_options)
# 修改webdriver get属性
# script = '''
# Object.defineProperty(navigator, 'webdriver', {
# get: () => undefined
# })
# '''
# driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script})
return driver
def __init__(self, username, password):
# 登录example网站
self.url = 'https://passport.example.cn/signin/login?entry=example&r=https://m.example.cn/'
self.browser = self.get_chromedriver(use_proxy=True, user_agent=self.USER_AGENT)
self.wait = WebDriverWait(self.browser, 20)
self.username = username
self.password = password
def open(self):
"""
打开网页输入用户名密码并点击
:return: None
"""
self.browser.delete_all_cookies()
self.browser.get(self.url)
username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
username.send_keys(self.username)
password.send_keys(self.password)
time.sleep(1)
submit.click()
def password_error(self):
"""
判断是否密码错误
:return:
"""
try:
return WebDriverWait(self.browser, 5).until(
EC.text_to_be_present_in_element((By.ID, 'errorMsg'), '用户名或密码错误'))
except TimeoutException:
return False
def get_cookies(self):
"""
获取Cookies
:return:
"""
return self.browser.get_cookies()
def main(self):
"""
入口
:return:
"""
self.open()
if self.password_error():
return {
'status': 2,
'content': '用户名或密码错误'
}
cookies = self.get_cookies()
return {
'status': 1,
'content': cookies
}
if __name__ == '__main__':
result = GenCookies(
username='180000000',
password='16yun',
).main()
print(result)
import org.json.JSONException;
import org.json.JSONObject;
import org.openqa.selenium.Platform;
import org.openqa.selenium.Proxy;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import com.gargoylesoftware.htmlunit.DefaultCredentialsProvider;
import com.gargoylesoftware.htmlunit.WebClient;
public class HtmlUnitDriverProxyDemo
{
// 代理验证信息
final static String proxyUser = "username";
final static String proxyPass = "password";
// 代理服务器
final static String proxyServer = "tunnel.hahado.cn:3111";
public static void main(String[] args) throws JSONException
{
HtmlUnitDriver driver = getHtmlUnitDriver();
driver.get("https://httpbin.org/ip");
String title = driver.getTitle();
System.out.println(title);
}
public static HtmlUnitDriver getHtmlUnitDriver()
{
HtmlUnitDriver driver = null;
Proxy proxy = new Proxy();
proxy.setHttpProxy(proxyServer);
DesiredCapabilities capabilities = DesiredCapabilities.htmlUnit();
capabilities.setCapability(CapabilityType.PROXY, proxy);
capabilities.setJavascriptEnabled(true);
capabilities.setPlatform(Platform.WIN8_1);
driver = new HtmlUnitDriver(capabilities) {
@Override
protected WebClient modifyWebClient(WebClient client) {
DefaultCredentialsProvider creds = new DefaultCredentialsProvider();
creds.addCredentials(proxyUser, proxyPass);
client.setCredentialsProvider(creds);
return client;
}
};
driver.setJavascriptEnabled(true);
return driver;
}
}
import org.json.JSONException;
import org.json.JSONObject;
import org.openqa.selenium.Platform;
import org.openqa.selenium.Proxy;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxProfile;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import com.gargoylesoftware.htmlunit.DefaultCredentialsProvider;
import com.gargoylesoftware.htmlunit.WebClient;
public class FirefoxDriverProxyDemo
{
// 代理隧道验证信息
final static String proxyUser = "username";
final static String proxyPass = "password";
// 代理服务器
final static String proxyHost = "tunnel.hahado.cn";
final static int proxyPort = 31111;
final static String firefoxBin = "C:/Program Files/Mozilla Firefox/firefox.exe";
public static void main(String[] args) throws JSONException
{
System.setProperty("webdriver.firefox.bin", firefoxBin);
FirefoxProfile profile = new FirefoxProfile();
profile.setPreference("network.proxy.type", 1);
profile.setPreference("network.proxy.http", proxyHost);
profile.setPreference("network.proxy.http_port", proxyPort);
profile.setPreference("network.proxy.ssl", proxyHost);
profile.setPreference("network.proxy.ssl_port", proxyPort);
profile.setPreference("username", proxyUser);
profile.setPreference("password", proxyPass);
profile.setPreference("network.proxy.share_proxy_settings", true);
profile.setPreference("network.proxy.no_proxies_on", "localhost");
FirefoxDriver driver = new FirefoxDriver(profile);
}
}
$pluginForProxyLogin = '/tmp/a'.uniqid().'.zip';
$zip = new ZipArchive();
$res = $zip->open($pluginForProxyLogin, ZipArchive::CREATE | ZipArchive::OVERWRITE);
$zip->addFile('/path/to/Chrome-proxy-helper/manifest.json', 'manifest.json');
$background = file_get_contents('/path/to/Chrome-proxy-helper/background.js');
$background = str_replace(['%proxy_host', '%proxy_port', '%username', '%password'], ['tunnel.hahado.cn', '31111', 'username', 'password'], $background);
$zip->addFromString('background.js', $background);
$zip->close();
putenv("webdriver.chrome.driver=/path/to/chromedriver");
$options = new ChromeOptions();
$options->addExtensions([$pluginForProxyLogin]);
$caps = DesiredCapabilities::chrome();
$caps->setCapability(ChromeOptions::CAPABILITY, $options);
$driver = ChromeDriver::start($caps);
$driver->get('https://httpbin.org/ip');
header('Content-Type: image/png');
echo $driver->takeScreenshot();
unlink($pluginForProxyLogin);
最后修改时间: 2 个月前