配置环境:需要准备两样东西——谷歌浏览器和对应的浏览器驱动(chromedriver)。

<!-- selenium --> <dependency> <groupId>org.seleniumhq.selenium</groupId> <artifactId>selenium-java</artifactId> <version>3.141.0</version> </dependency> <!-- selenium -->
/**
 * Minimal Selenium example: starts Chrome, opens a page and prints its title.
 */
public class Demo {
    /**
     * Launches Chrome through chromedriver, loads the page and prints the page title to stdout.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Tell Selenium where the chromedriver executable is.
        System.setProperty("webdriver.chrome.driver","C:\\Users\\Administrator.USER-20190917GB\\AppData\\Local\\Programs\\Python\\Python36\\chromedriver.exe");
        WebDriver driver = new ChromeDriver();
        try {
            driver.get("http://www.baidu.com");
            String title = driver.getTitle();
            System.out.print(title);
        } finally {
            // End the session even when navigation fails, so no browser or
            // chromedriver process is left running.
            driver.quit();
        }
    }
}
package com.java456.selenium;
import java.util.HashMap;
import java.util.Map;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * Opens a page in Chrome with image loading disabled, waits for the page body to be
 * displayed and prints the page title.
 */
public class Demo {
    /**
     * Runs the example once and always shuts the browser down afterwards.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.setProperty("webdriver.chrome.driver",
                "C:\\Users\\Administrator.USER-20190917GB\\AppData\\Local\\Programs\\Python\\Python36\\chromedriver.exe");

        // Start-up preferences: do not load images.
        Map<String, Object> preferences = new HashMap<String, Object>();
        preferences.put("profile.managed_default_content_settings.images", 2);
        ChromeOptions options = new ChromeOptions();
        options.setExperimentalOption("prefs", preferences);

        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get("https://pan.baidu.com/s/1qYmsGSs");

            // Wait at most 5 seconds for <body> to be displayed.
            WebDriverWait wait = new WebDriverWait(driver, 5);
            wait.until(new ExpectedCondition<Boolean>() {
                @Override
                public Boolean apply(WebDriver d) {
                    return d.findElement(By.tagName("body")).isDisplayed();
                }
            });

            String title = driver.getTitle();
            System.out.print(title);
        } finally {
            // quit() closes every window and ends the session. Running it in finally
            // means a timeout in the wait above no longer leaves the browser open.
            driver.quit();
        }
    }
}
public static void main(String[] args) {
// Point Selenium at the local chromedriver executable.
System.setProperty("webdriver.chrome.driver",
        "C:\\Users\\Administrator.USER-20190917GB\\AppData\\Local\\Programs\\Python\\Python36\\chromedriver.exe");

// Start Chrome with image loading switched off.
ChromeOptions chromeOptions = new ChromeOptions();
Map<String, Object> prefs = new HashMap<String, Object>();
prefs.put("profile.managed_default_content_settings.images", 2);
chromeOptions.setExperimentalOption("prefs", prefs);
WebDriver driver = new ChromeDriver(chromeOptions);
driver.get("https://pan.baidu.com/s/1wFE8gYizFaebwJLf93lAxQ ");

// Wait at most 5 seconds for <body> to be displayed.
new WebDriverWait(driver, 5).until(new ExpectedCondition<Boolean>() {
    public Boolean apply(WebDriver d) {
        return d.findElement(By.tagName("body")).isDisplayed();
    }
});

// Read the title first, then look up the input field and the button,
// type the code and click.
String title = driver.getTitle();
WebElement codeField = driver.findElement(By.cssSelector(".QKKaIE.LxgeIt"));
WebElement submitButton = driver.findElement(By.cssSelector(".g-button-right"));
codeField.sendKeys("6nlq");
submitButton.click();
System.out.print(title);
// The page-source dump and the close()/quit() calls are disabled in this variant.
//System.out.print(driver.getPageSource());
//driver.close();
//driver.quit();
}
// Set the value of the search box from JavaScript. The element located by Selenium is
// handed to the script as arguments[0], so the script does not look it up a second time.
WebElement input2 = driver.findElement(By.cssSelector("#input_search"));
((JavascriptExecutor) driver).executeScript("arguments[0].value=34", input2);
输入中文时,要使用下面这段代码:
try {
    // Pass both the element and the text as script arguments instead of splicing the
    // text into the JavaScript source, so quotes or other special characters in the
    // text cannot break the script.
    ((JavascriptExecutor) driver).executeScript("arguments[0].value = arguments[1];", input2, "在在");
} catch (Exception e) {
    // Report the failure instead of letting it abort the run.
    System.out.println(e);
}
执行 JS 代码时要用 try 包起来,否则会报错。
String title = driver.getTitle();
// Find the input field and the button by CSS selector, type the code and click.
WebElement input = driver.findElement(By.cssSelector(".QKKaIE.LxgeIt"));
WebElement btn = driver.findElement(By.cssSelector(".g-button-right"));
input.sendKeys("6nlq");
btn.click();

// Locate a link by its complete visible text.
driver.findElement(By.linkText("链接的全部文字内容"));
// Locate a link by a fragment of its visible text.
driver.findElement(By.partialLinkText("链接的部分文字内容"));
// Click the link whose text contains ">".
driver.findElement(By.partialLinkText(">")).click();
def start():
driver = webdriver.Chrome()
driver.get('https://kns.cnki.net/kns/brief/default_result.aspx') # 打开url
input = driver.find_element_by_css_selector('input.rekeyword')
input.send_keys(word)
btn = driver.find_element_by_css_selector('input#btnSearch')
btn.click()
time.sleep(4)
iframe = driver.find_element_by_id('iframeResult')
driver.switch_to.frame(iframe) # 切换到内容iframe
go(driver)//这个需要切回到原来的页面 切回list列表页面
// Give focus back to the list window, then report its URL and title.
driver.switchTo().window(listWindow);
String titleChild = driver.getTitle();
String currentUrl = driver.getCurrentUrl();
System.out.println("当前url:" + currentUrl);
System.out.println("titleChild:" + titleChild);
// End of: switch back to the original page (the list page).
// Many nodes match this selector and findElement would always return the first one,
// so fetch the whole list and click a different entry each time via index.
List<WebElement> btnList = driver.findElements(By.cssSelector("div.zwss-item-job-title"));
// Remember the list window before the click opens a new one. Identifying it by its
// handle avoids depending on the iteration order of getWindowHandles() below.
String listWindow = driver.getWindowHandle();
WebElement btn = btnList.get(index);
btn.click(); // this click opens a new window
index++;
String url = driver.getCurrentUrl();
Set<String> childids = driver.getWindowHandles(); // several windows exist now, so iterate over them
for (String s : childids) {
    System.out.println(s);
    if (s.equals(listWindow)) {
        // This is the list window; nothing to do.
        continue;
    }
    // Any other handle is the newly opened window: print its URL and title, then close it.
    driver.switchTo().window(s);
    String titleChild = driver.getTitle();
    System.out.println("当前url:" + driver.getCurrentUrl());
    System.out.println("titleChild:" + titleChild);
    driver.close();
    break;
}
// Switch back to the original page (the list page).
driver.switchTo().window(listWindow);
String titleChild = driver.getTitle();
System.out.println("当前url:" + driver.getCurrentUrl());
System.out.println("titleChild:" + titleChild);
//这个需要切回到原来的页面 切回list列表页面WebElement btn = driver.findElement(By.cssSelector(".btn-next"));
if(btn.isEnabled()){
btn.click();
}else{
System.out.println("下一页按钮不可以点击!!!!!!");
}driver.navigate().back();
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import ZhiWang_DB
word = "中医药"
def zhiwang2():
    """Open the search page, type a fixed keyword and click the search button."""
    driver = webdriver.Chrome()
    driver.get('https://kns.cnki.net/kns/brief/default_result.aspx')  # open the url
    keyword_box = driver.find_element_by_css_selector('input.rekeyword')
    keyword_box.send_keys('新冠肺炎')
    search_button = driver.find_element_by_css_selector('input#btnSearch')
    search_button.click()
    time.sleep(4)  # fixed pause after the click (presumably to let results load)
# Use BeautifulSoup to parse the content of the html.
def jie_xi_html(driver):
    """Parse the result table of the current page, print every row and pass it on.

    Reads ``driver.page_source``, looks up the ``table`` with class
    ``GridTableContent`` and, for every ``tr`` after the first one, prints title,
    title link, author, source, time and database, then calls ``ZhiWang_DB.add``
    with title, author, time, database, source and the module-level ``word``.

    Rows with fewer than six cells are skipped, and a page without the table is
    reported and ignored instead of raising.
    """
    mysoup = BeautifulSoup(driver.page_source)
    table = mysoup.find('table', attrs={'class': 'GridTableContent'})
    if table is None:
        print('GridTableContent table not found; nothing to parse')
        return
    print('table_len:%d' % len(table))

    tbody = table.find('tbody')
    if tbody is None:
        # No explicit <tbody> in the parsed tree: read the rows from the table itself.
        tbody = table
    else:
        print('tbody_len:%d' % len(tbody))

    trList = tbody.find_all('tr')
    print('trList_len:%d' % len(trList))

    # Skip the first row.
    for tr in trList[1:]:
        tdList = tr.find_all('td')
        if len(tdList) < 6:
            # Too few cells for the indexing below; not a data row.
            continue

        # Cell 1 is the title.
        td1 = tdList[1]
        raw_title = td1.getText().strip()
        print('题目%s' % raw_title)
        anchor = td1.find('a')
        print('题目链接%s' % (anchor.get("href") if anchor is not None else None))
        # Single quotes are replaced by spaces before the title is handed on
        # (presumably to keep them out of the database layer -- TODO confirm).
        title = raw_title.replace('\'', ' ')
        print(title)

        # Cell 2 is the author.
        author = tdList[2].getText().strip()
        print('作者%s' % author)
        # Cell 3 is the source.
        source = tdList[3].getText().strip()
        print('来源%s' % source)
        # Cell 4 is the time.
        published = tdList[4].getText().strip()
        print('时间%s' % published)
        # Cell 5 is the database.
        database = tdList[5].getText().strip()
        print('数据库%s' % database)

        ZhiWang_DB.add(title,
                       author,
                       published,
                       database,
                       source,
                       word)
        print('================================')
# Parse the current page, print it, jump to the next page and repeat.
def go(driver):
    """Parse every result page, following the '下一页' link until there is none.

    Calls ``jie_xi_html(driver)`` on the current page, then clicks the first link
    whose text contains '下一页' and pauses for 6 seconds. The loop ends normally
    when no such link exists, so the last page no longer raises, and a long result
    list cannot exhaust the recursion limit.
    """
    while True:
        # Parse and print the content of the current page.
        jie_xi_html(driver)
        next_links = driver.find_elements_by_partial_link_text('下一页')
        if not next_links:
            # No "next page" link: this was the last page.
            break
        next_links[0].click()
        time.sleep(6)  # fixed pause after the click (presumably to let the page load)
def start():
    """Search for the module-level ``word`` and hand the result iframe to ``go``.

    Opens the search page in Chrome, types ``word`` into the keyword box, clicks the
    search button, switches into the ``iframeResult`` iframe and calls ``go(driver)``.
    The browser session is always ended afterwards, including when ``go`` raises.
    """
    driver = webdriver.Chrome()
    try:
        driver.get('https://kns.cnki.net/kns/brief/default_result.aspx')  # open the url
        keyword_box = driver.find_element_by_css_selector('input.rekeyword')
        keyword_box.send_keys(word)
        search_button = driver.find_element_by_css_selector('input#btnSearch')
        search_button.click()
        time.sleep(4)  # fixed pause after the click (presumably to let results load)
        iframe = driver.find_element_by_id('iframeResult')
        driver.switch_to.frame(iframe)  # switch into the content iframe
        go(driver)
    finally:
        driver.quit()
def test_zhiwang():
    """Open the result page, type a keyword into ``#txt_1_value1`` and click search."""
    driver = webdriver.Chrome()
    driver.get('https://kns.cnki.net/kns/brief/result.aspx?dbprefix=scdb')  # open the url
    keyword_box = driver.find_element_by_css_selector('#txt_1_value1')
    keyword_box.send_keys('中医')
    time.sleep(2)
    search_button = driver.find_element_by_css_selector('#btnSearch')
    search_button.click()  # click event; the author notes that this raises an error
    time.sleep(14)


start()
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.Wait;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * Demonstrates switching between browser windows: opens a page, clicks a link that
 * opens a second window, finds that window by its title, closes it and returns to the
 * first window.
 */
public class YsfTest_20180725 {
    /**
     * Runs the window-switching example once and always ends the browser session.
     *
     * @param args unused
     * @throws InterruptedException if the pause before closing the child window is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        System.setProperty("webdriver.chrome.driver","C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe");
        WebDriver driver = new ChromeDriver();
        try {
            driver.get("http://www.w3school.com.cn/html/html_forms.asp");
            // Handle of the parent window, needed to come back to it later.
            String parentId = driver.getWindowHandle();
            // Find and click the "亲自试一试" (try it yourself) button.
            WebElement search = driver.findElement(By.xpath("/html/body/div/div[4]/div[6]/p[2]/a"));
            search.click();
            // The set now holds the handles of the parent window and the child window.
            Set<String> childids = driver.getWindowHandles();
            for (String s : childids) {
                // switchTo() focuses the window, so its title can be checked directly.
                if (driver.switchTo().window(s).getTitle().contains("W3School在线测试工具 V2")) {
                    String titleChild = driver.getTitle();
                    System.out.println("name:" + titleChild);
                    Thread.sleep(5000); // keep the child window open for 5 seconds
                    driver.close();
                    break;
                }
            }
            driver.switchTo().window(parentId);
            String titleParent = driver.getTitle();
            System.out.println("name:" + titleParent);
        } finally {
            // quit() closes all remaining windows and ends the session, also when a step
            // above throws; close() alone only closes the current window.
            driver.quit();
        }
    }
}
站长微信:xiaomao0055
站长QQ:14496453