前面研究了 Java 操作 Chrome 浏览器的基础,完全基于 Chrome DevTools Protocol(https://chromedevtools.github.io/devtools-protocol/),并输出了两个案例:《java直接操作chrome谷歌浏览器 实现网页爬虫》和《java直接调用google chrome headless浏览器 实现网页转换导出为pdf》。现在把完整的、基于 Chrome DevTools Protocol 用 Java 操作 Google Chrome 浏览器的工具类贴出来。
Java ChromeUtil 代码:
/**
 * Static helpers that drive a headless Chrome instance over the Chrome DevTools Protocol (CDP):
 * launching the browser, creating contexts/targets/sessions, navigating, waiting for the page,
 * and exporting the page as a PDF file.
 *
 * <p>Every CDP command is sent through {@code MyWebSocket.sendAndGet}, which returns the raw JSON
 * response or {@code null} when no response arrived. Each command type uses a fixed numeric id
 * (100, 200, 300, ...), so a single socket must not be used for overlapping commands.
 *
 * <p>NOTE(review): {@code log} is not declared in the visible source; presumably it is supplied by
 * a logging annotation (e.g. Lombok) that was lost when the code was pasted - confirm.
 */
public class ChromeUtil {

    /** Error code returned for failures that the original contract says should be retried. */
    private static final int RETRYABLE_ERROR = -10000;
    /** Maximum time to wait for Chrome to print its DevTools WebSocket URL. */
    private static final long LAUNCH_TIMEOUT_MS = 600000L;
    /** Maximum time {@link #waitReady} polls {@code document.readyState}. */
    private static final long READY_TIMEOUT_MS = 300000L;
    /** Maximum time {@link #waitPagePaint} waits for the paint/idle lifecycle event. */
    private static final long PAINT_TIMEOUT_MS = 120000L;
    /** Pause between two polls in the wait loops. */
    private static final long POLL_INTERVAL_MS = 100L;

    /**
     * Starts a headless Chrome process with remote debugging on a random port and connects to the
     * browser-level DevTools WebSocket endpoint that Chrome announces on stderr.
     *
     * @param tempath directory under which a fresh, uniquely named user-data-dir is created
     * @return a socket connected to the browser endpoint
     * @throws URISyntaxException if the URL announced by Chrome is not a valid URI
     * @throws IOException if the process cannot be started or the operating system is unsupported
     * @throws RuntimeException if Chrome does not announce its DevTools URL
     */
    public static MyWebSocket launch(String tempath) throws URISyntaxException, IOException {
        String cmdID = UUID.randomUUID().toString();
        List<String> arguments = new ArrayList<>();
        arguments.add(chromeExecutable());
        arguments.add("--disable-features=TranslateUI");
        arguments.add("--disable-extensions");
        arguments.add("--disable-background-networking");
        arguments.add("--safebrowsing-disable-auto-update");
        arguments.add("--disable-sync");
        arguments.add("--metrics-recording-only");
        arguments.add("--disable-default-apps");
        arguments.add("--mute-audio");
        arguments.add("--no-first-run");
        arguments.add("--no-default-browser-check");
        arguments.add("--disable-plugin-power-saver");
        arguments.add("--disable-popup-blocking");
        // Port 0 lets Chrome pick a free port; the real URL is read from stderr below.
        arguments.add("--remote-debugging-port=0");
        arguments.add("--disable-gpu");
        arguments.add("--enable-automation");
        arguments.add("--no-sandbox");
        arguments.add("--disable-dev-shm-usage");
        arguments.add("--disable-setuid-sandbox");
        arguments.add("--disable-web-security");
        arguments.add("--no-zygote");
        arguments.add("--headless");
        // Presumably a marker to identify this process from the outside - TODO confirm.
        arguments.add("--myId=" + cmdID);
        arguments.add("--user-data-dir=" + tempath + File.separator + cmdID);
        // No trailing "&": ProcessBuilder does not go through a shell, so "&" would be handed to
        // Chrome as a positional argument instead of backgrounding the process.

        ProcessBuilder builder = new ProcessBuilder(arguments);
        builder.environment().put("MY_ID", cmdID);
        Process process = builder.start();
        try {
            String url = readDevToolsUrl(process);
            if (StringUtils.isEmpty(url)) {
                throw new RuntimeException("启动浏览器失败");
            }
            log.warn("启动浏览器成功:" + url);
            return new MyWebSocket(new URI(url));
        } catch (RuntimeException | URISyntaxException e) {
            // Do not leave an orphaned browser behind when the start-up handshake fails.
            process.destroyForcibly();
            throw e;
        }
    }

    /** Returns the Chrome command name for the current operating system. */
    private static String chromeExecutable() throws IOException {
        // Locale.ROOT keeps the comparison independent of the JVM default locale.
        String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);
        if (os.startsWith("windows")) {
            return "chrome";
        }
        if (os.startsWith("linux")) {
            return "google-chrome";
        }
        throw new IOException("Unsupported operating system for launching Chrome: " + os);
    }

    /**
     * Scans the process's stderr for the "DevTools listening on ws://..." line.
     *
     * <p>The timeout is only evaluated after a line has been read, because reading blocks until
     * Chrome writes something. The stream is closed once scanning ends.
     *
     * @return the WebSocket URL, or {@code null} if stderr ended without announcing one
     */
    private static String readDevToolsUrl(Process process) {
        long startTime = System.currentTimeMillis();
        try (Scanner scanner = new Scanner(process.getErrorStream())) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (line.toLowerCase(java.util.Locale.ROOT).startsWith("devtools listening on")) {
                    int start = line.indexOf("ws://");
                    if (start >= 0) {
                        return line.substring(start);
                    }
                }
                if (System.currentTimeMillis() - startTime >= LAUNCH_TIMEOUT_MS) {
                    throw new RuntimeException("启动浏览器超时");
                }
            }
        }
        return null;
    }

    /** Builds a CDP command; {@code sessionId} and {@code params} are omitted when {@code null}. */
    private static JSONObject newCommand(int id, String method, String sessionId, JSONObject params) {
        JSONObject command = new JSONObject();
        command.put("id", id);
        command.put("method", method);
        if (sessionId != null) {
            command.put("sessionId", sessionId);
        }
        if (params != null) {
            command.put("params", params);
        }
        return command;
    }

    /** Sends the command and returns the raw response ({@code null} if none arrived). */
    private static String send(MyWebSocket myWebSocket, JSONObject command) {
        return myWebSocket.sendAndGet(command.toJSONString(), command.getIntValue("id"));
    }

    /** Parses a response and returns its "result" object, or {@code null} if there is none. */
    private static JSONObject resultOrNull(String response) {
        JSONObject message = response == null ? null : JSON.parseObject(response);
        return message == null ? null : message.getJSONObject("result");
    }

    /** Like {@link #resultOrNull} but fails with a descriptive exception instead of returning null. */
    private static JSONObject requireResult(String response, String method) {
        JSONObject result = resultOrNull(response);
        if (result == null) {
            throw new IllegalStateException("No result for " + method + ": " + response);
        }
        return result;
    }

    /** Enables target discovery ({@code Target.setDiscoverTargets}) on the browser connection. */
    public static void discoverTargets(MyWebSocket myWebSocket) {
        JSONObject params = new JSONObject();
        params.put("discover", true);
        send(myWebSocket, newCommand(100, "Target.setDiscoverTargets", null, params));
    }

    /**
     * Creates a new browser context ({@code Target.createBrowserContext}).
     *
     * @return the id of the new browser context
     * @throws IllegalStateException if Chrome returned no result
     */
    public static String createContext(MyWebSocket myWebSocket) {
        String response = send(myWebSocket, newCommand(200, "Target.createBrowserContext", null, null));
        return requireResult(response, "Target.createBrowserContext").getString("browserContextId");
    }

    /**
     * Opens a blank 1366x768 page target inside the given browser context.
     *
     * @return the id of the new target
     * @throws IllegalStateException if Chrome returned no result
     */
    public static String createTarget(MyWebSocket myWebSocket, String browserContextId) {
        JSONObject params = new JSONObject();
        params.put("url", "about:blank");
        params.put("width", 1366);
        params.put("height", 768);
        params.put("browserContextId", browserContextId);
        params.put("enableBeginFrameControl", false);
        String response = send(myWebSocket, newCommand(300, "Target.createTarget", null, params));
        return requireResult(response, "Target.createTarget").getString("targetId");
    }

    /** Closes the given target ({@code Target.closeTarget}). */
    public static void closeTarget(MyWebSocket myWebSocket, String targetId) {
        JSONObject params = new JSONObject();
        params.put("targetId", targetId);
        send(myWebSocket, newCommand(310, "Target.closeTarget", null, params));
    }

    /**
     * Attaches to the target in flattened mode, so later commands are addressed by session id.
     *
     * @return the session id to pass to the page-level helpers
     * @throws IllegalStateException if Chrome returned no result
     */
    public static String createSession(MyWebSocket myWebSocket, String targetId) {
        JSONObject params = new JSONObject();
        params.put("targetId", targetId);
        params.put("flatten", true);
        String response = send(myWebSocket, newCommand(400, "Target.attachToTarget", null, params));
        return requireResult(response, "Target.attachToTarget").getString("sessionId");
    }

    /** Enables the Runtime and Page domains and turns on page lifecycle events for the session. */
    public static void enablePage(MyWebSocket myWebSocket, String sessionId) {
        send(myWebSocket, newCommand(500, "Runtime.enable", sessionId, null));
        send(myWebSocket, newCommand(510, "Page.enable", sessionId, null));
        JSONObject params = new JSONObject();
        params.put("enabled", true);
        send(myWebSocket, newCommand(520, "Page.setLifecycleEventsEnabled", sessionId, params));
    }

    /** Navigates the session's page to {@code url} and logs the raw response. */
    public static void navigate(MyWebSocket myWebSocket, String sessionId, String url) {
        JSONObject params = new JSONObject();
        params.put("url", url);
        String result = send(myWebSocket, newCommand(600, "Page.navigate", sessionId, params));
        log.warn("跳转页面" + result);
    }

    /**
     * Waits until the page has finished loading by polling {@code document.readyState}.
     *
     * <p>Implemented as a loop so a slow page cannot grow the call stack. A missing or malformed
     * response counts as "not ready yet" and is retried until the timeout.
     *
     * @param myWebSocket connection to the browser
     * @param sessionId session of the page to poll
     * @param startDate moment the wait started; the timeout is measured from here
     * @throws InterruptedException if the thread is interrupted while sleeping, or if the page is
     *     still not ready 300 seconds after {@code startDate}
     */
    public static void waitReady(MyWebSocket myWebSocket, String sessionId, Date startDate) throws InterruptedException {
        JSONObject params = new JSONObject();
        params.put("expression", "document.readyState == 'complete'");
        JSONObject command = newCommand(700, "Runtime.evaluate", sessionId, params);
        while (true) {
            Thread.sleep(POLL_INTERVAL_MS);
            String result = send(myWebSocket, command);
            log.warn("###" + result);
            // Runtime.evaluate nests the evaluated value as result.result.value.
            JSONObject outer = resultOrNull(result);
            JSONObject evaluated = outer == null ? null : outer.getJSONObject("result");
            if (evaluated != null && evaluated.getBooleanValue("value")) {
                return;
            }
            if (System.currentTimeMillis() - startDate.getTime() >= READY_TIMEOUT_MS) {
                throw new InterruptedException("加载网页超时!");
            }
        }
    }

    /**
     * Waits until the socket has flagged the page as finished, which it does when a matching
     * {@code Page.lifecycleEvent} arrives. Example event:
     * {"method":"Page.lifecycleEvent","params":{"frameId":"085D45927EF84EF41BA88A4E36DA4EF4","loaderId":"340CEA67B9B7FE8CB0A45F6950EE6BFB","name":"firstMeaningfulPaint","timestamp":29393.87702},"sessionId":"5CE410C83FC9A04F1C250B369D786456"}
     *
     * @param myWebSocket connection whose page-finished flag is polled
     * @param sessionId not used by this method; kept for signature compatibility
     * @param startDate moment the wait started; the timeout is measured from here
     * @throws InterruptedException if the thread is interrupted while sleeping, or if the flag is
     *     still unset 120 seconds after {@code startDate}
     */
    public static void waitPagePaint(MyWebSocket myWebSocket, String sessionId, Date startDate) throws InterruptedException {
        do {
            Thread.sleep(POLL_INTERVAL_MS);
            if (System.currentTimeMillis() - startDate.getTime() >= PAINT_TIMEOUT_MS) {
                throw new InterruptedException("加载网页超时!");
            }
        } while (!myWebSocket.isPagefinished());
    }

    /**
     * Asks Chrome to print the page to PDF and return it as a stream.
     *
     * @param headerTemplate HTML template for the page header
     * @param footerTemplate HTML template for the page footer
     * @return the stream handle to pass to {@link #storePdf}
     * @throws IllegalStateException if Chrome returned no result
     */
    public static String printToPDF(MyWebSocket myWebSocket, String sessionId, String headerTemplate, String footerTemplate) {
        JSONObject params = new JSONObject();
        params.put("printBackground", true);
        params.put("transferMode", "ReturnAsStream");
        params.put("displayHeaderFooter", true);
        params.put("headerTemplate", headerTemplate);
        params.put("footerTemplate", footerTemplate);
        // CDP expects paper size and margins in inches.
        params.put("paperHeight", 10.08);
        params.put("paperWidth", 6.93);
        params.put("marginBottom", 0.59);
        params.put("marginLeft", 0.59);
        params.put("marginRight", 0.59);
        params.put("marginTop", 0.59);
        String response = send(myWebSocket, newCommand(800, "Page.printToPDF", sessionId, params));
        return requireResult(response, "Page.printToPDF").getString("stream");
    }

    /**
     * Reads the stream chunk by chunk ({@code IO.read}), writes the Base64-decoded bytes to
     * {@code out}, and closes the stream handle afterwards.
     *
     * <p>Chunks are read in a loop, so large documents do not deepen the call stack. The handle is
     * also released when reading fails.
     *
     * @param ioHandle stream handle returned by {@link #printToPDF}
     * @param out destination; not closed by this method
     * @throws IOException if writing fails or Chrome returns no result for a read
     */
    public static void storePdf(MyWebSocket myWebSocket, String sessionId, String ioHandle, OutputStream out) throws IOException {
        JSONObject params = new JSONObject();
        params.put("handle", ioHandle);
        JSONObject command = newCommand(900, "IO.read", sessionId, params);
        try {
            boolean eof;
            do {
                String response = send(myWebSocket, command);
                JSONObject chunk = resultOrNull(response);
                if (chunk == null) {
                    throw new IOException("IO.read returned no result for handle " + ioHandle + ": " + response);
                }
                String data = chunk.getString("data");
                if (data != null && !data.isEmpty()) {
                    out.write(Base64.getDecoder().decode(data));
                }
                eof = chunk.getBooleanValue("eof");
            } while (!eof);
        } catch (IOException | RuntimeException e) {
            // Best effort: release the handle without hiding the original failure.
            try {
                closeIO(myWebSocket, sessionId, ioHandle);
            } catch (RuntimeException closeFailure) {
                log.error(closeFailure.getMessage(), closeFailure);
            }
            throw e;
        }
        closeIO(myWebSocket, sessionId, ioHandle);
    }

    /** Closes a stream handle ({@code IO.close}). */
    public static void closeIO(MyWebSocket myWebSocket, String sessionId, String ioHandle) {
        JSONObject params = new JSONObject();
        params.put("handle", ioHandle);
        send(myWebSocket, newCommand(1000, "IO.close", sessionId, params));
    }

    /**
     * Fetches the DOM tree ({@code DOM.getDocument}) down to the given depth.
     *
     * @return the raw JSON response, or {@code null} if no response arrived
     */
    public static String getDocument(MyWebSocket myWebSocket, String sessionId, int depth) {
        JSONObject params = new JSONObject();
        params.put("depth", depth);
        return send(myWebSocket, newCommand(1100, "DOM.getDocument", sessionId, params));
    }

    /**
     * Inspects the page content. If the first text inside the body's first element looks like a
     * JSON error payload (it contains "statusCode" and "message"), that error is returned as is.
     * A page without usable body content yields error code -10000, which callers should retry.
     *
     * @param myWebSocket connection to the browser
     * @param sessionId session of the page to inspect
     * @param depth DOM depth to request; it must reach the children of the body's first element
     * @return a success result, or the failure described above
     */
    public static Result getPageResult(MyWebSocket myWebSocket, String sessionId, int depth) {
        JSONObject payload = resultOrNull(getDocument(myWebSocket, sessionId, depth));
        JSONObject root = payload == null ? null : payload.getJSONObject("root");
        JSONObject htmlObj = findElement(root == null ? null : root.getJSONArray("children"), "html");
        JSONObject bodyObj = findElement(htmlObj == null ? null : htmlObj.getJSONArray("children"), "body");
        JSONArray bodyChildrens = bodyObj == null ? null : bodyObj.getJSONArray("children");
        if (bodyChildrens == null || bodyChildrens.isEmpty()) {
            // Also covers a missing document, <html> or <body>: there is nothing to print.
            return Result.failure(RETRYABLE_ERROR, "错题集内容为空");
        }
        JSONObject firstElement = bodyChildrens.getJSONObject(0);
        JSONArray firstChildren = firstElement == null ? null : firstElement.getJSONArray("children");
        if (firstChildren != null && !firstChildren.isEmpty()) {
            JSONObject errResultObj = firstChildren.getJSONObject(0);
            if (errResultObj != null && errResultObj.containsKey("nodeValue")) {
                String err = errResultObj.getString("nodeValue");
                if (err != null && err.contains("statusCode") && err.contains("message")) {
                    JSONObject errObj = JSON.parseObject(err);
                    return Result.failure(errObj.getIntValue("statusCode"), errObj.getString("message"));
                }
            }
        }
        return Result.sucess(0, "");
    }

    /** Returns the first node whose nodeName and localName both equal {@code tag}, ignoring case. */
    private static JSONObject findElement(JSONArray nodes, String tag) {
        if (nodes == null) {
            return null;
        }
        for (int i = 0; i < nodes.size(); i++) {
            JSONObject node = nodes.getJSONObject(i);
            if (node != null
                    && tag.equalsIgnoreCase(node.getString("nodeName"))
                    && tag.equalsIgnoreCase(node.getString("localName"))) {
                return node;
            }
        }
        return null;
    }

    /**
     * Loads {@code url} in the given session and saves the rendered page as a PDF at {@code path}.
     *
     * <p>Steps: navigate, wait for the paint/idle lifecycle event, pause 5 seconds, check the page
     * for an error payload, print to PDF, stream the PDF to the file.
     *
     * @return success when a non-empty file was written; otherwise the page's own error, or a
     *     failure with code -10000
     */
    public static Result html2pdf(MyWebSocket myWebSocket, String sessionId, String url, String path, String headerTemplate, String footerTemplate) {
        Result result = Result.failure(-100, "初始状态");
        try {
            // Reset before navigating so an event from a previous page is not mistaken for this one.
            myWebSocket.setPagefinished(false);
            log.warn("加载页面:" + url);
            navigate(myWebSocket, sessionId, url);
            waitPagePaint(myWebSocket, sessionId, new Date());
            Thread.sleep(5000);
            log.warn("完成加载:" + url);
            Result checkResult = getPageResult(myWebSocket, sessionId, 3);
            log.warn("获取内容:" + url);
            if (!checkResult.isSuccess()) {
                return checkResult;
            }
            String ioHandle = printToPDF(myWebSocket, sessionId, headerTemplate, footerTemplate);
            log.warn("获取数据:" + path);
            // try-with-resources closes the file even when streaming fails half-way.
            try (OutputStream out = new FileOutputStream(path)) {
                storePdf(myWebSocket, sessionId, ioHandle, out);
            }
            log.warn("完成打印:" + path);
            File resultFile = new File(path);
            if (resultFile.exists() && resultFile.length() > 0) {
                result = Result.sucess(1, "成功");
            } else {
                result = Result.failure(RETRYABLE_ERROR, "文件保存错误");
            }
        } catch (InterruptedException e) {
            // The wait helpers also signal timeouts with this exception type, so the interrupt
            // flag is deliberately not restored here.
            log.error(e.getMessage(), e);
            result = Result.failure(RETRYABLE_ERROR, "加载网页超时");
        } catch (FileNotFoundException e) {
            log.error(e.getMessage(), e);
            result = Result.failure(RETRYABLE_ERROR, "文件不存在");
        } catch (IOException e) {
            log.error(e.getMessage(), e);
            result = Result.failure(RETRYABLE_ERROR, "文件读写错误");
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            result = Result.failure(RETRYABLE_ERROR, "未知异常");
        }
        return result;
    }

    /**
     * Force-kills every Chrome process on the machine (not only those started by this class) and
     * logs the output of the kill command. Does nothing except log on other operating systems.
     */
    public static void killChrome() {
        String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);
        try {
            Process process;
            if (os.startsWith("windows")) {
                process = Runtime.getRuntime().exec("taskkill /f /t /im chrome.exe");
            } else if (os.startsWith("linux")) {
                // Needs a shell because of the command substitution and the pipes.
                String command = "kill -9 $(ps aux|grep google-chrome|grep -v 'grep' |awk '{print $2}')";
                process = Runtime.getRuntime().exec(new String[]{"sh", "-c", command});
            } else {
                log.warn("killChrome is not supported on this operating system: " + os);
                return;
            }
            if (!process.waitFor(10, TimeUnit.SECONDS)) {
                // A hung kill command would otherwise block the read below forever.
                process.destroyForcibly();
            }
            try (InputStream inputStream = process.getInputStream();
                 ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                IOUtils.copy(inputStream, baos);
                log.warn("关闭所有浏览器:" + baos.toString());
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error(e.getMessage(), e);
        }
    }
}
Java WebSocket 代码(因为与 Chrome 浏览器的通信基于 WebSocket):
public class MyWebSocket extends WebSocketClient {
private int code;
private String message;
private boolean isPagefinished;
public MyWebSocket(URI serverUri) {
super(serverUri);
this.waitConnect();
}
public void waitConnect(){
try {
this.connectBlocking();
} catch (InterruptedException e) {
log.error(e.getMessage(),e);
}
}
@Override
public void onOpen(ServerHandshake serverHandshake) {
log.warn("###建立连接");
}
@Override
public void onMessage(String s) {
//log.warn("接受消息:"+s);
synchronized(this) {
if (!StringUtils.isEmpty(s)) {
JSONObject obj = JSON.parseObject(s);
if (obj.containsKey("id") && obj.getIntValue("id") == code) {
//处理交互消息
this.message = s;
this.notify();
if(obj.getIntValue("id")!=900)log.warn("接受内容消息:"+s);
}else if(obj.containsKey("method") && "Page.lifecycleEvent".equals(obj.getString("method"))){
//处理事件消息
JSONObject eventObj=obj.getJSONObject("params");
//渲染完成事件
if(eventObj.containsKey("name")&&("firstMeaningfulPaint".equals(eventObj.getString("name"))||"networkIdle".equals(eventObj.getString("name")))){
this.setPagefinished(true);
}
log.warn("接受事件消息:"+s);
}
}
}
}
@Override
public void onClose(int i, String s, boolean b) {
log.warn("###关闭连接:"+this.getURI()+"-"+i+"-"+s+"-"+b);
}
@Override
public void onError(Exception e) {
log.error("###通信错误"+e.getMessage(),e);
}
public String sendAndGet(String text,int code) {
synchronized(this) {
message = null;
this.code = code;
try {
this.send(text);
this.wait(120000);
} catch (InterruptedException e) {
log.error(e.getMessage(), e);
}
}
return message;
}
public int getCode() {
return code;
}
public void setCode(int code) {
this.code = code;
}
public String getMessage() {
return message;
}
public void setMessage(String message) {
this.message = message;
}
public boolean isPagefinished() {
return isPagefinished;
}
public void setPagefinished(boolean pagefinished) {
isPagefinished = pagefinished;
}
}
附:chromedevtools协议文档官方的两个链接
https://chromedevtools.github.io/devtools-protocol/
https://vanilla.aslushnikov.com/