上一篇文章介绍了如何用 Java 直接操作谷歌 Chrome 浏览器实现网页爬虫,我们学会了用 Java 通过 REST API 的方式调用谷歌 Chrome 浏览器的 headless(无头)模式。下面的案例直接将网页渲染并导出为 PDF。
无论何时都需要参考官方协议文档:https://chromedevtools.github.io/devtools-protocol/
首先在电脑上安装谷歌 Chrome 浏览器。
1.pom
<!-- WebSocket client library; the Java code below subclasses its WebSocketClient to talk to Chrome. -->
<dependency>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>1.4.0</version>
</dependency>
<!-- fastjson supplies the JSON / JSONObject classes used to parse the DevTools replies. -->
<!-- NOTE(review): no <version> is given here; presumably it is managed by a parent POM or dependencyManagement section. TODO confirm. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</dependency>
2.java代码
/**
 * Renders {@code http://bcxw.net} in headless Chrome and saves it as {@code d:/test.pdf}.
 *
 * <p>The method launches Chrome with {@code --remote-debugging-port=0}, reads the DevTools
 * WebSocket URL that the browser prints on stderr, and then drives the DevTools protocol
 * over that socket: create a browser context and a target, attach a flattened session,
 * navigate, poll {@code document.readyState}, call {@code Page.printToPDF} in stream mode
 * and copy the stream to disk with {@code IO.read}.
 *
 * <p>Reply values are stored by the WebSocket callbacks in fields declared outside this
 * method ({@code browserContextId}, {@code targetId}, {@code sessionId}, {@code isPageReady},
 * {@code ioHandle}, {@code ioData}, {@code ioEof}); the calling thread waits for each reply
 * with {@link LockSupport#park()} and is woken by the callbacks. Because that state is
 * shared, concurrent invocations would overwrite each other's values.
 *
 * <p>Failures are printed with {@code printStackTrace()} and not rethrown. If the run does
 * not complete, the launched Chrome process is destroyed so it is not left behind.
 *
 * <p>NOTE(review): the output path is a hard-coded Windows-style path; on Linux it resolves
 * relative to the working directory.
 */
public static void htmlToPdf() {
    Process process = null;
    boolean finished = false;
    try {
        // These flags live outside the method and keep their value between calls; without a
        // reset a second invocation would skip both the page-ready loop and the IO.read loop.
        isPageReady = false;
        ioEof = false;

        URI socketUri = null;
        String cmdID = Long.toHexString(ThreadLocalRandom.current().nextLong());

        // Build the browser command line.
        List<String> arguments = new ArrayList<>();
        String os = System.getProperty("os.name").toLowerCase();
        boolean windows = os.startsWith("windows");
        if (windows) {
            arguments.add("C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe");
        } else if (os.startsWith("linux")) {
            arguments.add("google-chrome");
        } else {
            // Without an executable the first flag would be run as the program name.
            throw new IllegalStateException("Unsupported operating system: " + os);
        }
        arguments.add("--disable-features=TranslateUI");
        arguments.add("--disable-extensions");
        arguments.add("--disable-background-networking");
        arguments.add("--safebrowsing-disable-auto-update");
        arguments.add("--disable-sync");
        arguments.add("--metrics-recording-only");
        arguments.add("--disable-default-apps");
        arguments.add("--mute-audio");
        arguments.add("--no-first-run");
        arguments.add("--no-default-browser-check");
        arguments.add("--disable-plugin-power-saver");
        arguments.add("--disable-popup-blocking");
        // Port 0 lets Chrome pick a free port; the chosen endpoint is read from stderr below.
        arguments.add("--remote-debugging-port=0");
        if (windows) {
            arguments.add("--user-data-dir=d:\\chromeHome\\" + cmdID);
        } else {
            // A "d:\..." path is not meaningful on Linux; use a per-run directory under the temp dir.
            arguments.add("--user-data-dir=" + System.getProperty("java.io.tmpdir") + "/chromeHome/" + cmdID);
        }
        arguments.add("--headless");

        ProcessBuilder builder = new ProcessBuilder(arguments);
        builder.environment().put("CDP4J_ID", cmdID);
        process = builder.start();

        // Chrome announces "DevTools listening on ws://..." on stderr.
        try (Scanner scanner = new Scanner(process.getErrorStream())) {
            while (scanner.hasNext()) {
                String line = scanner.nextLine().trim();
                if (!line.isEmpty() && line.toLowerCase().startsWith("devtools listening on")) {
                    int start = line.indexOf("ws://");
                    if (start >= 0) {
                        socketUri = new URI(line.substring(start));
                        break;
                    }
                }
            }
        }
        if (socketUri == null) {
            throw new IllegalStateException("Chrome did not report a DevTools WebSocket URL on stderr");
        }

        final Thread caller = Thread.currentThread();
        WebSocketClient webSocketClient = new WebSocketClient(socketUri) {
            @Override
            public void onOpen(ServerHandshake serverHandshake) {
                System.out.println("已经连接");
                LockSupport.unpark(caller);
            }

            @Override
            public void onMessage(String s) {
                System.out.println("接受到消息:" + s);
                JSONObject message = JSON.parseObject(s);
                if (StringUtils.isEmpty(message.get("id"))) {
                    // Messages without an id are not replies to a command; nobody is waiting on them.
                    return;
                }
                try {
                    JSONObject result = message.getJSONObject("result");
                    if (result == null) {
                        // Error replies carry "error" instead of "result".
                        System.err.println("DevTools command " + message.get("id") + " failed: " + message.get("error"));
                        return;
                    }
                    switch (message.getIntValue("id")) {
                        case 10:
                            browserContextId = result.getString("browserContextId");
                            break;
                        case 20:
                            targetId = result.getString("targetId");
                            break;
                        case 30:
                            sessionId = result.getString("sessionId");
                            break;
                        case 35:
                            isPageReady = result.getJSONObject("result").getBooleanValue("value");
                            break;
                        case 40:
                            ioHandle = result.getString("stream");
                            break;
                        case 50:
                            ioData = result.getString("data");
                            ioEof = result.getBooleanValue("eof");
                            break;
                        default:
                            // Replies to the id-1000 commands carry nothing this method needs.
                            break;
                    }
                } finally {
                    // Always wake the caller, even for a failed or malformed reply, so it cannot hang.
                    LockSupport.unpark(caller);
                }
            }

            @Override
            public void onClose(int i, String s, boolean b) {
                LockSupport.unpark(caller);
            }

            @Override
            public void onError(Exception e) {
                e.printStackTrace();
            }
        };
        webSocketClient.connect();
        LockSupport.park(); // wait for onOpen (or onClose if the connection failed)

        // Target discovery / info.
        sendAndAwait(webSocketClient, "{\"id\":1000,\"method\":\"Target.setDiscoverTargets\",\"params\":{\"discover\":true}}");
        sendAndAwait(webSocketClient, "{\"id\":1000,\"method\":\"Target.getTargetInfo\"}");
        // Create the browser context.
        sendAndAwait(webSocketClient, "{\"id\":10,\"method\":\"Target.createBrowserContext\"}");
        // Create the target (a blank page) inside that context.
        sendAndAwait(webSocketClient, "{\"id\":20,\"method\":\"Target.createTarget\",\"params\":{\"url\":\"about:blank\",\"width\":1366,\"height\":768,\"browserContextId\":\"" + browserContextId + "\",\"enableBeginFrameControl\":false}}");
        // Attach a session to the target.
        sendAndAwait(webSocketClient, "{\"id\":30,\"method\":\"Target.attachToTarget\",\"params\":{\"targetId\":\"" + targetId + "\",\"flatten\":true}}");
        // Enable the Runtime and Page domains for the session.
        sendAndAwait(webSocketClient, "{\"id\":1000,\"sessionId\":\"" + sessionId + "\",\"method\":\"Runtime.enable\"}");
        sendAndAwait(webSocketClient, "{\"id\":1000,\"sessionId\":\"" + sessionId + "\",\"method\":\"Page.enable\"}");
        sendAndAwait(webSocketClient, "{\"id\":1000,\"sessionId\":\"" + sessionId + "\",\"method\":\"Page.setLifecycleEventsEnabled\",\"params\":{\"enabled\":true}}");
        // Navigate to the page.
        sendAndAwait(webSocketClient, "{\"id\":1000,\"sessionId\":\"" + sessionId + "\",\"method\":\"Page.navigate\",\"params\":{\"url\":\"http://bcxw.net\"}}");

        // Poll once per second until the document reports it has finished loading.
        // No contextId is sent: a hard-coded id is not guaranteed to exist, and the
        // parameter is optional in the protocol.
        do {
            Thread.sleep(1000);
            sendAndAwait(webSocketClient, "{\"id\":35,\"sessionId\":\"" + sessionId + "\",\"method\":\"Runtime.evaluate\",\"params\":{\"expression\":\"document.readyState == 'complete'\"}}");
        } while (!isPageReady);

        // Ask for the PDF as a stream handle rather than one large inline payload.
        sendAndAwait(webSocketClient, "{\"id\":40,\"sessionId\":\"" + sessionId + "\",\"method\":\"Page.printToPDF\",\"params\":{\"transferMode\":\"ReturnAsStream\"}}");

        // Drain the stream chunk by chunk; the chunks are decoded as Base64.
        try (FileOutputStream out = new FileOutputStream("d:/test.pdf")) {
            while (!ioEof) {
                ioData = null; // so a failed read is detected instead of rewriting the previous chunk
                sendAndAwait(webSocketClient, "{\"id\":50,\"sessionId\":\"" + sessionId + "\",\"method\":\"IO.read\",\"params\":{\"handle\":\"" + ioHandle + "\"}}");
                if (ioData == null) {
                    throw new IllegalStateException("IO.read returned no data for stream " + ioHandle);
                }
                out.write(Base64.getDecoder().decode(ioData));
            }
        }

        // Release the stream handle.
        sendAndAwait(webSocketClient, "{\"id\":1000,\"sessionId\":\"" + sessionId + "\",\"method\":\"IO.close\",\"params\":{\"handle\":\"" + ioHandle + "\"}}");
        // Shut the browser down.
        sendAndAwait(webSocketClient, "{\"id\":1000,\"method\":\"Browser.close\"}");
        // The browser may already have dropped the connection; closeBlocking() returns
        // immediately in that case, whereas close() followed by park() could wait forever.
        webSocketClient.closeBlocking();
        finished = true;
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
        }
        e.printStackTrace();
    } finally {
        if (process != null && !finished) {
            // Browser.close was never reached or confirmed; do not leave the browser running.
            process.destroy();
        }
    }
}

/** Sends one DevTools command and parks the calling thread until a WebSocket callback unparks it. */
private static void sendAndAwait(WebSocketClient client, String command) {
    client.send(command);
    LockSupport.park();
}