java直接调用google chrome headless浏览器 实现网页转换导出为pdf

2020-04-16 11:50:21 | 编辑

上一篇文章介绍了《java直接操作chrome谷歌浏览器 实现网页爬虫》,我们学会了用 java 通过 rest api 的方式调用谷歌 chrome 浏览器的 headless 无头模式。下面的案例直接将网页渲染后转换并导出为 pdf。

无论何时都需要参考官方协议文档:https://chromedevtools.github.io/devtools-protocol/

首先在电脑上安装 Google Chrome 浏览器。

1.pom

    <!-- Java-WebSocket library from org.java-websocket, pinned to version 1.4.0. -->
    <dependency>
      <groupId>org.java-websocket</groupId>
      <artifactId>Java-WebSocket</artifactId>
      <version>1.4.0</version>
    </dependency>

    <!-- Alibaba fastjson. NOTE(review): no <version> is declared here, so the version
         has to be supplied elsewhere (e.g. a parent POM or a dependencyManagement
         section) - confirm, or add an explicit <version>. -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
    </dependency>

2.java代码

/**
 * Renders {@code http://bcxw.net} in a headless Chrome instance and saves the page as a PDF.
 *
 * <p>Steps: start Chrome with a throw-away profile, read the DevTools WebSocket URL that
 * Chrome prints on stderr, then drive the browser over the Chrome DevTools Protocol
 * (create browser context, create target, attach a session, navigate, wait for
 * {@code document.readyState == 'complete'}, {@code Page.printToPDF} as a stream,
 * {@code IO.read} until EOF, {@code Browser.close}).
 *
 * <p>The PDF is written to {@code d:/test.pdf} on Windows and to {@code test.pdf} inside
 * {@code java.io.tmpdir} on every other OS. The Chrome profile directory is
 * {@code d:\chromeHome\<id>} on Windows and {@code chromeHome/<id>} inside
 * {@code java.io.tmpdir} elsewhere.
 *
 * <p>Failures are printed with {@code printStackTrace()} and never thrown to the caller.
 * The socket and the Chrome process are always released, including on failure.
 *
 * <p>{@code browserContextId}, {@code targetId}, {@code sessionId}, {@code isPageReady},
 * {@code ioHandle}, {@code ioData} and {@code ioEof} are fields declared outside this
 * method; they are still assigned here, but only from the calling thread.
 */
public static void htmlToPdf() {
    // Upper bound for a single protocol reply, and for the whole page load.
    final long replyTimeoutSeconds = 60;
    final long pageLoadTimeoutSeconds = 60;

    Process process = null;
    CdpConnection connection = null;
    try {
        String cmdID = Long.toHexString(ThreadLocalRandom.current().nextLong());
        // Locale.ROOT keeps the check stable under locales with special casing rules (e.g. Turkish).
        boolean windows = System.getProperty("os.name", "")
                .toLowerCase(java.util.Locale.ROOT).startsWith("windows");
        String tmpDir = System.getProperty("java.io.tmpdir");
        String profileDir = windows
                ? "d:\\chromeHome\\" + cmdID
                : new java.io.File(tmpDir, "chromeHome" + java.io.File.separator + cmdID).getPath();
        String pdfPath = windows
                ? "d:/test.pdf"
                : new java.io.File(tmpDir, "test.pdf").getPath();

        // Build the command line that starts the browser.
        List<String> arguments = new ArrayList<>();
        // Every non-Windows OS relies on "google-chrome" being on the PATH, so a missing
        // browser fails with a clear "cannot run program" error instead of running a flag.
        arguments.add(windows
                ? "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe"
                : "google-chrome");
        arguments.add("--disable-features=TranslateUI");
        arguments.add("--disable-extensions");
        arguments.add("--disable-background-networking");
        arguments.add("--safebrowsing-disable-auto-update");
        arguments.add("--disable-sync");
        arguments.add("--metrics-recording-only");
        arguments.add("--disable-default-apps");
        arguments.add("--mute-audio");
        arguments.add("--no-first-run");
        arguments.add("--no-default-browser-check");
        arguments.add("--disable-plugin-power-saver");
        arguments.add("--disable-popup-blocking");
        // Port 0 lets Chrome pick a free port; the real URL is read from stderr below.
        arguments.add("--remote-debugging-port=0");
        arguments.add("--user-data-dir=" + profileDir);
        arguments.add("--headless");

        ProcessBuilder builder = new ProcessBuilder(arguments);
        builder.environment().put("CDP4J_ID", cmdID);
        process = builder.start();

        // WebSocket communication with the browser.
        connection = new CdpConnection(readDevToolsUri(process));
        if (!connection.connectBlocking()) {
            throw new IllegalStateException("Could not open the DevTools WebSocket connection");
        }

        // Fetch target information. The replies are not used, so protocol errors are tolerated.
        connection.call(1000, "{\"id\":1000,\"method\":\"Target.setDiscoverTargets\",\"params\":{\"discover\":true}}", replyTimeoutSeconds);
        connection.call(1000, "{\"id\":1000,\"method\":\"Target.getTargetInfo\"}", replyTimeoutSeconds);

        // Create the browser context.
        browserContextId = connection.request(10, "{\"id\":10,\"method\":\"Target.createBrowserContext\"}", replyTimeoutSeconds)
                .getString("browserContextId");

        // Create the target (page).
        targetId = connection.request(20, "{\"id\":20,\"method\":\"Target.createTarget\",\"params\":{\"url\":\"about:blank\",\"width\":1366,\"height\":768,\"browserContextId\":\""+browserContextId+"\",\"enableBeginFrameControl\":false}}", replyTimeoutSeconds)
                .getString("targetId");

        // Attach a session to the target.
        sessionId = connection.request(30, "{\"id\":30,\"method\":\"Target.attachToTarget\",\"params\":{\"targetId\":\""+targetId+"\",\"flatten\":true}}", replyTimeoutSeconds)
                .getString("sessionId");

        // Enable the domains used on the page.
        connection.call(1000, "{\"id\":1000,\"sessionId\":\""+sessionId+"\",\"method\":\"Runtime.enable\"}", replyTimeoutSeconds);
        connection.call(1000, "{\"id\":1000,\"sessionId\":\""+sessionId+"\",\"method\":\"Page.enable\"}", replyTimeoutSeconds);
        connection.call(1000, "{\"id\":1000,\"sessionId\":\""+sessionId+"\",\"method\":\"Page.setLifecycleEventsEnabled\",\"params\":{\"enabled\":true}}", replyTimeoutSeconds);

        // Navigate to the page.
        connection.call(1000, "{\"id\":1000,\"sessionId\":\""+sessionId+"\",\"method\":\"Page.navigate\",\"params\":{\"url\":\"http://bcxw.net\"}}", replyTimeoutSeconds);

        // Wait until the page has finished loading.
        long loadDeadline = System.nanoTime() + java.util.concurrent.TimeUnit.SECONDS.toNanos(pageLoadTimeoutSeconds);
        do {
            if (System.nanoTime() - loadDeadline > 0) {
                throw new IllegalStateException("Page did not finish loading within " + pageLoadTimeoutSeconds + "s");
            }
            Thread.sleep(1000);
            // No contextId: the expression is evaluated in the page's own context instead of
            // relying on a hard-coded execution-context id.
            JSONObject reply = connection.call(35, "{\"id\":35,\"sessionId\":\""+sessionId+"\",\"method\":\"Runtime.evaluate\",\"params\":{\"expression\":\"document.readyState == 'complete'\"}}", replyTimeoutSeconds);
            // An error reply (e.g. the context was replaced mid-navigation) just means "not ready yet".
            JSONObject result = reply.getJSONObject("result");
            JSONObject remoteObject = result == null ? null : result.getJSONObject("result");
            isPageReady = remoteObject != null && remoteObject.getBooleanValue("value");
        } while (!isPageReady);

        // Export the page as a PDF stream.
        ioHandle = connection.request(40, "{\"id\":40,\"sessionId\":\""+sessionId+"\",\"method\":\"Page.printToPDF\",\"params\":{\"transferMode\":\"ReturnAsStream\"}}", replyTimeoutSeconds)
                .getString("stream");

        // Read the stream chunk by chunk. do/while: the EOF flag left by a previous run must
        // not skip the first read.
        try (FileOutputStream out = new FileOutputStream(pdfPath)) {
            do {
                JSONObject chunk = connection.request(50, "{\"id\":50,\"sessionId\":\""+sessionId+"\",\"method\":\"IO.read\",\"params\":{\"handle\":\""+ioHandle+"\"}}", replyTimeoutSeconds);
                ioData = chunk.getString("data");
                ioEof = chunk.getBooleanValue("eof");
                if (ioData != null && !ioData.isEmpty()) {
                    out.write(Base64.getDecoder().decode(ioData));
                }
            } while (!ioEof);
        }

        // Close the protocol stream.
        connection.call(1000, "{\"id\":1000,\"sessionId\":\""+sessionId+"\",\"method\":\"IO.close\",\"params\":{\"handle\":\""+ioHandle+"\"}}", replyTimeoutSeconds);

        // Close the browser.
        try {
            connection.call(1000, "{\"id\":1000,\"method\":\"Browser.close\"}", 10);
        } catch (IllegalStateException ignored) {
            // Chrome may drop the socket without answering Browser.close; the PDF is already
            // on disk and the process is reaped in the finally block.
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the socket (non-blocking) and make sure Chrome does not outlive this call.
        if (connection != null) {
            connection.close();
        }
        if (process != null) {
            stopBrowser(process);
        }
    }
}

/**
 * Reads Chrome's stderr until the "DevTools listening on ws://..." line appears and
 * returns the WebSocket URL from it. The stream is closed afterwards.
 *
 * @throws IllegalStateException if stderr ends without such a line
 */
private static URI readDevToolsUri(Process process) throws java.net.URISyntaxException {
    try (Scanner scanner = new Scanner(process.getErrorStream())) {
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine().trim();
            int start = line.indexOf("ws://");
            if (start >= 0 && line.toLowerCase(java.util.Locale.ROOT).startsWith("devtools listening on")) {
                return new URI(line.substring(start));
            }
        }
    }
    throw new IllegalStateException("Chrome exited without announcing a DevTools WebSocket URL");
}

/** Gives Chrome up to five seconds to exit on its own, then kills it. */
private static void stopBrowser(Process process) {
    try {
        if (!process.waitFor(5, java.util.concurrent.TimeUnit.SECONDS)) {
            process.destroyForcibly();
        }
    } catch (InterruptedException e) {
        process.destroyForcibly();
        Thread.currentThread().interrupt();
    }
}

/**
 * WebSocket client that hands every reply carrying an {@code id} to the calling thread
 * through a queue, so the caller waits on real data with a timeout instead of on bare
 * thread parking.
 */
private static final class CdpConnection extends WebSocketClient {
    /** Marker put on the queue when the socket closes, so a waiting caller wakes up at once. */
    private static final JSONObject CLOSED = new JSONObject();

    private final java.util.concurrent.BlockingQueue<JSONObject> replies =
            new java.util.concurrent.LinkedBlockingQueue<>();

    CdpConnection(URI uri) {
        super(uri);
    }

    @Override
    public void onOpen(ServerHandshake serverHandshake) {
        System.out.println("已经连接");
    }

    @Override
    public void onMessage(String s) {
        System.out.println("接受到消息:"+s);
        JSONObject message = JSON.parseObject(s);
        // Messages without an id are protocol events; only command replies are queued.
        if (message != null && message.get("id") != null) {
            replies.offer(message);
        }
    }

    @Override
    public void onClose(int i, String s, boolean b) {
        replies.offer(CLOSED);
    }

    @Override
    public void onError(Exception e) {
        e.printStackTrace();
    }

    /**
     * Sends {@code payload} and waits for the reply whose {@code id} equals {@code id}.
     * The reply is returned as-is, even when it is a protocol error.
     *
     * @throws IllegalStateException if no reply arrives in time or the socket closes first
     */
    JSONObject call(int id, String payload, long timeoutSeconds) throws InterruptedException {
        send(payload);
        long deadline = System.nanoTime() + java.util.concurrent.TimeUnit.SECONDS.toNanos(timeoutSeconds);
        while (true) {
            JSONObject reply = replies.poll(deadline - System.nanoTime(), java.util.concurrent.TimeUnit.NANOSECONDS);
            if (reply == null) {
                throw new IllegalStateException("Timed out after " + timeoutSeconds + "s waiting for the reply to: " + payload);
            }
            if (reply == CLOSED) {
                throw new IllegalStateException("DevTools connection closed while waiting for the reply to: " + payload);
            }
            if (reply.getIntValue("id") == id) {
                return reply;
            }
        }
    }

    /**
     * Like {@link #call}, but returns the reply's {@code result} object and fails when the
     * browser answered with an error instead.
     *
     * @throws IllegalStateException if the reply has no {@code result}
     */
    JSONObject request(int id, String payload, long timeoutSeconds) throws InterruptedException {
        JSONObject reply = call(id, payload, timeoutSeconds);
        JSONObject result = reply.getJSONObject("result");
        if (result == null) {
            throw new IllegalStateException("Command failed: " + payload + " -> " + reply.toJSONString());
        }
        return result;
    }
}

 

 

 

登录后即可回复 登录 | 注册
    
关注编程学问公众号