java cdp4j操作浏览器渲染html导出pdf

java | 2020-03-15 11:51:53

关于cdp4j请参考Java操作Chrome浏览器的API库-cdp4j。因为html包含js影响页面展示,所以必须要使用html渲染,才能正常导出为pdf

1.下载cdp4j jar

新的cdp4j在maven中没有,所以我是先下载然后安装到本地仓库,再maven引用。

jar下载地址

https://github.com/webfolderio/cdp4j/releases/download/4.2.2/cdp4j-4.2.2.jar
https://github.com/webfolderio/cdp4j/releases/download/4.2.2/cdp4j-4.2.2-sources.jar

然后安装jar包到maven本地仓库

参考:maven mvn命令把本地jar包上传到私服

另外要注意的是下载的jar依赖的包很多是没有打进来的,所以把项目的pom打开,然后把maven信息复制进来

2.pom配置

我是把源码中的有需要的pom的maven依赖都复制到自己项目了,能正常运行


    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
    </dependency>

    <dependency>
      <groupId>io.webfolder</groupId>
      <artifactId>cdp</artifactId>
      <version>4.2.2</version>
    </dependency>

    <dependency>
      <groupId>io.webfolder</groupId>
      <artifactId>cdp-sources</artifactId>
      <version>4.2.2</version>
    </dependency>

    <dependency>
      <groupId>com.vimeo.stag</groupId>
      <artifactId>stag-library</artifactId>
      <version>2.6.0</version>
      <scope>provided</scope>
      <optional>true</optional>
    </dependency>


    <dependency>
      <groupId>com.google.code.gson</groupId>
      <artifactId>gson</artifactId>
      <version>2.8.6</version>
    </dependency>

    <dependency>
      <groupId>io.webfolder</groupId>
      <artifactId>wf-exec</artifactId>
      <version>1.0.3</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.neovisionaries</groupId>
      <artifactId>nv-websocket-client</artifactId>
      <version>2.9</version>
    </dependency>

    <dependency>
      <groupId>org.asynchttpclient</groupId>
      <artifactId>async-http-client</artifactId>
      <version>2.10.1</version>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>


    <dependency>
      <groupId>org.jvnet.winp</groupId>
      <artifactId>winp</artifactId>
      <version>1.28</version>
    </dependency>


    <dependency>
      <groupId>org.java-websocket</groupId>
      <artifactId>Java-WebSocket</artifactId>
      <version>1.4.0</version>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>


    <dependency>
      <groupId>org.apache.tomcat</groupId>
      <artifactId>tomcat-websocket</artifactId>
      <version>9.0.24</version>
      <scope>provided</scope>
    </dependency>

    <dependency>
      <groupId>org.eclipse.jetty.websocket</groupId>
      <artifactId>websocket-client</artifactId>
      <version>9.4.20.v20190813</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>io.undertow</groupId>
      <artifactId>undertow-core</artifactId>
      <version>2.0.26.Final</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.glassfish.tyrus</groupId>
      <artifactId>tyrus-container-grizzly-client</artifactId>
      <version>1.15</version>
      <scope>provided</scope>
    </dependency>
    <!-- org.apache.tomcat:tomcat-websocket:9.0.24 is already declared earlier in this
         list; the duplicate declaration was removed because Maven reports repeated
         groupId:artifactId entries as a model problem. -->
    <dependency>
      <groupId>org.apache.tomcat</groupId>
      <artifactId>tomcat-api</artifactId>
      <version>9.0.24</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.graalvm.nativeimage</groupId>
      <artifactId>svm</artifactId>
      <version>19.3.0.2</version>
      <scope>provided</scope>
    </dependency>
    <!-- org.java-websocket:Java-WebSocket:1.4.0 (with the slf4j-api exclusion) is already
         declared earlier in this list; the duplicate declaration was removed because Maven
         reports repeated groupId:artifactId entries as a model problem. -->

    <dependency>
      <groupId>io.github.kostaskougios</groupId>
      <artifactId>cloning</artifactId>
      <version>1.10.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.squareup</groupId>
      <artifactId>javapoet</artifactId>
      <version>1.11.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.jboss.forge.roaster</groupId>
      <artifactId>roaster-jdt</artifactId>
      <version>2.21.0.Final</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>io.vertx</groupId>
      <artifactId>vertx-core</artifactId>
      <version>4.0.0-milestone4</version>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <groupId>io.netty</groupId>
          <artifactId>netty-codec-http2</artifactId>
        </exclusion>
        <exclusion>
          <groupId>io.netty</groupId>
          <artifactId>netty-resolver-dns</artifactId>
        </exclusion>
        <exclusion>
          <groupId>io.netty</groupId>
          <artifactId>netty-codec-socks</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.10.1</version>
    </dependency>

 

3.官方案例 导出网页html到pdf

    /**
     * Renders {@code https://webfolder.io/cdp4j.html} in headless Chrome, prints it to a
     * temporary PDF file and, when a desktop is available, opens that file.
     *
     * <p>The browser process started by the launcher is killed in a {@code finally} block so
     * that it does not stay behind when launching, navigation, printing, writing or opening
     * the file throws.
     *
     * @param args unused
     * @throws IOException if the temporary file cannot be created, written or opened
     */
    public static void main(String[] args) throws IOException {

        Path file = createTempFile("cdp4j", ".pdf");

        Options options = Options.builder()
                .headless(true)
            .build();

        Launcher launcher = new Launcher(options);

        try {
            try (SessionFactory factory = launcher.launch()) {

                String context = factory.createBrowserContext();
                try (Session session = factory.create(context)) {

                    session.navigate("https://webfolder.io/cdp4j.html");
                    session.waitDocumentReady();

                    byte[] content = session.printToPDF();
                    write(file, content);
                }

                factory.disposeBrowserContext(context);
            }

            if (isDesktopSupported()) {
                getDesktop().open(file.toFile());
            }
        } finally {
            // Always terminate the browser process, including on failure paths.
            launcher.kill();
        }
    }

 

4.我自己写的使用cdp4j导出html到pdf的工具类

@Slf4j
public class HTML2PDF {
    public static int c=0;

    public static Vector<PDFSessionDetail> sessionPool=new Vector();

    public synchronized static PDFSessionDetail useSession(){
        PDFSessionDetail returnSession=null;
        if(!CollectionUtils.isEmpty(sessionPool))
        for(PDFSessionDetail pdfSessionDetail:sessionPool){
            if(!pdfSessionDetail.isHold()&&pdfSessionDetail.isActive()){
                pdfSessionDetail.setHold(true);
                pdfSessionDetail.setLastHoldDate(new Date());
                pdfSessionDetail.setHoldTimes(pdfSessionDetail.getHoldTimes()+1);
                returnSession=pdfSessionDetail;
                break;
            }
        }
        return returnSession;
    }

    public static void releaseSession(PDFSessionDetail pdfSessionDetail){
        if(pdfSessionDetail!=null){
            if(pdfSessionDetail.getHoldTimes()>100){
                log.warn("切换session");
                Session session=pdfSessionDetail.getSession();
                session.close();
                session=null;
                pdfSessionDetail.setSession(null);

                Session newSession=createSession(pdfSessionDetail.getSessionFactory(),pdfSessionDetail.getBrowserContext());
                pdfSessionDetail.setSession(newSession);
                pdfSessionDetail.setHoldTimes(0);
            }
            pdfSessionDetail.setHold(false);
        }
    }

    public static Session createSession(SessionFactory factory ,String context){
        return factory.create(context);
    }

    public static String createBrowserContext(SessionFactory factory){
        return factory.createBrowserContext();
    }

    public static Launcher createLauncher(String tempPath){
        File tempFile =new File(tempPath);
        if(tempFile.exists()){
            deleteDirectoryContent(tempFile);
        };

        Options options = Options.builder()
                .headless(true)
                .userDataDir(tempFile.toPath())
                .build();

        Launcher launcher = new Launcher(options);
        return launcher;

    }

    public static SessionFactory createFactory(Launcher launcher){
        return launcher.launch();
    }


    public static Result html2Pdf(Session session, String url, String path)  {
        FileOutputStream out=null;
        IO io=null;
        String stream=null;
        try {
            log.warn("开始访问");
            session.navigate(url);
            session.waitDocumentReady();
            String html=session.getContent();
            Document doc= Jsoup.parse(html);
            String str=doc.body().text();

            JSONObject errObject=null;
            try {
                if(str.contains("statusCode"))
                errObject= JSON.parseObject(str);
            } catch (Exception e) {
                log.error(e.getMessage(),e);
            }



            if(errObject!=null&&errObject.containsKey("statusCode")){
                return Result.failure(errObject.getString("statusCode"),errObject.getString("error"));
            }else {
                log.warn("完成文档渲染-开始打印PDF");
                PrintToPDFResult result = session.getCommand().getPage().printToPDF(null, null,
                        true, null,
                        null, null,
                        null, null,
                        null, null,
                        null, null,
                        null, null,
                        null, ReturnAsStream);

                io = session.getCommand().getIO();
                stream = result.getStream();

                out = new FileOutputStream(path);

                boolean eof = false;
                while (!eof) {
                    ReadResult streamResult = io.read(stream);
                    eof = streamResult.getEof();
                    if (streamResult.getBase64Encoded()) {
                        if (streamResult.getData() != null &&
                                !streamResult.getData().isEmpty()) {
                            byte[] content = getDecoder().decode(streamResult.getData());
                            out.write(content);
                        }
                    } else {
                        return Result.failure("-9","编码错误:Inavlid content encoding: it must be base64");
                    }
                }
                result = null;
                return Result.sucess("0",null);
            }
        } catch (Throwable e) {
            log.error(e.getMessage(),e);
            return Result.failure("-10","未知错误:"+e.getMessage());
        } finally {
            try {
                out.flush();
                out.close();
                io.close(stream);
            } catch (IOException e) {
                log.error(e.getMessage(),e);
            }
        }
    }


    public static void deleteDirectoryContent(File file){
        File[] list = file.listFiles();
        for (File f:list){
            if (f.isDirectory()){
                deleteDirectoryContent(new File(f.getPath()));
            }else{
                f.delete();
            }
        }
        file.delete();
    };

}

 

登录后即可回复 登录 | 注册
    
关注编程学问公众号