java chromedevtools chrome浏览器操作工具类chromeUtil

2020-05-18 08:45:06 | 编辑

前面研究了 Java 操作 Chrome 浏览器的基础,完全基于 Chrome DevTools Protocol(https://chromedevtools.github.io/devtools-protocol/),并给出了两个案例:《java直接操作chrome谷歌浏览器 实现网页爬虫》和《java直接调用google chrome headless浏览器 实现网页转换导出为pdf》。现在把完整的、基于 Chrome DevTools Protocol 用 Java 操作 Chrome 浏览器的工具类贴出来。

java chromeUtil代码

/**
 * Static helpers that start a headless Chrome process and drive it through the
 * Chrome DevTools Protocol (CDP) over a {@link MyWebSocket} connection.
 *
 * <p>Each command is sent with a fixed numeric id and the call blocks until
 * {@link MyWebSocket#sendAndGet(String, int)} returns the matching response (or
 * {@code null} when no response arrived). Because the ids are fixed and the socket tracks a
 * single pending id, a given {@code MyWebSocket} must only be used by one thread at a time.
 */
public class ChromeUtil {

    /** Maximum time, in milliseconds, to wait for Chrome to announce its DevTools endpoint. */
    private static final long LAUNCH_TIMEOUT_MILLIS = 600000L;

    /** Maximum time, in milliseconds, {@link #waitReady} polls {@code document.readyState}. */
    private static final long READY_TIMEOUT_MILLIS = 300000L;

    /** Maximum time, in milliseconds, {@link #waitPagePaint} waits for the paint/idle event. */
    private static final long PAINT_TIMEOUT_MILLIS = 120000L;

    /** Pause between two polls, in milliseconds. */
    private static final long POLL_INTERVAL_MILLIS = 100L;

    /**
     * Starts a headless Chrome process and connects to its browser-level DevTools endpoint.
     *
     * <p>The endpoint URL is read from the line starting with "DevTools listening on" that
     * Chrome writes to stderr. If startup fails for any reason the spawned process is killed
     * before the exception is propagated.
     *
     * @param tempath directory under which a fresh, randomly named user-data-dir is created
     * @return a web socket connected to the browser endpoint
     * @throws URISyntaxException if the announced endpoint is not a valid URI
     * @throws IOException if the process cannot be started or the operating system is neither
     *     Windows nor Linux
     * @throws RuntimeException if Chrome does not announce an endpoint
     */
    public static MyWebSocket launch(String tempath) throws URISyntaxException, IOException {
        String cmdID = UUID.randomUUID().toString();

        // Command line used to start the browser.
        List<String> arguments = new ArrayList<>();
        arguments.add(resolveChromeExecutable());
        arguments.add("--disable-features=TranslateUI");
        arguments.add("--disable-extensions");
        arguments.add("--disable-background-networking");
        arguments.add("--safebrowsing-disable-auto-update");
        arguments.add("--disable-sync");
        arguments.add("--metrics-recording-only");
        arguments.add("--disable-default-apps");
        arguments.add("--mute-audio");
        arguments.add("--no-first-run");
        arguments.add("--no-default-browser-check");
        arguments.add("--disable-plugin-power-saver");
        arguments.add("--disable-popup-blocking");
        // Port 0 lets Chrome pick a free port; the real one is parsed from stderr below.
        arguments.add("--remote-debugging-port=0");
        arguments.add("--disable-gpu");
        arguments.add("--enable-automation");
        arguments.add("--no-sandbox");
        arguments.add("--disable-dev-shm-usage");
        arguments.add("--disable-setuid-sandbox");
        arguments.add("--disable-web-security");
        arguments.add("--no-zygote");
        arguments.add("--headless");
        arguments.add("--myId=" + cmdID);
        arguments.add("--user-data-dir=" + tempath + File.separator + cmdID);
        // No trailing "&": ProcessBuilder does not go through a shell, so it would be handed
        // to Chrome as a literal argument. The child already runs asynchronously.

        ProcessBuilder builder = new ProcessBuilder(arguments);
        builder.environment().put("MY_ID", cmdID);

        Process process = builder.start();
        try {
            String url = readDevToolsUrl(process);
            if (StringUtils.isEmpty(url)) {
                throw new RuntimeException("启动浏览器失败");
            }
            log.warn("启动浏览器成功:" + url);
            return new MyWebSocket(new URI(url));
        } catch (RuntimeException | URISyntaxException e) {
            // Do not leave an orphaned browser behind when startup did not complete.
            process.destroyForcibly();
            throw e;
        }
    }

    /**
     * Picks the Chrome executable name for the current operating system.
     *
     * @return {@code "chrome"} on Windows, {@code "google-chrome"} on Linux
     * @throws IOException on any other operating system
     */
    private static String resolveChromeExecutable() throws IOException {
        String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);
        if (os.startsWith("windows")) {
            return "chrome";
        }
        if (os.startsWith("linux")) {
            return "google-chrome";
        }
        throw new IOException("Unsupported operating system for launching Chrome: " + os);
    }

    /**
     * Scans the process' stderr for the DevTools endpoint announcement.
     *
     * <p>NOTE: the timeout is only evaluated after a line has been read, so it cannot fire
     * while Chrome stays completely silent. Closing the scanner also closes the stderr stream.
     *
     * @param process the freshly started Chrome process
     * @return the {@code ws://} URL, or {@code null} when stderr ended without announcing one
     * @throws RuntimeException if the announcement did not appear within the launch timeout
     */
    private static String readDevToolsUrl(Process process) {
        long startTime = System.currentTimeMillis();
        try (Scanner scanner = new Scanner(process.getErrorStream())) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (line.toLowerCase(java.util.Locale.ROOT).startsWith("devtools listening on")) {
                    int start = line.indexOf("ws://");
                    // A banner without a ws:// URL is reported as a failed launch by the caller.
                    return start >= 0 ? line.substring(start) : null;
                }
                if (System.currentTimeMillis() - startTime >= LAUNCH_TIMEOUT_MILLIS) {
                    throw new RuntimeException("启动浏览器超时");
                }
            }
        }
        return null;
    }

    /**
     * Builds a CDP command and sends it, blocking until the socket returns.
     *
     * @param myWebSocket connection to send on
     * @param id command id, also used to match the response
     * @param method CDP method name
     * @param sessionId target session, or {@code null} for browser-level commands
     * @param params command parameters, or {@code null} for none
     * @return the raw response text as returned by {@code sendAndGet}
     */
    private static String sendCommand(MyWebSocket myWebSocket, int id, String method,
                                      String sessionId, JSONObject params) {
        JSONObject command = new JSONObject();
        command.put("id", id);
        command.put("method", method);
        if (sessionId != null) {
            command.put("sessionId", sessionId);
        }
        if (params != null) {
            command.put("params", params);
        }
        return myWebSocket.sendAndGet(command.toJSONString(), id);
    }

    /**
     * Extracts the {@code result} object of a response.
     *
     * @param response raw response text, possibly {@code null}
     * @param method CDP method name, used in the error message only
     * @return the non-null {@code result} object
     * @throws IllegalStateException if the response is missing or has no {@code result} field
     */
    private static JSONObject requireResult(String response, String method) {
        JSONObject message = response == null ? null : JSON.parseObject(response);
        JSONObject result = message == null ? null : message.getJSONObject("result");
        if (result == null) {
            throw new IllegalStateException("No result for CDP command " + method + ": " + response);
        }
        return result;
    }

    /**
     * Sends {@code Target.setDiscoverTargets} with {@code discover=true}.
     *
     * @param myWebSocket browser-level connection
     */
    public static void discoverTargets(MyWebSocket myWebSocket) {
        JSONObject params = new JSONObject();
        params.put("discover", true);
        sendCommand(myWebSocket, 100, "Target.setDiscoverTargets", null, params);
    }

    /**
     * Sends {@code Target.createBrowserContext}.
     *
     * @param myWebSocket browser-level connection
     * @return the {@code browserContextId} from the response
     * @throws IllegalStateException if no usable response was received
     */
    public static String createContext(MyWebSocket myWebSocket) {
        String response = sendCommand(myWebSocket, 200, "Target.createBrowserContext", null, null);
        return requireResult(response, "Target.createBrowserContext").getString("browserContextId");
    }

    /**
     * Sends {@code Target.createTarget} for an {@code about:blank} page of 1366x768.
     *
     * @param myWebSocket browser-level connection
     * @param browserContextId context to create the page in
     * @return the {@code targetId} from the response
     * @throws IllegalStateException if no usable response was received
     */
    public static String createTarget(MyWebSocket myWebSocket, String browserContextId) {
        JSONObject params = new JSONObject();
        params.put("url", "about:blank");
        params.put("width", 1366);
        params.put("height", 768);
        params.put("browserContextId", browserContextId);
        params.put("enableBeginFrameControl", false);

        String response = sendCommand(myWebSocket, 300, "Target.createTarget", null, params);
        return requireResult(response, "Target.createTarget").getString("targetId");
    }

    /**
     * Sends {@code Target.closeTarget}; the response is not inspected.
     *
     * @param myWebSocket browser-level connection
     * @param targetId target to close
     */
    public static void closeTarget(MyWebSocket myWebSocket, String targetId) {
        JSONObject params = new JSONObject();
        params.put("targetId", targetId);
        sendCommand(myWebSocket, 310, "Target.closeTarget", null, params);
    }

    /**
     * Sends {@code Target.attachToTarget} with {@code flatten=true}.
     *
     * @param myWebSocket browser-level connection
     * @param targetId target to attach to
     * @return the {@code sessionId} from the response
     * @throws IllegalStateException if no usable response was received
     */
    public static String createSession(MyWebSocket myWebSocket, String targetId) {
        JSONObject params = new JSONObject();
        params.put("targetId", targetId);
        params.put("flatten", true);

        String response = sendCommand(myWebSocket, 400, "Target.attachToTarget", null, params);
        return requireResult(response, "Target.attachToTarget").getString("sessionId");
    }

    /**
     * Sends {@code Runtime.enable}, {@code Page.enable} and
     * {@code Page.setLifecycleEventsEnabled(enabled=true)} for the session, in that order.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session to configure
     */
    public static void enablePage(MyWebSocket myWebSocket, String sessionId) {
        sendCommand(myWebSocket, 500, "Runtime.enable", sessionId, null);
        sendCommand(myWebSocket, 510, "Page.enable", sessionId, null);

        JSONObject params = new JSONObject();
        params.put("enabled", true);
        sendCommand(myWebSocket, 520, "Page.setLifecycleEventsEnabled", sessionId, params);
    }

    /**
     * Sends {@code Page.navigate} and logs the raw response.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session whose page navigates
     * @param url address to load
     */
    public static void navigate(MyWebSocket myWebSocket, String sessionId, String url) {
        JSONObject params = new JSONObject();
        params.put("url", url);

        String result = sendCommand(myWebSocket, 600, "Page.navigate", sessionId, params);
        log.warn("跳转页面" + result);
    }

    /**
     * Polls {@code document.readyState == 'complete'} every 100 ms until it evaluates to true.
     *
     * <p>Implemented as a loop so that a slow page cannot grow the call stack.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session whose page is checked
     * @param startDate moment the wait began; the timeout is measured from here
     * @throws InterruptedException if the thread is interrupted while sleeping, or if the page
     *     is still not ready once the timeout has elapsed
     * @throws IllegalStateException if an evaluation returned no usable response
     */
    public static void waitReady(MyWebSocket myWebSocket, String sessionId, Date startDate) throws InterruptedException {
        JSONObject params = new JSONObject();
        params.put("expression", "document.readyState == 'complete'");

        while (true) {
            Thread.sleep(POLL_INTERVAL_MILLIS);

            String result = sendCommand(myWebSocket, 700, "Runtime.evaluate", sessionId, params);
            JSONObject remoteObject = requireResult(result, "Runtime.evaluate").getJSONObject("result");
            boolean isReady = remoteObject != null && remoteObject.getBooleanValue("value");

            System.out.println("###" + result);

            if (isReady) {
                return;
            }
            if (System.currentTimeMillis() - startDate.getTime() >= READY_TIMEOUT_MILLIS) {
                throw new InterruptedException("加载网页超时!");
            }
        }
    }

    /**
     * Waits until the socket reports the page as finished, which it does when a
     * {@code Page.lifecycleEvent} named {@code firstMeaningfulPaint} or {@code networkIdle}
     * arrives, for example:
     * {"method":"Page.lifecycleEvent","params":{"frameId":"085D45927EF84EF41BA88A4E36DA4EF4","loaderId":"340CEA67B9B7FE8CB0A45F6950EE6BFB","name":"firstMeaningfulPaint","timestamp":29393.87702},"sessionId":"5CE410C83FC9A04F1C250B369D786456"}
     *
     * @param myWebSocket connection whose finished flag is polled
     * @param sessionId unused; kept for signature compatibility
     * @param startDate moment the wait began; the timeout is measured from here
     * @throws InterruptedException if the thread is interrupted while sleeping, or if the flag
     *     is still unset once the timeout has elapsed
     */
    public static void waitPagePaint(MyWebSocket myWebSocket, String sessionId, Date startDate) throws InterruptedException {
        do {
            Thread.sleep(POLL_INTERVAL_MILLIS);
            if (System.currentTimeMillis() - startDate.getTime() >= PAINT_TIMEOUT_MILLIS) {
                throw new InterruptedException("加载网页超时!");
            }
        } while (!myWebSocket.isPagefinished());
    }

    /**
     * Sends {@code Page.printToPDF} requesting the output as a stream.
     *
     * <p>Paper size is 6.93 x 10.08 with 0.59 margins on every side, backgrounds are printed
     * and header/footer display is enabled.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session whose page is printed
     * @param headerTemplate HTML template for the page header
     * @param footerTemplate HTML template for the page footer
     * @return the stream handle to pass to {@link #storePdf}
     * @throws IllegalStateException if no usable response was received
     */
    public static String printToPDF(MyWebSocket myWebSocket, String sessionId, String headerTemplate, String footerTemplate) {
        JSONObject params = new JSONObject();
        params.put("printBackground", true);
        params.put("transferMode", "ReturnAsStream");
        params.put("displayHeaderFooter", true);

        params.put("headerTemplate", headerTemplate);
        params.put("footerTemplate", footerTemplate);

        params.put("paperHeight", 10.08);
        params.put("paperWidth", 6.93);

        params.put("marginBottom", 0.59);
        params.put("marginLeft", 0.59);
        params.put("marginRight", 0.59);
        params.put("marginTop", 0.59);

        String response = sendCommand(myWebSocket, 800, "Page.printToPDF", sessionId, params);
        return requireResult(response, "Page.printToPDF").getString("stream");
    }

    /**
     * Drains a CDP stream with repeated {@code IO.read} calls, Base64-decodes every chunk into
     * {@code out}, and closes the stream handle once {@code eof} is reported.
     *
     * <p>Implemented as a loop so that a large document cannot grow the call stack. The
     * output stream is not closed here.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session that owns the stream
     * @param ioHandle stream handle returned by {@link #printToPDF}
     * @param out destination for the decoded bytes
     * @throws IOException if a chunk is missing from a response or writing to {@code out} fails
     */
    public static void storePdf(MyWebSocket myWebSocket, String sessionId, String ioHandle, OutputStream out) throws IOException {
        JSONObject params = new JSONObject();
        params.put("handle", ioHandle);

        boolean ioEof;
        do {
            String response = sendCommand(myWebSocket, 900, "IO.read", sessionId, params);
            JSONObject message = response == null ? null : JSON.parseObject(response);
            JSONObject result = message == null ? null : message.getJSONObject("result");
            String ioData = result == null ? null : result.getString("data");
            if (ioData == null) {
                throw new IOException("IO.read returned no data for handle " + ioHandle);
            }
            out.write(Base64.getDecoder().decode(ioData));
            ioEof = result.getBooleanValue("eof");
        } while (!ioEof);

        closeIO(myWebSocket, sessionId, ioHandle);
    }

    /**
     * Sends {@code IO.close} for a stream handle; the response is not inspected.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session that owns the stream
     * @param ioHandle stream handle to release
     */
    public static void closeIO(MyWebSocket myWebSocket, String sessionId, String ioHandle) {
        JSONObject params = new JSONObject();
        params.put("handle", ioHandle);
        sendCommand(myWebSocket, 1000, "IO.close", sessionId, params);
    }

    /**
     * Sends {@code DOM.getDocument}.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session whose document is fetched
     * @param depth value of the command's {@code depth} parameter
     * @return the raw response text, as returned by {@code sendAndGet}
     */
    public static String getDocument(MyWebSocket myWebSocket, String sessionId, int depth) {
        JSONObject params = new JSONObject();
        params.put("depth", depth);
        return sendCommand(myWebSocket, 1100, "DOM.getDocument", sessionId, params);
    }

    /**
     * Inspects the loaded page for an error payload rendered in place of the real content.
     *
     * <p>The first child of the first element in {@code <body>} is examined; when its
     * {@code nodeValue} mentions both {@code statusCode} and {@code message} it is parsed as
     * JSON and returned as a failure. Results with code -10000 are meant to be retried by the
     * caller. A document without {@code <html>}, {@code <body>} or body children is reported
     * as empty content.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session whose page is inspected
     * @param depth DOM depth to fetch; must be deep enough to reach the inspected node
     * @return success (code 0) when no error payload was found, otherwise a failure
     * @throws IllegalStateException if {@code DOM.getDocument} returned no usable response
     */
    public static Result getPageResult(MyWebSocket myWebSocket, String sessionId, int depth) {
        String documentStr = getDocument(myWebSocket, sessionId, depth);
        JSONObject root = requireResult(documentStr, "DOM.getDocument").getJSONObject("root");

        JSONObject bodyObj = findChildElement(findChildElement(root, "html"), "body");
        JSONArray bodyChildren = bodyObj == null ? null : bodyObj.getJSONArray("children");
        if (bodyChildren == null || bodyChildren.size() <= 0) {
            return Result.failure(-10000, "错题集内容为空");
        }

        JSONObject firstElement = bodyChildren.getJSONObject(0);
        JSONArray firstChildren = firstElement == null ? null : firstElement.getJSONArray("children");
        if (firstChildren != null && firstChildren.size() > 0) {
            JSONObject errResultObj = firstChildren.getJSONObject(0);
            String err = errResultObj == null ? null : errResultObj.getString("nodeValue");
            if (err != null && err.contains("statusCode") && err.contains("message")) {
                JSONObject errObj = JSON.parseObject(err);
                return Result.failure(errObj.getIntValue("statusCode"), errObj.getString("message"));
            }
        }
        return Result.sucess(0, "");
    }

    /**
     * Finds the first direct child whose {@code nodeName} and {@code localName} both equal the
     * given tag name, ignoring case.
     *
     * @param parent DOM node as returned by CDP, possibly {@code null}
     * @param tagName lower-case tag name to look for
     * @return the matching child, or {@code null} when there is none
     */
    private static JSONObject findChildElement(JSONObject parent, String tagName) {
        JSONArray children = parent == null ? null : parent.getJSONArray("children");
        if (children == null) {
            return null;
        }
        for (int i = 0; i < children.size(); i++) {
            JSONObject child = children.getJSONObject(i);
            if (child != null
                    && tagName.equalsIgnoreCase(child.getString("nodeName"))
                    && tagName.equalsIgnoreCase(child.getString("localName"))) {
                return child;
            }
        }
        return null;
    }

    /**
     * Loads a URL, waits for it to render, checks it for an error payload and prints it to a
     * PDF file.
     *
     * <p>After the paint/idle event a fixed 5 second pause gives the page time to settle.
     * Every exception is logged and converted into a failure result with code -10000.
     *
     * @param myWebSocket connection to send on
     * @param sessionId session whose page is used
     * @param url address to load
     * @param path file the PDF is written to
     * @param headerTemplate HTML template for the page header
     * @param footerTemplate HTML template for the page footer
     * @return success (code 1) when a non-empty file was written; the page's own error when
     *     {@link #getPageResult} found one; otherwise a failure with code -10000
     */
    public static Result html2pdf(MyWebSocket myWebSocket, String sessionId, String url, String path, String headerTemplate, String footerTemplate) {
        Result result = Result.failure(-100, "初始状态");
        try {
            myWebSocket.setPagefinished(false);
            log.warn("加载页面:" + url);
            navigate(myWebSocket, sessionId, url);
            waitPagePaint(myWebSocket, sessionId, new Date());
            Thread.sleep(5000);
            log.warn("完成加载:" + url);
            Result checkResult = getPageResult(myWebSocket, sessionId, 3);
            log.warn("获取内容:" + url);
            if (!checkResult.isSuccess()) {
                return checkResult;
            }
            String ioHandle = printToPDF(myWebSocket, sessionId, headerTemplate, footerTemplate);
            log.warn("获取数据:" + path);
            // try-with-resources so the file handle is released even when streaming fails.
            try (OutputStream out = new FileOutputStream(path)) {
                storePdf(myWebSocket, sessionId, ioHandle, out);
            }
            log.warn("完成打印:" + path);

            File resultFile = new File(path);
            boolean isSuccess = resultFile.exists() && resultFile.length() > 0;

            if (isSuccess) {
                result = Result.sucess(1, "成功");
            } else {
                result = Result.failure(-10000, "文件保存错误");
            }
        } catch (InterruptedException e) {
            // The wait helpers also use InterruptedException to signal a timeout, so the
            // interrupt flag is deliberately not restored here.
            log.error(e.getMessage(), e);
            result = Result.failure(-10000, "加载网页超时");
        } catch (FileNotFoundException e) {
            log.error(e.getMessage(), e);
            result = Result.failure(-10000, "文件不存在");
        } catch (IOException e) {
            log.error(e.getMessage(), e);
            result = Result.failure(-10000, "文件读写错误");
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            result = Result.failure(-10000, "未知异常");
        }

        return result;
    }

    /**
     * Force-kills every Chrome process on the machine and logs the kill command's output.
     *
     * <p>Uses {@code taskkill} on Windows and {@code kill -9} on Linux; on any other operating
     * system nothing is executed. Waits at most 10 seconds for the command before reading its
     * output. Failures are logged, never thrown.
     */
    public static void killChrome() {
        String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);
        try {
            Process process;
            if (os.startsWith("windows")) {
                process = Runtime.getRuntime().exec(new String[]{"taskkill", "/f", "/t", "/im", "chrome.exe"});
            } else if (os.startsWith("linux")) {
                // Needs a shell for the command substitution and the pipeline.
                String commond = "kill -9 $(ps aux|grep google-chrome|grep -v 'grep' |awk '{print $2}')";
                process = Runtime.getRuntime().exec(new String[]{"sh", "-c", commond});
            } else {
                log.warn("killChrome is not supported on this operating system: " + os);
                return;
            }
            process.waitFor(10, TimeUnit.SECONDS);

            try (InputStream inputStream = process.getInputStream();
                 ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                IOUtils.copy(inputStream, baos);
                log.warn("关闭所有浏览器:" + baos.toString());
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
            log.error(e.getMessage(), e);
        }
    }

}

java websocket代码(因为和chrome浏览器通信是基于websocket)

public class MyWebSocket extends WebSocketClient {
    private int code;
    private String message;
    private boolean isPagefinished;
    public MyWebSocket(URI serverUri) {
        super(serverUri);
        this.waitConnect();

    }
    public void waitConnect(){
        try {
            this.connectBlocking();
        } catch (InterruptedException e) {
            log.error(e.getMessage(),e);
        }
    }

    @Override
    public void onOpen(ServerHandshake serverHandshake) {
        log.warn("###建立连接");
    }

    @Override
    public void onMessage(String s) {
        //log.warn("接受消息:"+s);
        synchronized(this) {
            if (!StringUtils.isEmpty(s)) {
                JSONObject obj = JSON.parseObject(s);
                if (obj.containsKey("id") && obj.getIntValue("id") == code) {
                    //处理交互消息
                    this.message = s;
                    this.notify();
                    if(obj.getIntValue("id")!=900)log.warn("接受内容消息:"+s);
                }else if(obj.containsKey("method") && "Page.lifecycleEvent".equals(obj.getString("method"))){
                    //处理事件消息
                    JSONObject eventObj=obj.getJSONObject("params");
                    //渲染完成事件
                    if(eventObj.containsKey("name")&&("firstMeaningfulPaint".equals(eventObj.getString("name"))||"networkIdle".equals(eventObj.getString("name")))){
                        this.setPagefinished(true);
                    }
                    log.warn("接受事件消息:"+s);
                }
            }
        }

    }

    @Override
    public void onClose(int i, String s, boolean b) {
        log.warn("###关闭连接:"+this.getURI()+"-"+i+"-"+s+"-"+b);
    }

    @Override
    public void onError(Exception e) {
        log.error("###通信错误"+e.getMessage(),e);
    }

    public String sendAndGet(String text,int code) {
        synchronized(this) {
            message = null;
            this.code = code;
            try {

                this.send(text);
                this.wait(120000);
            } catch (InterruptedException e) {
                log.error(e.getMessage(), e);
            }
        }
        return message;
    }

    public int getCode() {
        return code;
    }

    public void setCode(int code) {
        this.code = code;
    }

    public String getMessage() {
        return message;
    }

    public void setMessage(String message) {
        this.message = message;
    }

    public boolean isPagefinished() {
        return isPagefinished;
    }

    public void setPagefinished(boolean pagefinished) {
        isPagefinished = pagefinished;
    }
}

 

附:chromedevtools协议文档官方的两个链接

https://chromedevtools.github.io/devtools-protocol/

https://vanilla.aslushnikov.com/

 

登录后即可回复 登录 | 注册
    
关注编程学问公众号