I am trying to scrape https://www.maersk.com/tracking/ with HtmlUnit.
I have already enabled JavaScript via webClient.getOptions().setJavaScriptEnabled(true); this is the code I am using:
String url = "https://www.maersk.com/tracking/";
HtmlPage htmlPage;
try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX)) {
webClient.getOptions().setUseInsecureSSL(true);
webClient.getOptions().setCssEnabled(true);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
webClient.getCookieManager().setCookiesEnabled(true);
try {
htmlPage = webClient.getPage(url);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
System.out.println("HTML Page: \n" + htmlPage.asXml());
However, the page I get back still contains the message "Maersk doesn't work properly without JavaScript enabled. Please enable it to continue."
The HTML page I received is shown below:
<?xml version="1.0" encoding="UTF-8"?>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<link rel="preconnect" href="https://assets.maerskline.com"/>
<title>
Track Shipment | Cargo & Container Tracking | Maersk
</title>
<meta name="description" content="It's easy to track your shipment online with Maersk. Simply enter your bill of lading, shipment or container number and click Track."/>
<iframe src="javascript:void(0)" title="" style="width:0;height:0;border:0;display:none;">
</iframe>
<script async="" src="//www.googletagmanager.com/gtm.js?id=GTM-W6LN7D">
</script>
<script type="text/javascript" src="https://www.maersk.com/static/4cc4672db364488d1796395f48d13b19892400114f7597">
</script>
<script>
//<![CDATA[
var dataLayer = [];
(function (w, d, s, l, i) {
w[l] = w[l] || []; w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });
var f = d.getElementsByTagName(s)[0], j = d.createElement(s), dl = l != 'dataLayer' ? '&l=' + l : '';
j.async = true; j.src = '//www.googletagmanager.com/gtm.js?id=' + i + dl;
f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'dataLayer', 'GTM-W6LN7D');
//]]>
</script>
<script src="https://www.datadoghq-browser-agent.com/datadog-rum-eu.js">
</script>
<script>
//<![CDATA[
if (window.location.hostname !== "localhost") {
var isProd = (["www", "beta", "mybeta"].indexOf(window.location.host.substr(0, window.location.host.indexOf('.'))) > -1);
window.DD_RUM && window.DD_RUM.init(
{
clientToken: isProd ? 'pub37f04b35d0d33532dc0c637c1350075d' : 'pubf43a1b6494c468ba2e182ddc2bb4838c',
applicationId: isProd ? '3349ab16-997d-4708-b10e-31ad5079dab8' : '9cddc80f-4698-4d53-ae1b-3c817fe3f033',
service: "ui-tracking"
}
);
}
//]]>
</script>
<script>
//<![CDATA[
!function(a){var e="https://s.go-mpulse.net/boomerang/",t="addEventListener";if("False"=="True")a.BOOMR_config=a.BOOMR_config||{},a.BOOMR_config.PageParams=a.BOOMR_config.PageParams||{},a.BOOMR_config.PageParams.pci=!0,e="https://s2.go-mpulse.net/boomerang/";if(window.BOOMR_API_key="T2WNA-KCBS3-CZU4N-2SR5A-FXN8A",function(){function n(e){a.BOOMR_onload=e&&e.timeStamp||(new Date).getTime()}if(!a.BOOMR||!a.BOOMR.version&&!a.BOOMR.snippetExecuted){a.BOOMR=a.BOOMR||{},a.BOOMR.snippetExecuted=!0;var i,_,o,r=document.createElement("iframe");if(a[t])a[t]("load",n,!1);else if(a.attachEvent)a.attachEvent("onload",n);r.src="javascript:void(0)",r.title="",r.role="presentation",(r.frameElement||r).style.cssText="width:0;height:0;border:0;display:none;",o=document.getElementsByTagName("script")[0],o.parentNode.insertBefore(r,o);try{_=r.contentWindow.document}catch(O){i=document.domain,r.src="javascript:var d=document.open();d.domain='"+i+"';void(0);",_=r.contentWindow.document}_.open()._l=function(){var a=this.createElement("script");if(i)this.domain=i;a.id="boomr-if-as",a.src=e+"T2WNA-KCBS3-CZU4N-2SR5A-FXN8A",BOOMR_lstart=(new Date).getTime(),this.body.appendChild(a)},_.write("<bo"+'dy onload="document._l();">'),_.close()}}(),"".length>0)if(a&&"performance"in a&&a.performance&&"function"==typeof a.performance.setResourceTimingBufferSize)a.performance.setResourceTimingBufferSize();!function(){if(BOOMR=a.BOOMR||{},BOOMR.plugins=BOOMR.plugins||{},!BOOMR.plugins.AK){var 
e=""=="true"?1:0,t="",n="qpsmkcixjo2kmy3jrxua-f-fbd5c6e16-clientnsv4-s.akamaihd.net",i="false"=="true"?2:1,_={"ak.v":"33","ak.cp":"937959","ak.ai":parseInt("476710",10),"ak.ol":"0","ak.cr":122,"ak.ipv":4,"ak.proto":"http/1.1","ak.rid":"ffc6d1c","ak.r":42309,"ak.a2":e,"ak.m":"","ak.n":"essl","ak.bpcip":"131.228.197.0","ak.cport":3782,"ak.gh":"23.1.32.219","ak.quicv":"","ak.tlsv":"tls1.3","ak.0rtt":"","ak.csrc":"-","ak.acc":"reno","ak.t":"1667861992","ak.ak":"hOBiQwZUYzCg5VSAfCLimQ==ZcX216iRQ7gdX2lqDNcgWKblqD6BfhDD6vSQYUicAsXIq1b/o+srNy+XUPKDRnnmxpaD9h3G6Hgmy5blXXzGfHaE8gmew08ydPFk7q24D2E8YWP7QaCbYrfTJggSr31yIZRscY4DxrkaRAjx/Z89AlyJJ96R0QjQ8AHxhCIU8tyxPKUUXSMhb0g8K1lOvc0CgbBQWQwJOtRr2Xf7oORoIe4SOdN2OuwMKmr3tPuV3vP613kUXFZMa4R3GISqsGF8Ho1XZsu8Z9uYq804/vpUZ2GKthjPL58eEjzQpQj1ypoLSOhqJ42b4AGpYvDBxDBGWHTV0AmAIW54JNCl/guTQAQ7FUXpy8UKDs4+QN34uHpzCdJCWqqgj5bfQnBDVg4Bnf/xMTqEaCo1Za8go4/WOh11E4gtLhWqfF2QTax4Z1U=","ak.pv":"420","ak.dpoabenc":"","ak.tf":i};if(""!==t)_["ak.ruds"]=t;var o={i:!1,av:function(e){var t="http.initiator";if(e&&(!e[t]||"spa_hard"===e[t]))_["ak.feo"]=void 0!==a.aFeoApplied?1:0,BOOMR.addVar(_)},rv:function(){var a=["ak.bpcip","ak.cport","ak.cr","ak.csrc","ak.gh","ak.ipv","ak.m","ak.n","ak.ol","ak.proto","ak.quicv","ak.tlsv","ak.0rtt","ak.r","ak.acc","ak.t","ak.tf"];BOOMR.removeVar(a)}};BOOMR.plugins.AK={akVars:_,akDNSPreFetchDomain:n,init:function(){if(!o.i){var a=BOOMR.subscribe;a("before_beacon",o.av,null,null),a("onbeacon",o.rv,null,null),o.i=!0}return this},is_complete:function(){return!0}}}}()}(window);
//]]>
</script>
<script>
//<![CDATA[
bazadebezolkohpepadr="1439975919"
//]]>
</script>
<script type="text/javascript" src="https://www.maersk.com/akam/13/55d44856" defer="">
</script>
</head>
<body>
<noscript>
<strong>We're sorry but Track Shipment | Cargo &amp; Container Tracking | Maersk doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
</noscript>
<main id="main">
<div id="maersk-app">
</div>
</main>
<noscript>
<iframe src="https://www.googletagmanager.com/ns.html?id=GTM-W6LN7D" height="0" width="0" style="display:none;visibility:hidden"></iframe>
</noscript>
<script type="module" src="/tracking/js/chunk-vendors.a0eda998.js">
</script>
<script type="module" src="/tracking/js/index.9fb0269e.js">
</script>
<script>
//<![CDATA[
!function(){var e=document,t=e.createElement("script");if(!("noModule"in t)&&"onbeforeload"in t){var n=!1;e.addEventListener("beforeload",function(e){if(e.target===t)n=!0;else if(!e.target.hasAttribute("nomodule")||!n)return;e.preventDefault()},!0),t.type="module",t.src=".",e.head.appendChild(t),t.remove()}}();
//]]>
</script>
<script src="/tracking/js/chunk-vendors-legacy.a0eda998.js" nomodule="">
</script>
<script src="/tracking/js/index-legacy.216f3445.js" nomodule="">
</script>
<noscript>
<img src="https://www.maersk.com/akam/13/pixel_55d44856?a=dD1mYTFjOTcyOTAyMzgyNzE5ODM1OTE2ODMzYTUzOWUzNWUyNzQyYzkxJmpzPW9mZg==" style="visibility: hidden; position: absolute; left: -999px; top: -999px;" />
</noscript>
<script type="text/javascript" src="/IyR_SZ/rh2Hr/ffW1-/2Q/bYfV8kbLiO/fgMKIwUl/ekUHI/U4mHDUB">
</script>
</body>
</html>
Does anyone know whether something is configured incorrectly, or whether the Maersk
website requires some special setting to access it? Could someone try scraping https://www.maersk.com/tracking/
and see whether it works? Thanks.
(I originally posted this question on Stack Overflow: https://stackoverflow.com/questions/74353983/htmlunit-webclient-enabled-javascript-but-still-got-javascript-disabled-message — someone there suggested that I open an issue in the HtmlUnit GitHub repository instead, which is why I am posting it here.)
Pay now to fund the work behind this issue.
Get updates on progress being made.
Maintainer is rewarded once the issue is completed.
You're funding impactful open source efforts
You want to contribute to this effort
You want to get funding like this too