// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include #include #include "base/bind.h" #include "base/command_line.h" #include "base/compiler_specific.h" #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/task/current_thread.h" #include "base/test/bind.h" #include "base/threading/thread_restrictions.h" #include "build/build_config.h" #include "content/public/browser/render_view_host.h" #include "content/public/browser/web_contents.h" #include "content/public/common/content_switches.h" #include "content/public/renderer/render_frame.h" #include "content/public/renderer/render_view.h" #include "content/public/test/browser_test.h" #include "content/public/test/content_browser_test.h" #include "content/public/test/content_browser_test_utils.h" #include "content/public/test/frame_load_waiter.h" #include "content/public/test/test_navigation_observer.h" #include "content/public/test/test_utils.h" #include "content/shell/browser/shell.h" #include "net/base/filename_util.h" #include "third_party/blink/public/platform/web_data.h" #include "third_party/blink/public/platform/web_string.h" #include "third_party/blink/public/platform/web_url.h" #include "third_party/blink/public/platform/web_vector.h" #include "third_party/blink/public/web/web_document.h" #include "third_party/blink/public/web/web_element.h" #include "third_party/blink/public/web/web_element_collection.h" #include "third_party/blink/public/web/web_frame_content_dumper.h" #include "third_party/blink/public/web/web_frame_serializer.h" #include "third_party/blink/public/web/web_frame_serializer_client.h" #include "third_party/blink/public/web/web_local_frame.h" #include "third_party/blink/public/web/web_meta_element.h" #include "third_party/blink/public/web/web_node.h" #include 
"third_party/blink/public/web/web_savable_resources_test_support.h" #include "third_party/blink/public/web/web_view.h" using blink::WebData; using blink::WebDocument; using blink::WebElement; using blink::WebMetaElement; using blink::WebElementCollection; using blink::WebFrame; using blink::WebFrameSerializer; using blink::WebFrameSerializerClient; using blink::WebLocalFrame; using blink::WebNode; using blink::WebString; using blink::WebURL; using blink::WebView; using blink::WebVector; namespace content { bool HasDocType(const WebDocument& doc) { return doc.FirstChild().IsDocumentTypeNode(); } // https://crbug.com/788788 #if defined(OS_ANDROID) && defined(ADDRESS_SANITIZER) #define MAYBE_DomSerializerTests DISABLED_DomSerializerTests #else #define MAYBE_DomSerializerTests DomSerializerTests #endif // defined(OS_ANDROID) && defined(ADDRESS_SANITIZER) class MAYBE_DomSerializerTests : public ContentBrowserTest, public WebFrameSerializerClient { public: MAYBE_DomSerializerTests() = default; void SetUpCommandLine(base::CommandLine* command_line) override { command_line->AppendSwitch(switches::kSingleProcess); #if defined(OS_WIN) // Don't want to try to create a GPU process. command_line->AppendSwitch(switches::kDisableGpu); #endif } void SetUpOnMainThread() override { render_view_routing_id_ = shell() ->web_contents() ->GetMainFrame() ->GetRenderViewHost() ->GetRoutingID(); } // DomSerializerDelegate. void DidSerializeDataForFrame(const WebVector& data, FrameSerializationStatus status) override { // Check finish status of current frame. ASSERT_FALSE(serialization_reported_end_of_data_); // Add data to corresponding frame's content. serialized_contents_.append(data.Data(), data.size()); // Current frame is completed saving, change the finish status. 
if (status == WebFrameSerializerClient::kCurrentFrameIsFinished) serialization_reported_end_of_data_ = true; } RenderView* GetRenderView() { return RenderView::FromRoutingID(render_view_routing_id_); } WebView* GetWebView() { return GetRenderView()->GetWebView(); } WebLocalFrame* GetMainFrame() { return GetRenderView()->GetMainRenderFrame()->GetWebFrame(); } WebLocalFrame* FindSubFrameByURL(const GURL& url) { for (WebFrame* frame = GetWebView()->MainFrame(); frame; frame = frame->TraverseNext()) { DCHECK(frame->IsWebLocalFrame()); if (GURL(frame->ToWebLocalFrame()->GetDocument().Url()) == url) return frame->ToWebLocalFrame(); } return nullptr; } // Load web page according to input content and relative URLs within // the document. void LoadContents(const std::string& contents, const GURL& base_url) { TestNavigationObserver navigation_observer(shell()->web_contents(), 1); shell()->LoadDataWithBaseURL( shell()->web_contents()->GetMainFrame()->GetLastCommittedURL(), contents, base_url); navigation_observer.Wait(); // After navigations, the RenderView for the new document might be a new // one. render_view_routing_id_ = shell() ->web_contents() ->GetMainFrame() ->GetRenderViewHost() ->GetRoutingID(); } class SingleLinkRewritingDelegate : public WebFrameSerializer::LinkRewritingDelegate { public: SingleLinkRewritingDelegate(const WebURL& url, const WebString& localPath) : url_(url), local_path_(localPath) {} bool RewriteFrameSource(WebFrame* frame, WebString* rewritten_link) override { return false; } bool RewriteLink(const WebURL& url, WebString* rewritten_link) override { if (url != url_) return false; *rewritten_link = local_path_; return true; } private: const WebURL url_; const WebString local_path_; }; // Serialize DOM belonging to a frame with the specified |frame_url|. 
void SerializeDomForURL(const GURL& frame_url) { SerializeDomForURL(frame_url, false); } void SerializeDomForURL(const GURL& frame_url, bool save_with_empty_url) { // Find corresponding WebFrame according to frame_url. WebFrame* web_frame = FindSubFrameByURL(frame_url); ASSERT_TRUE(web_frame != nullptr); WebString file_path = WebString::FromUTF8("c:\\dummy.htm"); SingleLinkRewritingDelegate delegate(frame_url, file_path); // Start serializing DOM. bool result = WebFrameSerializer::Serialize( web_frame->ToWebLocalFrame(), this, &delegate, save_with_empty_url); ASSERT_TRUE(result); } bool serialization_reported_end_of_data() const { return serialization_reported_end_of_data_; } const std::string& serialized_contents() const { return serialized_contents_; } private: // Written only on the browser main UI thread. Read only from the in-process // renderer thread via posted tasks: int32_t render_view_routing_id_ = -1; std::string serialized_contents_; bool serialization_reported_end_of_data_ = false; }; // If original contents have document type, the serialized contents also have // document type. // Disabled on OSX by ellyjones@ on 2015-05-18, see https://crbug.com/488495, // on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575 IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests, DISABLED_SerializeHTMLDOMWithDocType) { base::FilePath page_file_path = GetTestFilePath("dom_serializer", "youtube_1.htm"); GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. EXPECT_TRUE(NavigateToURL(shell(), file_url)); PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] { // Make sure original contents have document type. WebLocalFrame* web_frame = FindSubFrameByURL(file_url); ASSERT_TRUE(web_frame != nullptr); WebDocument doc = web_frame->GetDocument(); ASSERT_TRUE(HasDocType(doc)); // Do serialization. SerializeDomForURL(file_url); })); // Load the serialized contents. 
ASSERT_TRUE(serialization_reported_end_of_data()); LoadContents(serialized_contents(), file_url); PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] { // Make sure serialized contents still have document type. WebLocalFrame* web_frame = GetMainFrame(); WebDocument doc = web_frame->GetDocument(); ASSERT_TRUE(HasDocType(doc)); })); } // If original contents do not have document type, the serialized contents // also do not have document type. IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests, SerializeHTMLDOMWithoutDocType) { base::FilePath page_file_path = GetTestFilePath("dom_serializer", "youtube_2.htm"); GURL file_url = net::FilePathToFileURL(page_file_path); ASSERT_TRUE(file_url.SchemeIsFile()); // Load the test file. EXPECT_TRUE(NavigateToURL(shell(), file_url)); PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] { // Make sure original contents do not have document type. WebLocalFrame* web_frame = FindSubFrameByURL(file_url); ASSERT_TRUE(web_frame != nullptr); WebDocument doc = web_frame->GetDocument(); ASSERT_TRUE(!HasDocType(doc)); // Do serialization. SerializeDomForURL(file_url); })); // Load the serialized contents. ASSERT_TRUE(serialization_reported_end_of_data()); LoadContents(serialized_contents(), file_url); PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] { // Make sure serialized contents do not have document type. WebLocalFrame* web_frame = GetMainFrame(); WebDocument doc = web_frame->GetDocument(); ASSERT_TRUE(!HasDocType(doc)); })); } // Serialize XML document which has all 5 built-in entities. After // finishing serialization, the serialized contents should be same // with original XML document. 
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeXMLDocWithBuiltInEntities) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "note.html");
  base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml");

  std::string original_contents;
  {
    // Read original contents for later comparison.
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents));
  }

  // Get file URL.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  GURL xml_file_url = net::FilePathToFileURL(xml_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());
  // Load the test file.
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Serialize the frame whose document URL is the XML file (not the page
    // that was navigated to).
    SerializeDomForURL(xml_file_url);
    // Compare the serialized contents with original contents.
    ASSERT_TRUE(serialization_reported_end_of_data());
    ASSERT_EQ(original_contents, serialized_contents());
  }));
}

// When serializing DOM, we add MOTW declaration before html tag.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeHTMLDOMWithAddingMOTW) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "youtube_2.htm");

  std::string original_contents;
  {
    // Read original contents for later comparison.
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents));
  }

  // Get file URL.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());

  // Load the test file.
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure original contents does not have MOTW.
    std::string motw_declaration =
        WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8();
    ASSERT_FALSE(motw_declaration.empty());
    // The encoding of original contents is ISO-8859-1, so we convert the MOTW
    // declaration to ASCII and search whether original contents has it or not.
    ASSERT_EQ(std::string::npos, original_contents.find(motw_declaration));

    // Do serialization.
    SerializeDomForURL(file_url, false);
    // Make sure the serialized contents have MOTW.
    ASSERT_TRUE(serialization_reported_end_of_data());
    ASSERT_NE(std::string::npos,
              serialized_contents().find(motw_declaration));
  }));
}

// When serializing with |save_with_empty_url| set to true, the serialized
// contents are expected to carry the MOTW declaration generated for
// "about:internet" rather than the one for the page's own URL.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeOffTheRecordHTMLDOMWithAddingMOTW) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "youtube_2.htm");

  std::string original_contents;
  {
    // Read original contents for later comparison.
    base::ScopedAllowBlockingForTesting allow_blocking;
    ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents));
  }

  // Get file URL.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());

  // Load the test file.
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure original contents does not have MOTW.
    GURL frame_url = GURL("about:internet");
    std::string motw_declaration =
        WebFrameSerializer::GenerateMarkOfTheWebDeclaration(frame_url).Utf8();
    ASSERT_FALSE(motw_declaration.empty());
    // The encoding of original contents is ISO-8859-1, so we convert the MOTW
    // declaration to ASCII and search whether original contents has it or not.
    ASSERT_EQ(std::string::npos, original_contents.find(motw_declaration));

    // Do serialization.
    SerializeDomForURL(file_url, true);
    // Make sure the serialized contents have MOTW.
    ASSERT_TRUE(serialization_reported_end_of_data());
    ASSERT_NE(std::string::npos,
              serialized_contents().find(motw_declaration));
  }));
}

// When serializing DOM, we will add the META which have correct charset
// declaration as first child of HEAD element for resolving WebKit bug:
// http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document
// does not have META charset declaration.
// Disabled on OSX by battre@ on 2015-05-21, see https://crbug.com/488495,
// on all platforms by tsergeant@ on 2016-03-10, see https://crbug.com/593575
IN_PROC_BROWSER_TEST_F(
    MAYBE_DomSerializerTests,
    DISABLED_SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "youtube_1.htm");
  // Get file URL.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());
  // Load the test file.
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure there is no META charset declaration in original document.
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement head_element = doc.Head();
    ASSERT_FALSE(head_element.IsNull());
    // Go through all META elements under HEAD.
    WebElementCollection meta_elements =
        head_element.GetElementsByHTMLTagName("meta");
    for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
         element = meta_elements.NextItem()) {
      ASSERT_TRUE(
          element.To<WebMetaElement>().ComputeEncoding().IsEmpty());
    }
    // Do serialization.
    SerializeDomForURL(file_url);
  }));

  // Load the serialized contents.
  ASSERT_TRUE(serialization_reported_end_of_data());
  LoadContents(serialized_contents(), file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure the first child of HEAD element is META which has charset
    // declaration in serialized contents.
    WebLocalFrame* web_frame = GetMainFrame();
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement head_element = doc.Head();
    ASSERT_FALSE(head_element.IsNull());
    ASSERT_FALSE(head_element.FirstChild().IsNull());
    ASSERT_TRUE(head_element.FirstChild().IsElementNode());
    WebMetaElement meta_element =
        head_element.FirstChild().To<WebMetaElement>();
    ASSERT_EQ(meta_element.ComputeEncoding(),
              web_frame->GetDocument().Encoding());

    // Make sure no more additional META tags which have charset declaration.
    WebElementCollection meta_elements =
        head_element.GetElementsByHTMLTagName("meta");
    for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
         element = meta_elements.NextItem()) {
      if (element == meta_element)
        continue;
      ASSERT_TRUE(
          element.To<WebMetaElement>().ComputeEncoding().IsEmpty());
    }
  }));
}

// When serializing DOM, if the original document has multiple META charset
// declaration, we will add the META which have correct charset declaration
// as first child of HEAD element and remove all original META charset
// declarations.
// Disabled due to http://crbug.com/812904
IN_PROC_BROWSER_TEST_F(
    MAYBE_DomSerializerTests,
    DISABLED_SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "youtube_2.htm");
  // Get file URL.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());
  // Load the test file.
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure there are multiple META charset declarations in original
    // document.
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement head_element = doc.Head();
    ASSERT_FALSE(head_element.IsNull());
    // Count the META elements under HEAD that declare a charset.
    int charset_declaration_count = 0;
    WebElementCollection meta_elements =
        head_element.GetElementsByHTMLTagName("meta");
    for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
         element = meta_elements.NextItem()) {
      if (!element.To<WebMetaElement>().ComputeEncoding().IsEmpty())
        ++charset_declaration_count;
    }
    // The original doc has more than one META tag with a charset declaration.
    ASSERT_GT(charset_declaration_count, 1);

    // Do serialization.
    SerializeDomForURL(file_url);
  }));

  // Load the serialized contents.
  ASSERT_TRUE(serialization_reported_end_of_data());
  LoadContents(serialized_contents(), file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure only first child of HEAD element is META which has charset
    // declaration in serialized contents.
    WebLocalFrame* web_frame = GetMainFrame();
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement head_element = doc.Head();
    ASSERT_FALSE(head_element.IsNull());
    ASSERT_FALSE(head_element.FirstChild().IsNull());
    ASSERT_TRUE(head_element.FirstChild().IsElementNode());
    WebMetaElement meta_element =
        head_element.FirstChild().To<WebMetaElement>();
    ASSERT_EQ(meta_element.ComputeEncoding(),
              web_frame->GetDocument().Encoding());

    // Make sure no more additional META tags which have charset declaration.
    WebElementCollection meta_elements =
        head_element.GetElementsByHTMLTagName("meta");
    for (WebElement element = meta_elements.FirstItem(); !element.IsNull();
         element = meta_elements.NextItem()) {
      if (element == meta_element)
        continue;
      ASSERT_TRUE(
          element.To<WebMetaElement>().ComputeEncoding().IsEmpty());
    }
  }));
}

// Test situation of html entities in text when serializing HTML DOM.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeHTMLDOMWithEntitiesInText) {
  // Need to spin up the renderer and also navigate to a file url so that the
  // renderer code doesn't attempt a fork when it sees a load to file scheme
  // from non-file scheme.
  EXPECT_TRUE(NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")));

  base::FilePath page_file_path = GetTestFilePath(
      "dom_serializer", "dom_serializer/htmlentities_in_text.htm");
  // Get file URL. The URL is dummy URL to identify the following loading
  // actions. The test content is in constant:original_contents.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());
  // Test contents.
  // NOTE(review): this literal looks like its HTML markup and entity escapes
  // were stripped; confirm against the intended test page.
  static const char* const original_contents = "&<>\"\'";
  // Load the test contents.
  LoadContents(original_contents, file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Get BODY's text content in DOM.
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement body_ele = doc.Body();
    ASSERT_FALSE(body_ele.IsNull());
    WebNode text_node = body_ele.FirstChild();
    ASSERT_TRUE(text_node.IsTextNode());
    ASSERT_EQ(text_node.NodeValue().Utf8(), "&<>\"\'");
    // Do serialization.
    SerializeDomForURL(file_url);
    ASSERT_TRUE(serialization_reported_end_of_data());
    // Compare the serialized contents with original contents to make sure
    // they are same.
    // Because we add MOTW when serializing DOM, so before comparison, we also
    // need to add MOTW to original_contents.
    std::string original_str =
        WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8();
    original_str += original_contents;
    // Since WebCore now inserts a new HEAD element if there is no HEAD element
    // when creating BODY element. (Please see
    // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and
    // corresponding META content if we find WebCore-generated HEAD element.
    if (!doc.Head().IsNull()) {
      WebString encoding = web_frame->GetDocument().Encoding();
      // NOTE(review): the empty literals below look like stripped tag text
      // (opening html tag / head open / head close); confirm.
      std::string htmlTag("");
      std::string::size_type pos = original_str.find(htmlTag);
      ASSERT_NE(std::string::npos, pos);
      pos += htmlTag.length();
      std::string head_part("");
      head_part +=
          WebFrameSerializer::GenerateMetaCharsetDeclaration(encoding).Utf8();
      head_part += "";
      original_str.insert(pos, head_part);
    }
    ASSERT_EQ(original_str, serialized_contents());
  }));
}

// Test situation of html entities in attribute value when serializing
// HTML DOM.
// This test started to fail at WebKit r65388. See http://crbug.com/52279.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeHTMLDOMWithEntitiesInAttributeValue) {
  // Need to spin up the renderer and also navigate to a file url so that the
  // renderer code doesn't attempt a fork when it sees a load to file scheme
  // from non-file scheme.
  EXPECT_TRUE(NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")));

  base::FilePath page_file_path = GetTestFilePath(
      "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm");
  // Get file URL. The URL is dummy URL to identify the following loading
  // actions. The test content is in constant:original_contents.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());
  // Test contents.
  // NOTE(review): this literal is empty, yet the test below reads BODY's
  // title attribute; the markup appears to have been stripped. Confirm.
  static const char* const original_contents = "";
  // Load the test contents.
  LoadContents(original_contents, file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Get value of BODY's title attribute in DOM.
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement body_ele = doc.Body();
    ASSERT_FALSE(body_ele.IsNull());
    WebString value = body_ele.GetAttribute("title");
    ASSERT_EQ(value.Utf8(), "&<>\"");
    // Do serialization.
    SerializeDomForURL(file_url);
    ASSERT_TRUE(serialization_reported_end_of_data());
    // Compare the serialized contents with original contents to make sure
    // they are same.
    std::string original_str =
        WebFrameSerializer::GenerateMarkOfTheWebDeclaration(file_url).Utf8();
    original_str += original_contents;
    // Only patch in the HEAD content when the document actually has a HEAD
    // element, matching SerializeHTMLDOMWithEntitiesInText.
    if (!doc.Head().IsNull()) {
      WebString encoding = web_frame->GetDocument().Encoding();
      // NOTE(review): the empty literals below look like stripped tag text;
      // confirm.
      std::string htmlTag("");
      std::string::size_type pos = original_str.find(htmlTag);
      ASSERT_NE(std::string::npos, pos);
      pos += htmlTag.length();
      std::string head_part("");
      head_part +=
          WebFrameSerializer::GenerateMetaCharsetDeclaration(encoding).Utf8();
      head_part += "";
      original_str.insert(pos, head_part);
    }
    ASSERT_EQ(original_str, serialized_contents());
  }));
}

// Test situation of non-standard HTML entities when serializing HTML DOM.
// This test started to fail at WebKit r65351. See http://crbug.com/52279.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeHTMLDOMWithNonStandardEntities) {
  // Make a test file URL and load it.
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "nonstandard_htmlentities.htm");
  GURL file_url = net::FilePathToFileURL(page_file_path);
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Get value of BODY's title attribute in DOM.
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    // Fail cleanly instead of dereferencing null when the frame is missing.
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement body_element = doc.Body();
    // Unescaped string for "%⊅¹'".
    static const wchar_t parsed_value[] = {'%', 0x2285, 0x00b9, '\'', 0};
    WebString value = body_element.GetAttribute("title");
    WebString content = blink::WebFrameContentDumper::DumpWebViewAsText(
        web_frame->View(), 1024);
    ASSERT_TRUE(base::UTF16ToWide(value.Utf16()) == parsed_value);
    ASSERT_TRUE(base::UTF16ToWide(content.Utf16()) == parsed_value);

    // Do serialization.
    SerializeDomForURL(file_url);
    // Check the serialized string.
    ASSERT_TRUE(serialization_reported_end_of_data());
    // Confirm that the serialized string has no non-standard HTML entities.
    // NOTE(review): the needles below are already-decoded characters rather
    // than entity names; presumably the entity spellings were lost. Confirm.
    ASSERT_EQ(std::string::npos, serialized_contents().find("%"));
    ASSERT_EQ(std::string::npos, serialized_contents().find("⊅"));
    ASSERT_EQ(std::string::npos, serialized_contents().find("¹"));
    ASSERT_EQ(std::string::npos, serialized_contents().find("'"));
  }));
}

// Test situation of BASE tag in original document when serializing HTML DOM.
// When serializing, we should comment the BASE tag, append a new BASE tag.
// rewrite all the savable URLs to relative local path, and change other URLs
// to absolute URLs.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests, SerializeHTMLDOMWithBaseTag) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "html_doc_has_base_tag.htm");

  // Get page dir URL which is base URL of this file.
  base::FilePath dir_name = page_file_path.DirName();
  GURL path_dir_url = net::FilePathToFileURL(dir_name.AsEndingWithSeparator());

  // Get file URL.
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());
  // Load the test file.
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  // There are total 2 available base tags in this test file.
  const int kTotalBaseTagCountInTestFile = 2;

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Since for this test, we assume there is no savable sub-resource links for
    // this test file, also all links are relative URLs in this test file, so we
    // need to check those relative URLs and make sure document has BASE tag.
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    // Go through all descendant elements.
    WebElementCollection all = doc.All();
    int original_base_tag_count = 0;
    for (WebElement element = all.FirstItem(); !element.IsNull();
         element = all.NextItem()) {
      if (element.HasHTMLTagName("base")) {
        original_base_tag_count++;
      } else {
        // Get link.
        WebString value =
            blink::GetSubResourceLinkFromElementForTesting(element);
        if (value.IsNull() && element.HasHTMLTagName("a")) {
          value = element.GetAttribute("href");
          // Treat an empty href the same as a missing one.
          if (value.IsEmpty())
            value = WebString();
        }
        // Each link is relative link.
        if (!value.IsNull()) {
          GURL link(value.Utf8());
          ASSERT_TRUE(link.scheme().empty());
        }
      }
    }
    ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
    // Make sure in original document, the base URL is not equal with the
    // |path_dir_url|.
    GURL original_base_url(doc.BaseURL());
    ASSERT_NE(original_base_url, path_dir_url);

    // Do serialization.
    SerializeDomForURL(file_url);
  }));

  // Load the serialized contents.
  ASSERT_TRUE(serialization_reported_end_of_data());
  LoadContents(serialized_contents(), file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure all links are absolute URLs and doc there are some number of
    // BASE tags in serialized HTML data. Each of those BASE tags have same base
    // URL which is as same as URL of current test file.
    WebLocalFrame* web_frame = GetMainFrame();
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    // Go through all descendant elements.
    WebElementCollection all = doc.All();
    int new_base_tag_count = 0;
    for (WebNode node = all.FirstItem(); !node.IsNull();
         node = all.NextItem()) {
      if (!node.IsElementNode())
        continue;
      WebElement element = node.To<WebElement>();
      if (element.HasHTMLTagName("base")) {
        new_base_tag_count++;
      } else {
        // Get link.
        WebString value =
            blink::GetSubResourceLinkFromElementForTesting(element);
        if (value.IsNull() && element.HasHTMLTagName("a")) {
          value = element.GetAttribute("href");
          // Treat an empty href the same as a missing one.
          if (value.IsEmpty())
            value = WebString();
        }
        // Each link is absolute link.
        if (!value.IsNull()) {
          GURL link(value.Utf8());
          ASSERT_FALSE(link.scheme().empty());
        }
      }
    }
    // We have one more added BASE tag which is generated by JavaScript.
    ASSERT_EQ(new_base_tag_count, kTotalBaseTagCountInTestFile + 1);
    // Make sure in new document, the base URL is equal with the |path_dir_url|.
    GURL new_base_url(doc.BaseURL());
    ASSERT_EQ(new_base_url, path_dir_url);
  }));
}

// Serializing page which has an empty HEAD tag.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SerializeHTMLDOMWithEmptyHead) {
  // Need to spin up the renderer and also navigate to a file url so that the
  // renderer code doesn't attempt a fork when it sees a load to file scheme
  // from non-file scheme.
  EXPECT_TRUE(NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")));

  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "empty_head.htm");
  GURL file_url = net::FilePathToFileURL(page_file_path);
  ASSERT_TRUE(file_url.SchemeIsFile());

  // Load the test html content.
  // NOTE(review): this literal looks like its surrounding HTML markup was
  // stripped; confirm against the intended test page.
  static const char* const empty_head_contents = "hello world";
  LoadContents(empty_head_contents, file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    // Make sure the head tag is empty.
    WebLocalFrame* web_frame = GetMainFrame();
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement head_element = doc.Head();
    ASSERT_FALSE(head_element.IsNull());
    ASSERT_TRUE(head_element.FirstChild().IsNull());

    // Do serialization.
    SerializeDomForURL(file_url);
    ASSERT_TRUE(serialization_reported_end_of_data());
  }));

  // Reload serialized contents and make sure there is only one META tag.
  LoadContents(serialized_contents(), file_url);

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    WebLocalFrame* web_frame = GetMainFrame();
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    ASSERT_TRUE(doc.IsHTMLDocument());
    WebElement head_element = doc.Head();
    ASSERT_FALSE(head_element.IsNull());
    ASSERT_FALSE(head_element.FirstChild().IsNull());
    ASSERT_TRUE(head_element.FirstChild().IsElementNode());
    // HEAD must contain exactly one child: the generated META element.
    ASSERT_TRUE(head_element.FirstChild().NextSibling().IsNull());
    WebMetaElement meta_element =
        head_element.FirstChild().To<WebMetaElement>();
    ASSERT_EQ(meta_element.ComputeEncoding(),
              web_frame->GetDocument().Encoding());

    // Check the body's first node is text node and its contents are
    // "hello world"
    WebElement body_element = doc.Body();
    ASSERT_FALSE(body_element.IsNull());
    WebNode text_node = body_element.FirstChild();
    ASSERT_TRUE(text_node.IsTextNode());
    ASSERT_EQ("hello world", text_node.NodeValue());
  }));
}

// The last element in BODY of the test page is expected to yield no
// sub-resource link.
IN_PROC_BROWSER_TEST_F(MAYBE_DomSerializerTests,
                       SubResourceForElementsInNonHTMLNamespace) {
  base::FilePath page_file_path =
      GetTestFilePath("dom_serializer", "non_html_namespace.htm");
  GURL file_url = net::FilePathToFileURL(page_file_path);
  EXPECT_TRUE(NavigateToURL(shell(), file_url));

  PostTaskToInProcessRendererAndWait(base::BindLambdaForTesting([=] {
    WebLocalFrame* web_frame = FindSubFrameByURL(file_url);
    ASSERT_TRUE(web_frame != nullptr);
    WebDocument doc = web_frame->GetDocument();
    WebNode lastNodeInBody = doc.Body().LastChild();
    ASSERT_TRUE(lastNodeInBody.IsElementNode());
    WebString uri = blink::GetSubResourceLinkFromElementForTesting(
        lastNodeInBody.To<WebElement>());
    EXPECT_TRUE(uri.IsNull());
  }));
}

}  // namespace content