DOMUtils.php
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2020 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of the MIT License.
7  *
8  * See the LICENSE file distributed with this work for
9  * additional information.
10  */
11 namespace wcmf\lib\util;
12 
/**
 * DOMUtils provides static helper methods for processing html fragments
 * with PHP's DOM extension.
 *
 * @author ingo herwig <ingo@wemove.com>
 */
class DOMUtils {
  /**
   * Process the given html fragment using the given function.
   *
   * The fragment is wrapped in a temporary html root element before it is
   * parsed and the wrapper tags are stripped from the serialized result.
   * Note that every literal occurrence of the wrapper tags is removed from
   * the output.
   * @param $content Html string (UTF-8)
   * @param $processor Function that accepts a DOMDocument as only parameter
   * @return String
   */
  public static function processHtml($content, callable $processor) {
    $doc = new \DOMDocument();
    $doc->loadHTML('<html>'.trim(self::encodeNonAscii($content)).'</html>', LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    $processor($doc);
    return trim(str_replace(['<html>', '</html>'], '', $doc->saveHTML()));
  }

  /**
   * Get the direct child nodes of an element that have the given node name
   * @param \DOMElement $element Parent element
   * @param $elementName Node name to match (e.g. 'p')
   * @return \DOMNode[] Matching children in document order (empty array if none)
   */
  public static function getChildNodesOfName(\DOMElement $element, $elementName) {
    $result = [];
    foreach ($element->childNodes as $child) {
      if ($child->nodeName == $elementName) {
        $result[] = $child;
      }
    }
    return $result;
  }

  /**
   * Get the next sibling of the given node type
   * @param \DOMElement $element Reference element
   * @param $elementType Node type constant (e.g. XML_ELEMENT_NODE)
   * @return \DOMNode|null The first following sibling of that type or null, if there is none
   */
  public static function getNextSiblingOfType(\DOMElement $element, $elementType) {
    $nextSibling = $element->nextSibling;
    while ($nextSibling && $nextSibling->nodeType != $elementType) {
      $nextSibling = $nextSibling->nextSibling;
    }
    return $nextSibling;
  }

  /**
   * Get the inner html string of an element.
   *
   * The children are serialized using DOMDocument::saveXML(), so the result
   * is XML flavoured markup.
   * @param \DOMElement $element
   * @return String (empty string, if the element has no children)
   */
  public static function getInnerHtml(\DOMElement $element) {
    $innerHTML = '';
    foreach ($element->childNodes as $child) {
      $innerHTML .= $child->ownerDocument->saveXML($child);
    }
    return $innerHTML;
  }

  /**
   * Set the inner html string of an element.
   *
   * All existing children of the element are removed and replaced by the
   * nodes parsed from the given html string.
   * @param \DOMElement $element Element that must belong to a document
   * @param $html Html string (UTF-8)
   */
  public static function setInnerHtml(\DOMElement $element, $html) {
    // parse the fragment inside of a temporary body element
    $doc = new \DOMDocument();
    $doc->loadHTML('<body>'.trim(self::encodeNonAscii($html)).'</body>', LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    $contentNode = $doc->getElementsByTagName('body')->item(0);
    $contentNode = $element->ownerDocument->importNode($contentNode, true);

    // remove the old children (childNodes is a live list, so iterating it
    // with foreach while removing nodes would leave children behind)
    while ($element->firstChild !== null) {
      $element->removeChild($element->firstChild);
    }

    // append copies of the new children (cloning keeps the iterated list intact)
    foreach ($contentNode->childNodes as $child) {
      $element->appendChild($child->cloneNode(true));
    }
  }

  /**
   * Remove double linebreaks and empty paragraphs.
   *
   * Only the literal tags <br>, <p> and </p> (without attributes or
   * self-closing slash) are recognized.
   * @param $html Html string
   * @return String
   */
  public static function removeEmptyLines($html) {
    // merge multiple linebreaks to one
    $html = preg_replace("/(<br>\s*)+/", "<br>", $html);
    // remove linebreaks at the beginning of a paragraph
    $html = preg_replace("/<p>(\s|<br>)*/", "<p>", $html);
    // remove linebreaks at the end of a paragraph
    $html = preg_replace("/(\s|<br>)*<\/p>/", "</p>", $html);
    // remove empty paragraphs
    $html = preg_replace("/<p><\/p>/", "", $html);
    return $html;
  }

  /**
   * Replace all non-ASCII characters of a UTF-8 string by numeric html
   * entities, so that DOMDocument::loadHTML() does not misinterpret the
   * encoding. This replaces the 'HTML-ENTITIES' conversion of
   * mb_convert_encoding(), which is deprecated since PHP 8.2.
   * @param $html Html string (UTF-8)
   * @return String
   */
  private static function encodeNonAscii($html) {
    // convmap: [first code point, last code point, offset, mask]
    return mb_encode_numericentity((string)$html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
  }
}
114 ?>
static setInnerHtml(\DOMElement $element, $html)
Set the inner html string of an element.
Definition: DOMUtils.php:81
static getNextSiblingOfType(\DOMElement $element, $elementType)
Get the next sibling of the given element type.
Definition: DOMUtils.php:54
static processHtml($content, callable $processor)
Process the given html fragment using the given function.
Definition: DOMUtils.php:25
static removeEmptyLines($html)
Remove double linebreaks and empty paragraphs.
Definition: DOMUtils.php:102
Utility classes.
Definition: namespaces.php:97
static getInnerHtml(\DOMElement $element)
Get the inner html string of an element.
Definition: DOMUtils.php:67
static getChildNodesOfName(\DOMElement $element, $elementName)
Get the child nodes of a given element name.
Definition: DOMUtils.php:38