LinkProcessor.php
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2020 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of the MIT License.
7  *
8  * See the LICENSE file distributed with this work for
9  * additional information.
10  */
12 
20 
21 /**
22  * LinkProcessor is used to process links in Node instances. It is used to
23  * convert internal application links or to relocate relative links when displaying
24  * the Node content on a website. LinkProcessor uses a LinkProcessorStrategy for
25  * application specific link checking and conversion.
26  *
27  * @author ingo herwig <ingo@wemove.com>
28  */
30 
/**
 * Check and convert links in the given node.
 *
 * All values of the node (and of its children, if requested) are searched for
 * urls. Relative urls are translated against $base, internal links and hash
 * links are converted using the strategy, and urls that fail the check are
 * replaced by '#'. Values that changed are written back to their node.
 *
 * @param $node Node instance
 * @param $base The base url of relative links as seen from the executing script
 * @param $strategy The strategy used to check and create urls
 * @param $recursive Boolean whether to process child nodes too (default: true)
 * @return Array of invalid urls, grouped by the string representation of the
 *   object id of the node that contains them (empty, if no node is given)
 */
public static function processLinks($node, $base, LinkProcessorStrategy $strategy,
    $recursive=true) {
  if (!$node) {
    // nothing to process: still return the documented array type
    return [];
  }
  $invalidURLs = [];
  $logger = LogManager::getLogger(__CLASS__);

  // iterate over all node values
  $iter = new NodeValueIterator($node, $recursive);
  for($iter->rewind(); $iter->valid(); $iter->next()) {

    $currentNode = $iter->currentNode();
    $valueName = $iter->key();
    $value = $currentNode->getValue($valueName);
    $oldValue = $value;

    // find links in texts
    $urls = array_fill_keys(StringUtil::getUrls($value), 'embedded');
    // find direct attribute urls (the whole value is the url)
    if (preg_match('/^[a-zA-Z]+:\/\//', $value) || InternalLink::isLink($value)) {
      $urls[$value] = 'direct';
    }

    // process urls
    foreach ($urls as $url => $type) {
      // translate relative urls: everything that is neither an internal link,
      // an anchor/placeholder/empty/absolute/javascript/mailto url nor an existing file
      if (!InternalLink::isLink($url) && !preg_match('/^#|^{|^$|^[a-zA-Z]+:\/\/|^javascript:|^mailto:/', $url) &&
          @file_exists($url) === false) {
        $urlConv = URIUtil::translate($url, $base);
        $value = self::replaceUrl($value, $url, $urlConv['absolute'], $type);
        $url = $urlConv['absolute'];
      }

      // check url
      $urlOK = self::checkUrl($url, $strategy);
      if ($urlOK) {
        $urlConv = null;
        if (InternalLink::isLink($url)) {
          // convert internal urls
          $urlConv = self::convertInternalLink($url, $strategy);
        }
        elseif (preg_match('/^#/', $url)) {
          // convert hash links: they are prefixed with the url of the node
          // passed to this method (not the node currently iterated)
          $urlConv = $strategy->getObjectUrl($node).$url;
        }
        if ($urlConv !== null) {
          $value = self::replaceUrl($value, $url, $urlConv, $type);
        }
      }
      else {
        // invalid url: log it, remember it for the owning node and neutralize it
        $logger->error("Invalid URL found: ".$url);
        $oidStr = $currentNode->getOID()->__toString();
        if (!isset($invalidURLs[$oidStr])) {
          // create the list under the node's key (not as an anonymous entry)
          $invalidURLs[$oidStr] = [];
        }
        $invalidURLs[$oidStr][] = $url;
        $value = self::replaceUrl($value, $url, '#', $type);
      }
    }
    // only touch the node if something was actually replaced
    if ($oldValue != $value) {
      $currentNode->setValue($valueName, $value, true);
    }
  }
  return $invalidURLs;
}
107 
/**
 * Replace an url inside the given value.
 *
 * Embedded urls are only replaced where they appear enclosed in double
 * quotes; direct urls are replaced wherever they occur in the value.
 *
 * @param $value The value that contains the url
 * @param $url The url to be replaced
 * @param $urlConv The url to insert instead
 * @param $type The kind of url: embedded or direct
 * @return String The value with the url replaced
 */
protected static function replaceUrl($value, $url, $urlConv, $type) {
  $isEmbedded = ($type == 'embedded');
  // embedded urls are matched together with their surrounding double quotes
  $search = $isEmbedded ? '"'.$url.'"' : $url;
  $replacement = $isEmbedded ? '"'.$urlConv.'"' : $urlConv;
  return str_replace($search, $replacement, $value);
}
125 
/**
 * Check if a url is reachable (e.g. if an internal url is broken due to the target set offline).
 *
 * Anchors, placeholders starting with '{', empty urls, mailto and javascript
 * urls as well as existing files are accepted without further checks. Internal
 * links are valid if the referenced object can be loaded and the strategy
 * accepts it as target. All other urls are accepted unchecked.
 *
 * @param $url The url to check
 * @param $strategy The strategy used to check and create urls
 * @return Boolean whether the url is valid or not
 */
protected static function checkUrl($url, LinkProcessorStrategy $strategy) {
  if (preg_match('/^#|^{|^$|^mailto:/', $url) || (strpos($url, 'javascript:') === 0 && !InternalLink::isLink($url)) ||
      @file_exists($url) !== false) {
    return true;
  }

  if (!InternalLink::isLink($url)) {
    // omit the check of other urls for performance reasons
    // (URIUtil::validateUrl($url) is intentionally not called)
    return true;
  }

  $logger = LogManager::getLogger(__CLASS__);
  $oid = InternalLink::getReferencedOID($url);
  if ($oid == null) {
    // there is no object id to report here, so log the offending url instead
    $logger->error("Type of linked object is unknown: ".$url);
    return false;
  }

  $persistenceFacade = ObjectFactory::getInstance('persistenceFacade');
  $object = $persistenceFacade->load($oid);
  if (!$object) {
    $logger->error("Linked object ".$oid." does not exist");
    return false;
  }
  // the strategy decides whether an existing object may be linked
  if (!$strategy->isValidTarget($object)) {
    return false;
  }
  return true;
}
164 
/**
 * Convert an internal link.
 *
 * The referenced object is resolved to its url using the strategy ('#' is
 * used, if no object id can be extracted from the link). If the link defines
 * an anchor (object id or name), it is appended as fragment. Urls that are no
 * internal links are returned unchanged.
 *
 * @param $url The url to convert
 * @param $strategy The strategy used to check and create urls
 * @return The converted url
 */
protected static function convertInternalLink($url, LinkProcessorStrategy $strategy) {
  if (!InternalLink::isLink($url)) {
    return $url;
  }

  $oid = InternalLink::getReferencedOID($url);
  if ($oid != null) {
    $persistenceFacade = ObjectFactory::getInstance('persistenceFacade');
    $object = $persistenceFacade->load($oid);
    $urlConv = $strategy->getObjectUrl($object);
  }
  else {
    $urlConv = '#';
  }

  // an anchor object id takes precedence over an anchor name
  $anchor = InternalLink::getAnchorOID($url);
  if ($anchor == null) {
    $anchor = InternalLink::getAnchorName($url);
  }
  if ($anchor != null) {
    // add the fragment separator unless the url already ends with it
    // (e.g. the '#' fallback above), to avoid producing '##anchor'
    if (substr((string)$urlConv, -1) !== '#') {
      $urlConv .= '#';
    }
    $urlConv .= $anchor;
  }
  return $urlConv;
}
202 }
203 ?>
NodeValueIterator is used to iterate over all persistent values of a Node (not including relations).
StringUtil provides support for string manipulation.
Definition: StringUtil.php:18
static translate($pathFromA, $pathFromScriptToA)
Translate a relative URI from one location to the script location.
Definition: URIUtil.php:111
URIUtil provides support for uri manipulation.
Definition: URIUtil.php:18
static getLogger($name)
Get the logger with the given name.
Definition: LogManager.php:37
static getInstance($name, $dynamicConfiguration=[])
LogManager is used to retrieve Logger instances.
Definition: LogManager.php:20
ObjectFactory implements the service locator pattern by wrapping a Factory instance and providing sta...
static getUrls($string)
Extract urls from a string.
Definition: StringUtil.php:235