LinkProcessor.php
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2015 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of the MIT License.
7  *
8  * See the LICENSE file distributed with this work for
9  * additional information.
10  */
12 
20 
21 /**
22  * LinkProcessor is used to process links in Node instances. It is used to
23  * convert internal application links or to relocate relative links when displaying
24  * the Node content on a website. LinkProcessor uses a LinkProcessorStrategy for
25  * application specific link checking and conversion.
26  *
27  * @author ingo herwig <ingo@wemove.com>
28  */
30 
/**
 * Check and convert links in the given node.
 *
 * All values of the node (and of its child nodes, if $recursive is true) are
 * searched for urls. Relative urls are translated using $base, internal links
 * and hash links are converted using the strategy and urls that do not pass
 * the check are replaced by '#'. A value is only written back to its node, if
 * it was changed.
 *
 * @param $node A reference to the node
 * @param $base The base url of relative links as seen from the executing script
 * @param $strategy The strategy used to check and create urls
 * @param $recursive Boolean whether to process child nodes too (default: true)
 * @return Array of invalid urls. The keys are the string representations of
 *   the object ids of the nodes containing the urls, the values are arrays of
 *   the invalid urls found in that node (empty array, if no node is given)
 */
public static function processLinks($node, $base, LinkProcessorStrategy $strategy,
  $recursive=true) {
  $invalidURLs = array();
  if (!$node) {
    // nothing to process: return the documented (empty) array
    return $invalidURLs;
  }
  $logger = LogManager::getLogger(__CLASS__);

  // iterate over all node values
  $iter = new NodeValueIterator($node, $recursive);
  for($iter->rewind(); $iter->valid(); $iter->next()) {

    $currentNode = $iter->currentNode();
    $valueName = $iter->key();
    $value = $currentNode->getValue($valueName);
    $oldValue = $value;

    // find links in texts
    $urls = array_fill_keys(StringUtil::getUrls($value), 'embedded');
    // find direct attribute urls (the value as a whole is the url)
    if (preg_match('/^[a-zA-Z]+:\/\//', $value) || InternalLink::isLink($value)) {
      $urls[$value] = 'direct';
    }

    // process urls
    foreach ($urls as $url => $type) {
      // translate relative urls: only urls that are neither internal links,
      // anchors, placeholders, empty, absolute nor existing files
      if (!InternalLink::isLink($url) && !preg_match('/^#|^{|^$|^[a-zA-Z]+:\/\/|^javascript:|^mailto:/', $url) &&
        @file_exists($url) === false) {
        $urlConv = URIUtil::translate($url, $base);
        $value = self::replaceUrl($value, $url, $urlConv['absolute'], $type);
        // continue with the translated url
        $url = $urlConv['absolute'];
      }

      // check url
      $urlOK = self::checkUrl($url, $strategy);
      if ($urlOK) {
        $urlConv = null;
        if (InternalLink::isLink($url)) {
          // convert internal urls
          $urlConv = self::convertInternalLink($url, $strategy);
        }
        elseif (preg_match('/^#/', $url)) {
          // convert hash links
          // NOTE(review): the url is built from $node (the node passed to this
          // method), not from $currentNode - confirm this is intended when
          // child nodes are processed recursively
          $urlConv = $strategy->getObjectUrl($node).$url;
        }
        if ($urlConv !== null) {
          $value = self::replaceUrl($value, $url, $urlConv, $type);
        }
      }
      else {
        // invalid url: log it, collect it under the oid of the containing
        // node and neutralize it in the value
        $logger->error("Invalid URL found: ".$url);
        $oidStr = $currentNode->getOID()->__toString();
        if (!isset($invalidURLs[$oidStr])) {
          $invalidURLs[$oidStr] = array();
        }
        $invalidURLs[$oidStr][] = $url;
        $value = self::replaceUrl($value, $url, '#', $type);
      }
    }
    // only write the value back, if it was modified
    if ($oldValue != $value) {
      $currentNode->setValue($valueName, $value, true);
    }
  }
  return $invalidURLs;
}
107 
/**
 * Substitute an url inside the given value by its converted form.
 * @param $value The value containing the url
 * @param $url The url to search for
 * @param $urlConv The url to insert instead of $url
 * @param $type embedded or direct
 * @return String The value after the replacement
 */
protected static function replaceUrl($value, $url, $urlConv, $type) {
  // embedded urls are only replaced where they are enclosed in double quotes,
  // any other type is replaced wherever the url occurs
  $isEmbedded = ($type == 'embedded');
  $search = $isEmbedded ? '"'.$url.'"' : $url;
  $replacement = $isEmbedded ? '"'.$urlConv.'"' : $urlConv;
  return str_replace($search, $replacement, $value);
}
125 
/**
 * Check if an url is reachable (e.g. if an internal url is broken due to the target set offline).
 *
 * Urls starting with '#', '{' or 'mailto:', empty urls, javascript urls that
 * are no internal links and urls pointing to an existing file are always
 * considered valid. Internal links are valid, if the referenced object can be
 * loaded and is accepted by the strategy. All other urls are not checked and
 * considered valid.
 *
 * @param $url The url to check
 * @param $strategy The strategy used to check and create urls
 * @return Boolean whether the url is valid or not
 */
protected static function checkUrl($url, LinkProcessorStrategy $strategy) {
  if (preg_match('/^#|^{|^$|^mailto:/', $url) || (strpos($url, 'javascript:') === 0 && !InternalLink::isLink($url)) ||
    @file_exists($url) !== false) {
    return true;
  }

  if (!InternalLink::isLink($url)) {
    // external urls are not validated for performance reasons
    return true;
  }

  $logger = LogManager::getLogger(__CLASS__);
  $oid = InternalLink::getReferencedOID($url);
  if ($oid == null) {
    // no object id could be derived, so log the url (the oid would be empty)
    $logger->error("Type of linked object ".$url." is unknown");
    return false;
  }

  $persistenceFacade = ObjectFactory::getInstance('persistenceFacade');
  $object = $persistenceFacade->load($oid);
  if (!$object) {
    $logger->error("Linked object ".$oid." does not exist");
    return false;
  }
  // the object exists, but the strategy has the final say
  if (!$strategy->isValidTarget($object)) {
    return false;
  }
  return true;
}
165 
/**
 * Convert an internal link.
 *
 * The url of the referenced object is obtained from the strategy. If no
 * object id can be derived from the link, '#' is used instead. If the link
 * defines an anchor object id, it is appended as url fragment. Urls that are
 * no internal links are returned unchanged.
 *
 * @param $url The url to convert
 * @param $strategy The strategy used to check and create urls
 * @return The converted url
 */
protected static function convertInternalLink($url, LinkProcessorStrategy $strategy) {
  if (!InternalLink::isLink($url)) {
    return $url;
  }

  $oid = InternalLink::getReferencedOID($url);
  if ($oid != null) {
    $persistenceFacade = ObjectFactory::getInstance('persistenceFacade');
    $object = $persistenceFacade->load($oid);
    $urlConv = $strategy->getObjectUrl($object);
  }
  else {
    $urlConv = '#';
  }

  $anchorOID = InternalLink::getAnchorOID($url);
  if ($anchorOID != null) {
    // add the fragment separator, unless the url is the '#' fallback already
    // (strrpos needs the needle argument, otherwise the call is invalid)
    if (strrpos($urlConv, '#') !== 0) {
      $urlConv .= '#';
    }
    $urlConv .= $anchorOID;
  }
  return $urlConv;
}
194 }
195 ?>
static translate($rel_uri, $base)
Translate a relative URI from one location to the script location.
Definition: URIUtil.php:112
NodeValueIterator is used to iterate over all persistent values of a Node (not including relations)...
static getLogger($name)
Get the logger with the given name.
Definition: LogManager.php:35
static getInstance($name, $dynamicConfiguration=array())
static getUrls($string)
Extract urls from a string.
Definition: StringUtil.php:135