StringUtil.php
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2020 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of the MIT License.
7  *
8  * See the LICENSE file distributed with this work for
9  * additional information.
10  */
11 namespace wcmf\lib\util;
12 
/**
 * StringUtil provides support for string manipulation.
 *
 * All methods are static and stateless.
 *
 * @author ingo herwig <ingo@wemove.com>
 */
class StringUtil {

  /**
   * Get the dump of a variable as string.
   * code from: https://www.leaseweb.com/labs/2013/10/smart-alternative-phps-var_dump-function/
   * @param $variable Variable to dump.
   * @param $strlen Max number of characters of each string to display (the full length is shown as number)
   * @param $width Max number of elements of an array to display (the full length is shown as number)
   * @param $depth Max number of levels of nested objects/array to display
   * @param $i Current nesting level (used internally for recursion, callers should not pass it)
   * @param $objects List of already dumped objects (used internally to detect recursive references)
   * @return String, on the top level prefixed with file and line of the first stack frame that has file information
   */
  public static function getDump($variable, $strlen=100, $width=25, $depth=10, $i=0, &$objects = []) {
    // NOTE: "\a" and "\b" are no PHP escape sequences, so BEL and BS are given as hex escapes
    $search = ["\0", "\x07", "\x08", "\f", "\n", "\r", "\t", "\v"];
    $replace = ['\0', '\a', '\b', '\f', '\n', '\r', '\t', '\v'];

    $string = '';

    switch (gettype($variable)) {
      case 'boolean':
        $string .= $variable ? 'true' : 'false';
        break;
      case 'integer':
      case 'double':
        $string .= $variable;
        break;
      case 'resource':
      case 'resource (closed)':
        $string .= '[resource]';
        break;
      case 'NULL':
        $string .= "null";
        break;
      case 'unknown type':
        $string .= '???';
        break;
      case 'string':
        $len = strlen($variable);
        // escape control characters and make sure the escaped version does not exceed the limit either
        $variable = substr(str_replace($search, $replace, substr($variable, 0, $strlen)), 0, $strlen);
        if ($len <= $strlen) {
          // the string fits completely
          $string .= '"'.$variable.'"';
        }
        else {
          $string .= 'string('.$len.'): "'.$variable.'"...';
        }
        break;
      case 'array':
        $len = count($variable);
        if ($i == $depth) {
          $string .= 'array('.$len.') {...}';
        }
        elseif (!$len) {
          $string .= 'array(0) {}';
        }
        else {
          $spaces = str_repeat(' ', $i*2);
          $string .= "array($len)\n".$spaces.'{';
          $count = 0;
          foreach (array_keys($variable) as $key) {
            if ($count == $width) {
              // omit the remaining elements
              $string .= "\n".$spaces." ...";
              break;
            }
            $string .= "\n".$spaces." [$key] => ";
            $string .= self::getDump($variable[$key], $strlen, $width, $depth, $i+1, $objects);
            $count++;
          }
          $string .= "\n".$spaces.'}';
        }
        break;
      case 'object':
        $id = array_search($variable, $objects, true);
        if ($id !== false) {
          // already dumped: only reference it to avoid endless recursion
          $string .= get_class($variable).'#'.($id+1).' {...}';
        }
        elseif ($i == $depth) {
          $string .= get_class($variable).' {...}';
        }
        else {
          // array_push returns the new number of elements, which serves as 1-based object id
          $id = array_push($objects, $variable);
          $array = (array)$variable;
          $spaces = str_repeat(' ', $i*2);
          $string .= get_class($variable)."#$id\n".$spaces.'{';
          foreach (array_keys($array) as $property) {
            // names of private/protected properties contain NUL bytes after the array cast
            $name = str_replace("\0", ':', trim($property));
            $string .= "\n".$spaces." [$name] => ";
            $string .= self::getDump($array[$property], $strlen, $width, $depth, $i+1, $objects);
          }
          $string .= "\n".$spaces.'}';
        }
        break;
    }

    if ($i > 0) {
      // nested call: the caller information is only added on the top level
      return $string;
    }
    $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
    do {
      $caller = array_shift($backtrace);
    }
    while ($caller && !isset($caller['file']));
    if ($caller) {
      $string = $caller['file'].':'.$caller['line']."\n".$string;
    }
    return $string;
  }

  /**
   * Truncate a string up to a number of characters while preserving whole words and HTML tags.
   * Based on https://stackoverflow.com/questions/16583676/shorten-text-without-splitting-words-or-breaking-html-tags#answer-16584383
   * @note The text is expected to be UTF-8 encoded. Lengths are counted in characters, not bytes.
   * If $exact is false, the word at the cut position is completed, so the result may be
   * longer than $length.
   * @param $text String to truncate.
   * @param $length Length of returned string (optional, default: 100)
   * @param $suffix Ending to be appended to the trimmed string (optional, default: …)
   * @param $exact Boolean whether to allow to cut inside a word or not (optional, default: false)
   * @return String
   */
  public static function cropString($text, $length=100, $suffix='…', $exact=false) {
    if (mb_strlen($text, 'UTF-8') <= $length) {
      return $text;
    }

    $isHtml = strip_tags($text) !== $text;

    // the parser gets no encoding declaration, so non-ASCII characters are passed as numeric entities
    $html = mb_encode_numericentity($text, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    $dom = new \DOMDocument();
    $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    $reachedLimit = false;
    $totalLen = 0;
    $toRemove = [];
    $walk = function(\DOMNode $node) use (&$walk, &$reachedLimit, &$totalLen, &$toRemove, $length, $suffix, $exact) {
      if ($reachedLimit) {
        // everything following the cut position is dropped (children are removed with their parent)
        $toRemove[] = $node;
        return;
      }

      // only text nodes should have text,
      // so do the splitting here
      if ($node instanceof \DOMText) {
        $value = $node->nodeValue;
        $nodeLen = mb_strlen($value, 'UTF-8');
        $totalLen += $nodeLen;
        if ($totalLen > $length) {
          // number of characters of this node that still fit into the limit
          $keep = $nodeLen - ($totalLen - $length);
          if ($exact) {
            $value = mb_substr($value, 0, $keep, 'UTF-8');
          }
          else {
            // complete the current word: cut at the first space at or after the last fitting character
            $spacePos = mb_strpos($value, ' ', max(0, $keep - 1), 'UTF-8');
            if ($spacePos !== false) {
              $value = mb_substr($value, 0, $spacePos, 'UTF-8');
            }
            // without a following space the word extends to the end of the node, which is kept completely
          }
          // don't add suffix to empty node
          $node->nodeValue = $value.($value !== '' ? $suffix : '');
          $reachedLimit = true;
        }
      }

      // if node has children, walk its child elements
      if (isset($node->childNodes)) {
        foreach ($node->childNodes as $child) {
          $walk($child);
        }
      }
    };
    $walk($dom);

    // remove any nodes that exceed limit
    foreach ($toRemove as $child) {
      $child->parentNode->removeChild($child);
    }

    $result = $dom->saveHTML();
    return $isHtml ? $result : html_entity_decode(strip_tags($result));
  }

  /**
   * Create an excerpt from the given text around the given phrase
   * code based on: http://stackoverflow.com/questions/1292121/how-to-generate-the-snippet-like-generated-by-google-with-php-and-mysql
   * @note The phrase is searched case insensitive. If it does not occur, the excerpt is taken from the
   * beginning of the text. The excerpt is cut at spaces, if there are any between the window borders
   * and the phrase (a leading space at the cut position is part of the result).
   * @param $string The text to create the excerpt from
   * @param $phrase The phrase to search for
   * @param $radius Number of characters to include before and after the phrase (default: 100)
   * @return String
   */
  public static function excerpt($string, $phrase, $radius = 100) {
    $textLen = strlen($string);
    if ($radius > $textLen) {
      return $string;
    }
    $phraseLen = strlen($phrase);
    if ($radius < $phraseLen) {
      $radius = $phraseLen;
    }
    // fall back to the beginning of the text, if the phrase is empty or not contained
    $pos = $phraseLen > 0 ? stripos($string, $phrase) : false;
    if ($pos === false) {
      $pos = 0;
    }

    $startPos = max(0, $pos - $radius);
    $endPos = min($textLen, $pos + $phraseLen + $radius);

    // make sure to cut at spaces, but never cut into the phrase and
    // don't drop words, if the window already ends at the text boundaries
    $from = $startPos;
    if ($startPos > 0) {
      $firstSpacePos = strpos($string, " ", $startPos);
      if ($firstSpacePos !== false && $firstSpacePos <= $pos) {
        $from = $firstSpacePos;
      }
    }
    $to = $endPos;
    if ($endPos < $textLen) {
      // negative offset: search backwards starting at $endPos
      $lastSpacePos = strrpos($string, " ", $endPos - $textLen);
      if ($lastSpacePos !== false && $lastSpacePos >= $pos + $phraseLen) {
        $to = $lastSpacePos;
      }
    }

    $excerpt = substr($string, $from, $to - $from);

    // remove open tags
    return preg_replace('/^[^<]*?>|<[^>]*?$/', '', $excerpt);
  }

  /**
   * Extract urls from a string.
   * @param $string The string to search in
   * @return An array with urls
   * @note This method searches for occurences of <a..href="xxx"..>, <img..src="xxx"..>, <video..src="xxx"..>,
   * <audio..src="xxx"..>, <input..src="xxx"..>, <form..action="xxx"..>, <link..href="xxx"..>, <script..src="xxx"..>
   * and extracts xxx. Only double quoted attribute values are recognized, mailto links are ignored.
   */
  public static function getUrls($string) {
    $urls = [];

    preg_match_all("/<a[^>]+href=\"([^\">]+)/i", $string, $links);
    foreach ($links[1] as $link) {
      // find urls in javascript popup links
      if (preg_match("/javascript:.*window.open[\(]*'([^']+)/i", $link, $popup)) {
        $link = $popup[1];
      }
      // skip mailto links
      if (!preg_match("/^mailto:/i", $link)) {
        $urls[] = $link;
      }
    }

    // the order defines the order of the urls in the result
    $patterns = [
      "/<img[^>]+src=\"([^\">]+)/i",
      "/<video[^>]+src=\"([^\">]+)/i",
      "/<audio[^>]+src=\"([^\">]+)/i",
      "/<input[^>]+src=\"([^\">]+)/i",
      "/<form[^>]+action=\"([^\">]+)/i",
      "/<link[^>]+href=\"([^\">]+)/i",
      "/<script[^>]+src=\"([^\">]+)/i",
    ];
    foreach ($patterns as $pattern) {
      preg_match_all($pattern, $string, $matches);
      $urls = array_merge($urls, $matches[1]);
    }
    return $urls;
  }

  /**
   * Split a quoted string
   * code from: http://php3.de/manual/de/function.split.php
   * @code
   * $string = '"hello, world", "say \"hello\"", 123, unquotedtext';
   * $result = StringUtil::quotesplit($string);
   *
   * // results in:
   * // [hello, world] [say "hello"] [123] [unquotedtext]
   *
   * @endcode
   *
   * @note Items may be enclosed in double or single quotes, unquoted items are separated by comma or semicolon.
   * @param $string The string to split
   * @return An array of strings
   */
  public static function quotesplit($string) {
    $whitespace = " \r\t\n";
    $r = [];
    $p = 0;
    $l = strlen($string);
    while ($p < $l) {
      // skip whitespace before the next item
      while (($p < $l) && (strpos($whitespace, $string[$p]) !== false)) {
        $p++;
      }
      if ($p >= $l) {
        // only trailing whitespace was left
        break;
      }
      $chr = $string[$p];
      if ($chr == '"' || $chr == "'") {
        // quoted item: read up to the matching unescaped quote
        $p++;
        $q = $p;
        while (($p < $l) && ($string[$p] != $chr)) {
          // a backslash escapes the following character
          $p += ($string[$p] == '\\') ? 2 : 1;
        }
        $r[] = stripslashes(substr($string, $q, $p-$q));
        // skip the closing quote, whitespace and the delimiter
        $p++;
        while (($p < $l) && (strpos($whitespace, $string[$p]) !== false)) {
          $p++;
        }
        $p++;
      }
      else {
        // unquoted item: read up to the next delimiter
        $q = $p;
        while (($p < $l) && (strpos(",;", $string[$p]) === false)) {
          $p++;
        }
        $r[] = stripslashes(trim(substr($string, $q, $p-$q)));
        // skip the delimiter
        $p++;
      }
    }
    return $r;
  }

  /**
   * Split string preserving quoted strings
   * code based on: http://www.php.net/manual/en/function.explode.php#94024
   * @param $string String to split
   * @param $delim Regexp to use in preg_split
   * @param $quoteChr Quote character
   * @param $preserve Boolean whether to preserve the quote character or not
   * @return Array
   */
  public static function splitQuoted($string, $delim='/ /', $quoteChr='"', $preserve=false){
    $quote = $preserve ? $quoteChr : '';
    $resArr = [];
    // after splitting at the quote character, every second part is a quoted one
    foreach (explode($quoteChr, $string) as $n => $encItem) {
      if ($n % 2) {
        // quoted part: append it unsplit to the last item
        $resArr[] = array_pop($resArr).$quote.$encItem.$quote;
      }
      else {
        // unquoted part: the first fragment continues the last item, the others are new items
        $expDelArr = preg_split($delim, $encItem);
        $resArr[] = array_pop($resArr).array_shift($expDelArr);
        $resArr = array_merge($resArr, $expDelArr);
      }
    }
    return $resArr;
  }

  /**
   * Convert a string in underscore notation to camel case notation.
   * Code from http://snipt.net/hongster/underscore-to-camelcase/
   * @param $string The string to convert
   * @param $firstLowerCase Boolean whether the first character should be lowercase or not (default: _false_)
   * @return The converted string (empty string, if the given value is not a string)
   */
  public static function underScoreToCamelCase($string, $firstLowerCase=false) {
    if (!is_string($string)) {
      return '';
    }
    $str = str_replace(' ', '', ucwords(str_replace('_', ' ', $string)));
    // lcfirst also handles the empty string
    return $firstLowerCase ? lcfirst($str) : $str;
  }

  /**
   * Escape characters of a string for use in a regular expression
   * Code from http://php.net/manual/de/function.preg-replace.php
   * @note The escaped characters are \ / ^ . $ | ( ) [ ] * + ? { }
   * @param $string The string
   * @return The escaped string
   */
  public static function escapeForRegex($string) {
    // prefix each special character (including the backslash itself) with a backslash
    return preg_replace('~[\\\\/^.$|()\[\]*+?{}]~', '\\\\$0', $string);
  }

  /**
   * Remove a trailing comma (optionally followed by one space), if existing.
   * @param $string The string to crop
   * @return The string
   */
  public static function removeTrailingComma($string) {
    return preg_replace('/, ?$/', '', $string);
  }

  /**
   * Get the boolean value of a string
   * @param $string
   * @return Boolean or the string, if it does not represent a boolean.
   */
  public static function getBoolean($string) {
    // FILTER_NULL_ON_FAILURE yields null for values that are no boolean representation
    $val = filter_var($string, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
    return $val === null ? $string : $val;
  }

  /**
   * Converts all accent characters to ASCII characters.
   * Code from http://stackoverflow.com/questions/2103797/url-friendly-username-in-php/2103815#2103815
   * @param $string Text that might have accent characters
   * @return string Filtered string with replaced "nice" characters.
   */
  public static function slug($string) {
    $search = ['Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'ß'];
    $replace = ['AE', 'OE', 'UE', 'ae', 'oe', 'ue', 'ss'];
    $string = str_replace($search, $replace, $string);

    // reduce accented characters to their base letters using their entity names (e.g. &eacute; -> e)
    $entities = htmlentities($string, ENT_QUOTES, 'UTF-8');
    $reduced = preg_replace('~&([a-z]{1,2})(?:acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i', '$1', $entities);
    $ascii = html_entity_decode($reduced, ENT_QUOTES, 'UTF-8');

    // replace all remaining non alphanumeric sequences by a dash
    return strtolower(trim(preg_replace('~[^0-9a-z]+~i', '-', $ascii), '-'));
  }

  /**
   * Generate a v4 UUID
   * Code from https://stackoverflow.com/questions/2040240/php-function-to-generate-v4-uuid#15875555
   * @return string
   */
  public static function guidv4() {
    $data = random_bytes(16);
    $data[6] = chr(ord($data[6]) & 0x0f | 0x40); // set version to 0100
    $data[8] = chr(ord($data[8]) & 0x3f | 0x80); // set bits 6-7 to 10

    // format the 32 hex digits as 8-4-4-4-12
    return vsprintf('%s%s-%s-%s-%s-%s%s%s', str_split(bin2hex($data), 4));
  }
}
441 ?>
static removeTrailingComma($string)
Remove a trailing comma, if existing.
Definition: StringUtil.php:396
static getDump($variable, $strlen=100, $width=25, $depth=10, $i=0, &$objects=[])
Get the dump of a variable as string.
Definition: StringUtil.php:29
static splitQuoted($string, $delim='/ /', $quoteChr='"', $preserve=false)
Split string preserving quoted strings code based on: http://www.php.net/manual/en/function....
Definition: StringUtil.php:341
static getBoolean($string)
Get the boolean value of a string.
Definition: StringUtil.php:405
static underScoreToCamelCase($string, $firstLowerCase=false)
Convert a string in underscore notation to camel case notation.
Definition: StringUtil.php:365
StringUtil provides support for string manipulation.
Definition: StringUtil.php:18
static excerpt($string, $phrase, $radius=100)
Create an excerpt from the given text around the given phrase code based on: http://stackoverflow....
Definition: StringUtil.php:195
static slug($string)
Converts all accent characters to ASCII characters.
Definition: StringUtil.php:419
static escapeForRegex($string)
Escape characters of a string for use in a regular expression Code from http://php....
Definition: StringUtil.php:384
Utility classes.
Definition: namespaces.php:97
static guidv4()
Generate a v4 UUID Code from https://stackoverflow.com/questions/2040240/php-function-to-generate-v4-...
Definition: StringUtil.php:433
static quotesplit($string)
Split a quoted string code from: http://php3.de/manual/de/function.split.php.
Definition: StringUtil.php:275
static getUrls($string)
Extract URLs from a string.
Definition: StringUtil.php:235
static cropString($text, $length=100, $suffix='…', $exact=false)
Truncate a string up to a number of characters while preserving whole words and HTML tags.
Definition: StringUtil.php:137