StringUtil.php
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2015 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of the MIT License.
7  *
8  * See the LICENSE file distributed with this work for
9  * additional information.
10  */
11 namespace wcmf\lib\util;
12 
/**
 * StringUtil provides support for string manipulation.
 *
 * All methods are static and stateless.
 *
 * @author ingo herwig <ingo@wemove.com>
 */
class StringUtil {

  /**
   * Get the dump of a variable as string.
   * @param $var The variable to dump.
   * @return String containing whatever var_dump() prints for the variable
   */
  public static function getDump($var) {
    // capture the output of var_dump() in a buffer instead of sending it to the client
    ob_start();
    var_dump($var);
    return ob_get_clean();
  }

  /**
   * Truncate a string up to a number of characters while preserving whole words and HTML tags
   * code based on: http://www.dzone.com/snippets/truncate-text-preserving-html
   *
   * The text is cut behind the last space (or tag) in front of the limit, so the
   * result never ends with a partial word. Tags that are still open at the cut are
   * closed behind the suffix. A text that fits into the limit is returned unchanged.
   *
   * @note Lengths and offsets are counted in bytes, not in characters.
   * @note If the part in front of the limit contains neither a space nor a tag, no
   * whole word fits and only the suffix is returned.
   *
   * @param $text String to truncate.
   * @param $length Length of returned string, excluding suffix (and excluding markup, if $isHTML is true).
   * @param $suffix Ending to be appended to the trimmed string.
   * @param $isHTML If true, HTML tags would be handled correctly
   * @return String
   */
  public static function cropString($text, $length=100, $suffix='…', $isHTML=true) {
    $text = (string)$text;
    // elements that never have a closing tag ('image' is kept from the original list)
    $voidTags = array(
      'area'=>true, 'base'=>true, 'br'=>true, 'col'=>true, 'embed'=>true, 'hr'=>true,
      'image'=>true, 'img'=>true, 'input'=>true, 'link'=>true, 'meta'=>true,
      'param'=>true, 'source'=>true, 'track'=>true, 'wbr'=>true
    );
    // number of bytes occupied by markup in front of the cut
    $tagBytes = 0;
    // names of the tags that are still open at the cut (innermost last)
    $openTags = array();
    if ($isHTML) {
      // each match consists of one tag and the text following it
      preg_match_all('/<[^>]+>([^<]*)/', $text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
      foreach ($matches as $match) {
        $tagOffset = $match[0][1];
        $textOffset = $match[1][1];
        // stop at the first tag that starts behind the visible limit
        if ($tagOffset - $tagBytes >= $length) {
          break;
        }
        $tag = substr($text, $tagOffset, $textOffset - $tagOffset);
        // the tag name is everything between '<' and the first whitespace or '>'
        $name = (string)substr((string)strtok($tag, " \t\n\r\0\x0B>"), 1);
        if ($name !== '' && $name[0] === '/') {
          // closing tag: only pop, if it closes the innermost open tag
          if (end($openTags) === substr($name, 1)) {
            array_pop($openTags);
          }
        }
        else {
          // unpaired tags (void elements, <br/>) must not be closed later
          $isSelfClosed = (substr($tag, -2) === '/>');
          $name = rtrim($name, '/');
          if ($name !== '' && !$isSelfClosed && !isset($voidTags[strtolower($name)])) {
            $openTags[] = $name;
          }
        }
        $tagBytes += $textOffset - $tagOffset;
      }
    }

    $textLength = strlen($text);
    $cut = min($textLength, $length + $tagBytes);
    if ($cut >= $textLength) {
      // the whole text fits into the limit: there is nothing to crop
      return $text;
    }

    // output without closing tags
    $output = (string)substr($text, 0, $cut);
    // closing tags for everything that is still open (innermost first)
    $closingTags = count($openTags) > 0 ? '</'.implode('></', array_reverse($openTags)).'>' : '';

    // Find last space or HTML tag (the offset of the last piece is the position behind it)
    $pieces = preg_split('/<.*>| /', $output, -1, PREG_SPLIT_OFFSET_CAPTURE);
    $lastPiece = is_array($pieces) ? end($pieces) : false;
    $pos = is_array($lastPiece) ? (int)$lastPiece[1] : 0;

    // Get everything until last space
    $head = (string)substr($output, 0, $pos);
    // Get the rest (the partial word) together with the closing tags
    $tail = (string)substr($output, $pos).$closingTags;
    // Only the tags of the last bit are kept, the partial word is dropped
    preg_match_all('/<(.*?)>/s', $tail, $tailTags);

    $output = $head.$suffix.implode('', $tailTags[0]);
    // HTML comments are detected as a tag named "!--": remove the bogus closing tag
    return str_replace('</!-->', '', $output);
  }

  /**
   * Create an excerpt from the given text around the given phrase
   * code based on: http://stackoverflow.com/questions/1292121/how-to-generate-the-snippet-like-generated-by-google-with-php-and-mysql
   *
   * The excerpt spans up to $radius bytes on both sides of the first (case insensitive)
   * occurrence of the phrase and is shortened to whole words. If the phrase does
   * not occur, the excerpt is taken from the beginning of the text. Tags that are cut
   * in half at the borders of the excerpt are removed.
   *
   * @param $string The text to create the excerpt from
   * @param $phrase The phrase to search for
   * @param $radius Number of bytes to keep on each side of the phrase (default: 100),
   * raised to the length of the phrase, if smaller
   * @return String, the complete text, if it is shorter than $radius
   */
  public static function excerpt($string, $phrase, $radius = 100) {
    $string = (string)$string;
    $phrase = (string)$phrase;
    $textLen = strlen($string);
    if ($radius > $textLen) {
      return $string;
    }
    $phraseLen = strlen($phrase);
    if ($radius < $phraseLen) {
      $radius = $phraseLen;
    }

    // an empty or missing phrase anchors the excerpt at the beginning of the text
    $pos = $phraseLen > 0 ? stripos($string, $phrase) : false;
    if ($pos === false) {
      $pos = 0;
    }
    $phraseEnd = $pos + $phraseLen;

    $startPos = max(0, $pos - $radius);
    $endPos = min($textLen, $phraseEnd + $radius);

    // make sure to cut at spaces, but never cut into the phrase itself
    if ($startPos > 0) {
      // the window starts inside the text: move on to the beginning of the next word
      $firstSpacePos = strpos($string, ' ', $startPos);
      if ($firstSpacePos !== false && $firstSpacePos < $pos) {
        $startPos = $firstSpacePos + 1;
      }
    }
    if ($endPos < $textLen) {
      // the window ends inside the text: go back to the end of the last whole word
      $lastSpacePos = strrpos(substr($string, 0, $endPos), ' ');
      if ($lastSpacePos !== false && $lastSpacePos >= $phraseEnd) {
        $endPos = $lastSpacePos;
      }
    }
    $excerpt = (string)substr($string, $startPos, $endPos - $startPos);

    // remove open tags
    return preg_replace('/^[^<]*?>|<[^>]*?$/', '', $excerpt);
  }

  /**
   * Extract urls from a string.
   * @param $string The string to search in
   * @return An array with urls
   * @note This method searches for occurrences of <a..href="xxx"..>, <img..src="xxx"..>,
   * <input..src="xxx"..>, <form..action="xxx"..>, <link..href="xxx"..>, <script..src="xxx"..>
   * and extracts xxx. Only attribute values enclosed in double quotes are found.
   * For javascript popup links the url passed to window.open is extracted, mailto links are omitted.
   */
  public static function getUrls($string) {
    $urls = array();

    preg_match_all('/<a[^>]+href="([^">]+)/i', $string, $links);
    foreach ($links[1] as $link) {
      // find urls in javascript popup links
      if (preg_match("/javascript:.*window.open[\(]*'([^']+)/i", $link, $popup)) {
        $link = $popup[1];
      }
      // remove mailto links
      if (preg_match('/^mailto:/i', $link)) {
        continue;
      }
      $urls[] = $link;
    }

    // the remaining references are appended in this order: images, buttons, form actions, css, scripts
    $patterns = array(
      '/<img[^>]+src="([^">]+)/i',
      '/<input[^>]+src="([^">]+)/i',
      '/<form[^>]+action="([^">]+)/i',
      '/<link[^>]+href="([^">]+)/i',
      '/<script[^>]+src="([^">]+)/i'
    );
    foreach ($patterns as $pattern) {
      preg_match_all($pattern, $string, $matches);
      $urls = array_merge($urls, $matches[1]);
    }
    return $urls;
  }

  /**
   * Split a quoted string
   * code from: http://php3.de/manual/de/function.split.php
   * @code
   * $string = '"hello, world", "say \"hello\"", 123, unquotedtext';
   * $result = StringUtil::quotesplit($string);
   *
   * // results in:
   * // ['hello, world'] [say "hello"] [123] [unquotedtext]
   *
   * @endcode
   *
   * Items may be enclosed in double or single quotes, unquoted items end at the
   * next comma or semicolon. Backslashes escape the following character and are
   * removed from the result.
   *
   * @param $string The string to split
   * @return An array of strings
   */
  public static function quotesplit($string) {
    $string = (string)$string;
    $result = array();
    $whitespace = " \r\t\n";
    $pos = 0;
    $len = strlen($string);
    while ($pos < $len) {
      // skip whitespace in front of the next item
      $pos += strspn($string, $whitespace, $pos);
      if ($pos >= $len) {
        // only whitespace was left: do not read behind the end and do not add an empty item
        break;
      }
      $char = $string[$pos];
      if ($char == '"' || $char == "'") {
        // quoted item: read up to the matching quote
        $quote = $char;
        $start = ++$pos;
        while ($pos < $len && $string[$pos] != $quote) {
          // a backslash protects the following character (e.g. an escaped quote)
          $pos += ($string[$pos] == '\\') ? 2 : 1;
        }
        $result[] = stripslashes((string)substr($string, $start, $pos-$start));
        // step over the closing quote
        $pos++;
      }
      else {
        // unquoted item: read up to the next delimiter
        $start = $pos;
        $pos += strcspn($string, ',;', $pos);
        $result[] = stripslashes(trim((string)substr($string, $start, $pos-$start)));
      }
      // skip whitespace behind the item and step over the delimiter
      if ($pos < $len) {
        $pos += strspn($string, $whitespace, $pos);
      }
      $pos++;
    }
    return $result;
  }

  /**
   * Split string preserving quoted strings
   * code based on: http://www.php.net/manual/en/function.explode.php#94024
   *
   * The string is split at the delimiter, except for the parts that are enclosed
   * in the quote character. Quoted parts are attached to the adjacent items.
   *
   * @param $string String to split
   * @param $delim Regexp to use in preg_split
   * @param $quoteChr Quote character
   * @param $preserve Boolean whether to preserve the quote character or not
   * @return Array
   */
  public static function splitQuoted($string, $delim='/ /', $quoteChr='"', $preserve=false){
    $result = array();
    $wrap = $preserve ? $quoteChr : '';
    // after splitting at the quote character every second segment was enclosed in quotes
    foreach (explode($quoteChr, $string) as $index => $segment) {
      // the last item is continued by the beginning of the current segment
      $last = array_pop($result);
      if ($index % 2) {
        // quoted segment: attach it as a whole
        $result[] = $last.$wrap.$segment.$wrap;
      }
      else {
        // unquoted segment: split it, the first part still belongs to the last item
        $parts = preg_split($delim, $segment);
        $result[] = $last.array_shift($parts);
        $result = array_merge($result, $parts);
      }
    }
    return $result;
  }

  /**
   * Convert a string in underscore notation to camel case notation.
   * Code from http://snipt.net/hongster/underscore-to-camelcase/
   * @param $string The string to convert
   * @param $firstLowerCase Boolean whether the first character should be lowercase or not (default: _false_)
   * @return The converted string, an empty string, if $string is not a string
   */
  public static function underScoreToCamelCase($string, $firstLowerCase=false) {
    if (!is_string($string)) {
      return '';
    }
    // turn the underscores into word boundaries, capitalize each word and join the words again
    $words = ucwords(str_replace('_', ' ', $string));
    $camelCase = str_replace(' ', '', $words);
    // lcfirst() is safe for empty strings and replaces the removed $str{0} offset syntax
    return $firstLowerCase ? lcfirst($camelCase) : $camelCase;
  }

  /**
   * Escape characters of a string for use in a regular expression
   *
   * The result matches the given string literally, if it is used in a pattern
   * that is delimited by slashes.
   *
   * @param $string The string
   * @return The escaped string
   */
  public static function escapeForRegex($string) {
    // preg_quote() escapes all special characters including the backslash, the
    // second argument additionally escapes the pattern delimiter
    return preg_quote($string, '/');
  }

  /**
   * Remove a trailing comma, if existing.
   * A single space following the comma is removed as well.
   * @param $string The string to crop
   * @return The string
   */
  public static function removeTrailingComma($string) {
    return preg_replace('/, ?$/', '', $string);
  }

  /**
   * Get the boolean value of a string
   * @param $string
   * @return Boolean or the string, if it does not represent a boolean.
   */
  public static function getBoolean($string) {
    // with FILTER_NULL_ON_FAILURE the filter returns null for values that are no booleans
    $val = filter_var($string, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
    return ($val === null) ? $string : $val;
  }

  /**
   * Convert a string into a lowercase, dash separated string that consists of
   * ASCII letters and digits only. Accent characters are replaced by their base
   * characters, german umlauts by their two letter transcription.
   * Code from http://stackoverflow.com/questions/2103797/url-friendly-username-in-php/2103815#2103815
   * @param $string Text that might have accent characters (expected to be UTF-8 encoded)
   * @return string Filtered string with replaced "nice" characters.
   */
  public static function slug($string) {
    // transcribe german umlauts first, otherwise they would be reduced to their base characters
    $search = array('Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü', 'ß');
    $replace = array('AE', 'OE', 'UE', 'ae', 'oe', 'ue', 'ss');
    $string = str_replace($search, $replace, $string);

    // encode accent characters as entities (e.g. &eacute;) and keep the base character only
    $encoded = htmlentities($string, ENT_QUOTES, 'UTF-8');
    $ascii = preg_replace('~&([a-z]{1,2})(?:acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i', '$1', $encoded);
    $decoded = html_entity_decode($ascii, ENT_QUOTES, 'UTF-8');

    // collapse everything else into single dashes
    $dashed = preg_replace('~[^0-9a-z]+~i', '-', $decoded);
    return strtolower(trim($dashed, '-'));
  }
}
326 ?>
static excerpt($string, $phrase, $radius=100)
Create an excerpt from the given text around the given phrase code based on: http://stackoverflow.com/questions/1292121/how-to-generate-the-snippet-like-generated-by-google-with-php-and-mysql.
Definition: StringUtil.php:95
static removeTrailingComma($string)
Remove a trailing comma, if existing.
Definition: StringUtil.php:294
Utility classes.
Definition: namespaces.php:97
StringUtil provides support for string manipulation.
Definition: StringUtil.php:18
static getBoolean($string)
Get the boolean value of a string.
Definition: StringUtil.php:303
static cropString($text, $length=100, $suffix='…', $isHTML=true)
Truncate a string up to a number of characters while preserving whole words and HTML tags code based ...
Definition: StringUtil.php:42
static splitQuoted($string, $delim='/ /', $quoteChr='"', $preserve=false)
Split string preserving quoted strings code based on: http://www.php.net/manual/en/function.explode.php#94024.
Definition: StringUtil.php:239
static escapeForRegex($string)
Escape characters of a string for use in a regular expression Code from http://php.net/manual/de/function.preg-replace.php.
Definition: StringUtil.php:282
static quotesplit($string)
Split a quoted string code from: http://php3.de/manual/de/function.split.php.
Definition: StringUtil.php:173
static getUrls($string)
Extract URLs from a string.
Definition: StringUtil.php:135
static underScoreToCamelCase($string, $firstLowerCase=false)
Convert a string in underscore notation to camel case notation.
Definition: StringUtil.php:263
static slug($string)
Converts all accent characters to ASCII characters.
Definition: StringUtil.php:317
static getDump($var)
Get the dump of a variable as string.
Definition: StringUtil.php:25