1: <?php
2: /**
3: * Utility Class for Horde_Ldap
4: *
* This class provides functionality to the other classes of Horde_Ldap, but
* most of its methods can be used separately as well.
7: *
8: * Copyright 2009 Benedikt Hallinger
9: * Copyright 2010-2012 Horde LLC (http://www.horde.org/)
10: *
11: * @category Horde
12: * @package Ldap
13: * @author Benedikt Hallinger <beni@php.net>
14: * @author Jan Schneider <jan@horde.org>
15: * @license http://www.gnu.org/licenses/lgpl-3.0.html LGPL-3.0
16: */
class Horde_Ldap_Util
{
    /**
     * Explodes the given DN into its elements.
     *
     * {@link http://www.ietf.org/rfc/rfc2253.txt RFC 2253} says, a
     * Distinguished Name is a sequence of Relative Distinguished Names (RDNs),
     * which themselves are sets of Attributes. For each RDN an array is
     * constructed where the RDN part is stored.
     *
     * For example, the DN 'OU=Sales+CN=J. Smith,DC=example,DC=net' is exploded
     * to:
     * <code>
     * array(array('OU=Sales', 'CN=J. Smith'),
     *       'DC=example',
     *       'DC=net')
     * </code>
     *
     * [NOT IMPLEMENTED] DNs might also contain values, which are the bytes of
     * the BER encoding of the X.500 AttributeValue rather than some LDAP
     * string syntax. These values are hex-encoded and prefixed with a #. To
     * distinguish such BER values, explodeDN uses references to the
     * actual values, e.g. '1.3.6.1.4.1.1466.0=#04024869,DC=example,DC=com' is
     * exploded to:
     * <code>
     * array(array('1.3.6.1.4.1.1466.0' => "\004\002Hi"),
     *       array('DC' => 'example'),
     *       array('DC' => 'com'))
     * </code>
     * See {@link http://www.vijaymukhi.com/vmis/berldap.htm} for more
     * information on BER.
     *
     * It also performs the following operations on the given DN:
     * - Unescape "\" followed by ",", "+", """, "\", "<", ">", ";", "#", "=",
     *   " ", or a hexpair and strings beginning with "#".
     * - Removes the leading 'OID.' characters if the type is an OID instead of
     *   a name.
     * - If an RDN contains multiple parts, the parts are re-ordered so that
     *   the attribute type names are in alphabetical order.
     *
     * $options is a list of name/value pairs, valid options are:
     * - casefold:   Controls case folding of attribute type names.
     *               Attribute values are not affected by this option.
     *               The default is to uppercase. Valid values are:
     *               - lower: Lowercase attribute type names.
     *               - upper: Uppercase attribute type names. This is the
     *                        default.
     *               - none:  Do not change attribute type names.
     * - reverse:    If true, the RDN sequence is reversed.
     * - onlyvalues: If true, then only attribute values are returned ('foo'
     *               instead of 'cn=foo').
     *
     * @todo implement BER
     *
     * @param string $dn      The DN that should be exploded.
     * @param array $options  Options to use.
     *
     * @return array  Parts of the exploded DN.
     * @throws Horde_Ldap_Exception if canonicalDN() rejects an RDN.
     */
    public static function explodeDN($dn, array $options = array())
    {
        if (!isset($options['onlyvalues'])) {
            $options['onlyvalues'] = false;
        }
        if (!isset($options['reverse'])) {
            $options['reverse'] = false;
        }
        if (!isset($options['casefold'])) {
            $options['casefold'] = 'upper';
        }

        // Escaping of DN and stripping of "OID.".
        $dn = self::canonicalDN($dn, array('casefold' => $options['casefold']));

        // Splitting the DN at every unescaped comma.
        $dn_array = preg_split('/(?<!\\\\),/', $dn);

        // Construct subarrays for multivalued RDNs, apply casefolding and
        // unescape the values.
        foreach ($dn_array as $key => $value) {
            $value_u = self::unescapeDNValue($value);
            $rdns = self::splitRDNMultivalue($value_u[0]);
            if (count($rdns) > 1) {
                // Multivalued RDN: every sub-RDN is formatted on its own.
                foreach ($rdns as $subrdn_k => $subrdn_v) {
                    $rdns[$subrdn_k] = self::_formatRDN($subrdn_v, $options);
                }
                $dn_array[$key] = $rdns;
            } else {
                // Singlevalued RDN: the still escaped part is formatted.
                $dn_array[$key] = self::_formatRDN($value, $options);
            }
        }

        if ($options['reverse']) {
            return array_reverse($dn_array);
        }

        return $dn_array;
    }

    /**
     * Applies casefolding and value extraction to a single attr=value pair
     * and unescapes the result.
     *
     * Helper for explodeDN().
     *
     * @param string $rdn     A single attr=value pair.
     * @param array $options  Options as normalized by explodeDN(); the keys
     *                        'casefold' and 'onlyvalues' are read.
     *
     * @return string  The formatted and unescaped RDN (or only its value).
     */
    protected static function _formatRDN($rdn, array $options)
    {
        // Casefolding of the attribute type name. Callbacks are used because
        // the "e" pattern modifier is no longer available in PHP 7 and later.
        if ($options['casefold'] == 'upper') {
            $rdn = preg_replace_callback(
                '/^\w+=/',
                function ($matches) {
                    return Horde_String::upper($matches[0]);
                },
                $rdn
            );
        }
        if ($options['casefold'] == 'lower') {
            $rdn = preg_replace_callback(
                '/^\w+=/',
                function ($matches) {
                    return Horde_String::lower($matches[0]);
                },
                $rdn
            );
        }

        if ($options['onlyvalues']) {
            // Keep everything after the first unescaped "=". An RDN without a
            // value (e.g. "CN=") yields an empty string.
            if (preg_match('/(.+?)(?<!\\\\)=(.+)/', $rdn, $matches)) {
                $rdn = $matches[2];
            } else {
                $rdn = '';
            }
        }

        $unescaped = self::unescapeDNValue($rdn);
        return $unescaped[0];
    }

    /**
     * Escapes DN values according to RFC 2253.
     *
     * Escapes the given VALUES according to RFC 2253 so that they can be
     * safely used in LDAP DNs. The characters ",", "+", """, "\", "<", ">",
     * ";", "#", "=" with a special meaning in RFC 2253 are preceded by a
     * backslash. Control characters with an ASCII code < 32 are represented as
     * \hexpair. Finally all leading and trailing spaces are converted to
     * sequences of \20.
     *
     * Empty values are returned unchanged.
     *
     * @param string|array $values  DN values that should be escaped.
     *
     * @return array  The escaped values.
     */
    public static function escapeDNValue($values)
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        foreach ($values as $key => $val) {
            // Escaping of DN meta characters.
            $val = addcslashes($val, '\\,+"<>;#=');

            // ASCII < 32 escaping.
            $val = self::asc2hex32($val);

            // Convert all leading and trailing spaces to sequences of \20.
            // Other whitespace has already been hex-escaped above, so only
            // literal spaces are left to handle.
            $len = strlen($val);
            $leading = strspn($val, ' ');
            if ($leading == $len) {
                // Empty value or spaces only.
                $val = str_repeat('\20', $len);
            } else {
                $trailing = $len - strlen(rtrim($val, ' '));
                $val = str_repeat('\20', $leading)
                    . substr($val, $leading, $len - $leading - $trailing)
                    . str_repeat('\20', $trailing);
            }

            $values[$key] = $val;
        }

        return $values;
    }

    /**
     * Unescapes DN values according to RFC 2253.
     *
     * Reverts the conversion done by escapeDNValue().
     *
     * Any escape sequence starting with a backslash - hexpair or special
     * character - will be transformed back to the corresponding character.
     *
     * @param string|array $values  DN values.
     *
     * @return array  Unescaped DN values.
     */
    public static function unescapeDNValue($values)
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        foreach ($values as $key => $val) {
            // Strip slashes from special chars. The replacements are applied
            // one after another, in the order listed.
            $val = str_replace(
                array('\\\\', '\,', '\+', '\"', '\<', '\>', '\;', '\#', '\='),
                array('\\', ',', '+', '"', '<', '>', ';', '#', '='),
                $val
            );

            // Translate hex code into ascii.
            $values[$key] = self::hex2asc($val);
        }

        return $values;
    }

    /**
     * Converts a DN into a canonical form.
     *
     * DN can either be a string or an array as returned by explodeDN(),
     * which is useful when constructing a DN. The DN array may be
     * indexed (each array value is a OCL=VALUE pair) or associative (array key
     * is OCL and value is VALUE).
     *
     * It performs the following operations on the given DN:
     * - Removes the leading 'OID.' characters if the type is an OID instead of
     *   a name.
     * - Escapes all RFC 2253 special characters (",", "+", """, "\", "<", ">",
     *   ";", "#", "="), slashes ("/"), and any other character where the ASCII
     *   code is < 32 as \hexpair.
     * - Converts all leading and trailing spaces in values to be \20.
     * - If an RDN contains multiple parts, the parts are re-ordered so that
     *   the attribute type names are in alphabetical order.
     *
     * $options is a list of name/value pairs, valid options are:
     *
     * - casefold:  Controls case folding of attribute type names. Attribute
     *              values are not affected by this option. The default is to
     *              uppercase. Valid values are:
     *              - lower: Lowercase attribute type names.
     *              - upper: Uppercase attribute type names.
     *              - none:  Do not change attribute type names.
     * - reverse:   If true, the RDN sequence is reversed.
     * - separator: Separator to use between RDNs. Defaults to comma (',').
     *              It is matched literally when splitting a string DN.
     *
     * The empty string "" is a valid DN, so be sure not to do a "$can_dn ==
     * false" test, because an empty string evaluates to false. Use the "==="
     * operator instead.
     *
     * NOTE(review): the parts of a multivalued RDN are canonicalized with the
     * default options, i.e. the 'casefold' option is not passed on to them.
     * Confirm whether this is intended before changing it.
     *
     * @param array|string $dn  The DN.
     * @param array $options    Options to use.
     *
     * @return boolean|string  The canonical DN. False is only passed through
     *                         if a nested call returns it.
     * @throws Horde_Ldap_Exception if an RDN is not in attr=value format.
     */
    public static function canonicalDN($dn, $options = array())
    {
        if ($dn === '') {
            // Empty DN is valid.
            return $dn;
        }

        // Options check.
        $options['reverse'] = !empty($options['reverse']);
        if (!isset($options['casefold'])) {
            $options['casefold'] = 'upper';
        }
        if (!isset($options['separator'])) {
            $options['separator'] = ',';
        }

        if (!is_array($dn)) {
            // It is not clear to me if the perl implementation splits by the
            // user defined separator or if it just uses this separator to
            // construct the new DN.
            // The separator is quoted so that characters with a special
            // meaning in regular expressions are matched literally.
            $dn = preg_split(
                '/(?<!\\\\)' . preg_quote($options['separator'], '/') . '/',
                $dn
            );

            // Clear wrong splitting (possibly we have split too much).
            $dn = self::_correctDNSplitting($dn, $options['separator']);
        } else {
            // Is array, check if the array is indexed or associative.
            $assoc = false;
            foreach (array_keys($dn) as $dn_key) {
                if (!is_int($dn_key)) {
                    $assoc = true;
                    break;
                }
            }

            // Convert to indexed, if associative array detected.
            if ($assoc) {
                $newdn = array();
                foreach ($dn as $dn_key => $dn_part) {
                    if (is_array($dn_part)) {
                        // We assume here that the RDN parts are also
                        // associative.
                        ksort($dn_part, SORT_STRING);
                        // Copy array as-is, so we can resolve it later.
                        $newdn[] = $dn_part;
                    } else {
                        $newdn[] = $dn_key . '=' . $dn_part;
                    }
                }
                $dn = $newdn;
            }
        }

        // Escaping and casefolding.
        foreach ($dn as $pos => $dnval) {
            if (is_array($dnval)) {
                // Subarray detected, this means most probably that we had a
                // multivalued DN part, which must be resolved.
                $parts = array();
                foreach ($dnval as $subkey => $subval) {
                    // Build RDN part.
                    if (!is_int($subkey)) {
                        $subval = $subkey . '=' . $subval;
                    }
                    $subval_processed = self::canonicalDN($subval);
                    if (false === $subval_processed) {
                        return false;
                    }
                    $parts[] = $subval_processed;
                }
                $dn[$pos] = implode('+', $parts);
                continue;
            }

            // Try to split multivalued RDNs into array.
            $rdns = self::splitRDNMultivalue($dnval);
            if (count($rdns) > 1) {
                // Multivalued RDN was detected. The RDN value is expected
                // to be correctly split by splitRDNMultivalue(). It's time
                // to sort the RDN and build the DN.
                sort($rdns, SORT_STRING);
                $parts = array();
                foreach ($rdns as $rdn) {
                    $subval_processed = self::canonicalDN($rdn);
                    if (false === $subval_processed) {
                        return false;
                    }
                    $parts[] = $subval_processed;
                }
                $dn[$pos] = implode('+', $parts);
                continue;
            }

            // No multivalued RDN. Split at first unescaped "=".
            $dn_comp = self::splitAttributeString($rdns[0]);
            if (count($dn_comp) != 2) {
                throw new Horde_Ldap_Exception('Invalid RDN: ' . $rdns[0]);
            }
            // Trim left whitespaces because of "cn=foo, l=bar" syntax
            // (whitespace after comma).
            $ocl = ltrim($dn_comp[0]);
            $val = $dn_comp[1];

            // Strip 'OID.', otherwise apply casefolding and escaping.
            if (strncasecmp($ocl, 'oid.', 4) === 0) {
                $ocl = substr($ocl, 4);
            } else {
                if ($options['casefold'] == 'upper') {
                    $ocl = Horde_String::upper($ocl);
                }
                if ($options['casefold'] == 'lower') {
                    $ocl = Horde_String::lower($ocl);
                }
                $ocl = self::escapeDNValue(array($ocl));
                $ocl = $ocl[0];
            }

            // Escaping of DN value.
            // TODO: if the value is already correctly escaped, we get
            // double escaping.
            $val = self::escapeDNValue(array($val));
            $val = str_replace('/', '\/', $val[0]);

            $dn[$pos] = $ocl . '=' . $val;
        }

        if ($options['reverse']) {
            $dn = array_reverse($dn);
        }

        return implode($options['separator'], $dn);
    }

    /**
     * Escapes the given values according to RFC 2254 so that they can be
     * safely used in LDAP filters.
     *
     * Any control characters with an ASCII code < 32 as well as the characters
     * with special meaning in LDAP filters "*", "(", ")", and "\" (the
     * backslash) are converted into the representation of a backslash followed
     * by two hex digits representing the hexadecimal value of the character.
     *
     * Empty values are returned unchanged.
     *
     * @param string|array $values  Values to escape.
     *
     * @return array  Escaped values.
     */
    public static function escapeFilterValue($values)
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        foreach ($values as $key => $val) {
            // Escaping of filter meta characters. The backslash must come
            // first, otherwise the backslashes added for the other characters
            // would be escaped again.
            $val = str_replace(
                array('\\', '*', '(', ')'),
                array('\5c', '\2a', '\28', '\29'),
                $val
            );

            // ASCII < 32 escaping.
            $values[$key] = self::asc2hex32($val);
        }

        return $values;
    }

    /**
     * Unescapes the given values according to RFC 2254.
     *
     * Reverses the conversion done by {@link escapeFilterValue()}.
     *
     * Converts any sequences of a backslash followed by two hex digits into
     * the corresponding character.
     *
     * @param string|array $values  Values to unescape.
     *
     * @return array  Unescaped values.
     */
    public static function unescapeFilterValue($values = array())
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        foreach ($values as $key => $value) {
            // Translate hex code into ascii.
            $values[$key] = self::hex2asc($value);
        }

        return $values;
    }

    /**
     * Converts all ASCII chars < 32 to "\HEX".
     *
     * Every byte with a code below 32 is replaced by a backslash followed by
     * its two-digit lowercase hexadecimal code; all other bytes are copied
     * unchanged.
     *
     * @param string $string  String to convert.
     *
     * @return string  $string with all control characters hex-escaped.
     */
    public static function asc2hex32($string)
    {
        $string = (string)$string;

        // The result is built separately from the input. Replacing inside the
        // string that is being iterated would shift the remaining characters
        // out of the loop's range and leave later control characters
        // unescaped.
        $result = '';
        for ($i = 0, $len = strlen($string); $i < $len; $i++) {
            $code = ord($string[$i]);
            if ($code < 32) {
                $result .= sprintf('\\%02x', $code);
            } else {
                $result .= $string[$i];
            }
        }

        return $result;
    }

    /**
     * Converts all hexadecimal expressions ("\HEX") to their original ASCII
     * characters.
     *
     * @author beni@php.net, heavily based on work from DavidSmith@byu.net
     *
     * @param string $string  String to convert.
     *
     * @return string  ASCII representation of $string.
     */
    public static function hex2asc($string)
    {
        // A callback is used because the "e" pattern modifier is no longer
        // available in PHP 7 and later.
        return preg_replace_callback(
            '/\\\\([0-9A-Fa-f]{2})/',
            function ($hex) {
                return chr(hexdec($hex[1]));
            },
            $string
        );
    }

    /**
     * Splits a multivalued RDN value into an array.
     *
     * A RDN can contain multiple values, separated by a plus sign. This method
     * returns each separate ocl=value pair of the RDN part.
     *
     * If no multivalued RDN is detected, an array containing only the original
     * RDN part is returned.
     *
     * For example, the multivalued RDN 'OU=Sales+CN=J. Smith' is exploded to:
     * <kbd>array([0] => 'OU=Sales', [1] => 'CN=J. Smith')</kbd>
     *
     * The method tries to be smart if it encounters unescaped "+" characters,
     * but may fail, so better ensure escaped "+" in attribute names and
     * values.
     *
     * [BUG] If you have a multivalued RDN with unescaped plus characters and
     *       there is an unescaped plus sign at the end of a value followed by
     *       an attribute name containing an unescaped plus, then you will get
     *       wrong splitting:
     *         $rdn = 'OU=Sales+C+N=J. Smith';
     *       returns:
     *         array('OU=Sales+C', 'N=J. Smith');
     *       The "C+" is treated as the value of the first pair instead of as
     *       the attribute name of the second pair. To prevent this, escape
     *       correctly.
     *
     * @param string $rdn  Part of a (multivalued) escaped RDN (e.g. ou=foo or
     *                     ou=foo+cn=bar)
     *
     * @return array  The components of the multivalued RDN.
     */
    public static function splitRDNMultivalue($rdn)
    {
        // Split at every unescaped "+" and glue wrongly split fragments back
        // together. The helper already returns a re-indexed list.
        $rdns = preg_split('/(?<!\\\\)\+/', $rdn);
        return self::_correctDNSplitting($rdns, '+');
    }

    /**
     * Splits a attribute=value syntax into an array.
     *
     * The split will occur at the first unescaped '=' character.
     *
     * @param string $attr  An attribute-value string.
     *
     * @return array  Indexed array: 0=attribute name, 1=attribute value.
     */
    public static function splitAttributeString($attr)
    {
        return preg_split('/(?<!\\\\)=/', $attr, 2);
    }

    /**
     * Corrects splitting of DN parts.
     *
     * Parts that are not in attr=value format are the result of an unescaped
     * separator character inside an attribute name or value. Such a fragment
     * is appended to the preceding part (we assume that it belonged to the
     * attribute value). Fragments at the very beginning are prepended to the
     * part that follows them.
     *
     * @param array $dn          Raw DN array.
     * @param string $separator  Separator that was used when splitting.
     *
     * @return array  Corrected, consecutively indexed array.
     */
    protected static function _correctDNSplitting($dn = array(),
                                                  $separator = ',')
    {
        // TODO: To solve this properly, we might ask the schema. The
        // Horde_Ldap_Util class must remain independent from the other
        // classes or connections though.
        $result = array();
        // Fragments seen before the first valid part; only ever set while
        // $result is still empty.
        $pending = null;

        foreach ($dn as $part) {
            if ($pending !== null) {
                // Leading fragments are prepended to the following part.
                $part = $pending . $separator . $part;
                $pending = null;
            }

            if (preg_match('/.+(?<!\\\\)=.+/', $part)) {
                $result[] = $part;
            } elseif ($result) {
                // Append to the previous attribute value. Using the last kept
                // part also handles several fragments in a row.
                $result[count($result) - 1] .= $separator . $part;
            } else {
                $pending = $part;
            }
        }

        if ($pending !== null) {
            // No valid part followed, keep the input as a single part.
            $result[] = $pending;
        }

        return $result;
    }
}
600: