1: <?php
2: /**
3: * This class provides a parser which can construct an SQL WHERE clause from a
4: * Google-like search expression.
5: *
6: * Copyright 2004-2012 Horde LLC (http://www.horde.org/)
7: *
8: * The expression recognizes boolean "AND", "OR", and "NOT" (providing no
9: * operator between keywords implies "AND"), like so:
10: *
11: * cat and dog
12: * cat or dog
13: * cat and not dog
14: *
15: * If no operator appears between keywords or quoted strings, "AND" is assumed.
16: * A comma can be used instead of "OR":
17: *
18: * cat dog
19: * cat, dog
20: * cat not dog
21: *
22: * The parser recognizes parentheses, so complex expressions can be created:
23: *
24: * cat and not (dog or puppy)
25: *
26: * Quoted strings are also recognized, and are taken as literal keywords:
27: *
28: * "cat and dog"
29: *
30: * Parsing is designed to be as fuzzy as possible, so it shouldn't error unless
31: * people search for "AND", "OR", or "NOT" without quoting it or use unbalanced
32: * parentheses.
33: *
34: * @author Jason M. Felice <jason.m.felice@gmail.com>
35: * @license http://www.horde.org/licenses/bsd
36: * @category Horde
37: * @package Db
38: */
39: class Horde_Db_SearchParser
40: {
41: /**
42: * Parses a keyword expression.
43: *
44: * @param string $column This is the SQL field name the resulting
45: * expression should test against.
46: * @param string $expr This is the keyword expression we want to parse.
47: *
48: * @return string The query expression.
49: * @throws Horde_Db_Exception
50: */
51: static public function parse($column, $expr)
52: {
53: /* First pass - scan the string for tokens. Bare words are tokens, or
54: * the user can quote strings to have embedded spaces, keywords, or
55: * parentheses. Parentheses can be used for grouping boolean
56: * operators, and the boolean operators AND, OR, and NOT are all
57: * recognized.
58: *
59: * The tokens are returned in the $tokens array -- an array of strings.
60: * Each string in the array starts with either a `!' or a `='. `=' is
61: * a bare word or quoted string we are searching for, and `!' indicates
62: * a boolean operator or parenthesis. A token that starts with a '.'
63: * indicates a PostgreSQL word boundary search. */
64: $tokens = array();
65: while (!empty($expr)) {
66: $expr = preg_replace('/^\s+/', '', $expr);
67: if (empty($expr)) {
68: break;
69: }
70: if (substr($expr,0,1) == '(') {
71: $expr = substr($expr, 1);
72: $token = '!(';
73: } elseif (substr($expr, 0, 1) == ')') {
74: $expr = substr($expr, 1);
75: $token = '!)';
76: } elseif (substr($expr, 0, 1) == ',') {
77: $expr = substr($expr, 1);
78: $token = '!OR';
79: } elseif (preg_match('/^(AND|OR|NOT)([^a-z].*)?$/i', $expr,
80: $matches)) {
81: $token = '!' . strtoupper($matches[1]);
82: $expr = substr($expr, strlen($matches[1]));
83: } elseif (preg_match('/^"(([^"]|\\[0-7]+|\\[Xx][0-9a-fA-F]+|\\[^Xx0-7])*)"/',
84: $expr, $matches)) {
85: $token = '=' . stripcslashes($matches[1]);
86: $expr = substr($expr, strlen($matches[0]));
87: } elseif (preg_match('/^[^\\s\\(\\),]+/', $expr, $matches)) {
88: $token = '=' . $matches[0];
89: $expr = substr($expr,strlen($token)-1);
90: } else {
91: throw new Horde_Db_Exception('Syntax error in search terms');
92: }
93: if ($token == '!AND') {
94: /* !AND is implied by concatenation. */
95: continue;
96: }
97: $tokens[] = $token;
98: }
99:
100: /* Call the expression parser. */
101: return self::_parseKeywords1($column, $tokens);
102: }
103:
104: static protected function _parseKeywords1($column, &$tokens)
105: {
106: if (count($tokens) == 0) {
107: throw new Horde_Db_Exception('Empty search terms');
108: }
109: $lhs = self::_parseKeywords2($column, $tokens);
110: if (count($tokens) == 0 || $tokens[0] != '!OR') {
111: return $lhs;
112: }
113: array_shift($tokens);
114: $rhs = self::_parseKeywords1($column, $tokens);
115: return "($lhs OR $rhs)";
116: }
117:
118: static protected function _parseKeywords2($column, &$tokens)
119: {
120: $lhs = self::_parseKeywords3($column, $tokens);
121: if (sizeof($tokens) == 0 || $tokens[0] == '!)' || $tokens[0] == '!OR') {
122: return $lhs;
123: }
124: $rhs = self::_parseKeywords2($column, $tokens);
125: return "($lhs AND $rhs)";
126: }
127:
128: static protected function _parseKeywords3($column, &$tokens)
129: {
130: if ($tokens[0] == '!NOT') {
131: array_shift($tokens);
132: $lhs = self::_parseKeywords4($column, $tokens);
133: if (is_a($lhs, 'PEAR_Error')) {
134: return $lhs;
135: }
136: return "(NOT $lhs)";
137: }
138: return self::_parseKeywords4($column, $tokens);
139: }
140:
141: static protected function _parseKeywords4($column, &$tokens)
142: {
143: if ($tokens[0] == '!(') {
144: array_shift($tokens);
145: $lhs = self::_parseKeywords1($column, $tokens);
146: if (sizeof($tokens) == 0 || $tokens[0] != '!)') {
147: throw new Horde_Db_Exception('Expected ")"');
148: }
149: array_shift($tokens);
150: return $lhs;
151: }
152:
153: if (substr($tokens[0], 0, 1) != '=' &&
154: substr($tokens[0], 0, 2) != '=.') {
155: throw new Horde_Db_Exception('Expected bare word or quoted search term');
156: }
157:
158: $val = strtolower(substr(array_shift($tokens), 1));
159: $val = addslashes(ereg_replace("([\\%])", "\\\\1", $val));
160:
161: return "(LOWER($column) LIKE '%$val%')";
162: }
163: }
164: