1: <?php
2: /**
3: * $Horde: luxor/lib/SimpleParse.php,v 1.10 2005/06/13 03:20:27 selsky Exp $
4: *
5: * @author Jan Schneider <jan@horde.org>
6: * @since Luxor 0.1
7: * @package Luxor
8: */
/**
 * Line-oriented source code fragmenter.
 *
 * Reads a file line by line, splits every line at the configured block
 * delimiters and hands the pieces back one "fragment" at a time through
 * nextFrag(). A fragment is either a delimited block (opening delimiter,
 * content, closing delimiter) tagged with its body type id, or a run of
 * text outside of any block, tagged with null.
 */
class Luxor_SimpleParse {

    /** File handle of the file being parsed. */
    var $_fileh;

    /** Number of lines successfully read from the file so far. */
    var $_line = 0;

    /** Fragments of the current line that have not been consumed yet. */
    var $_frags = array();

    /** Array of body type ids, indexed like the opening delimiters. */
    var $_bodyid = array();

    /** Fragment closing delimiter regexps, indexed like the opening delimiters. */
    var $_term = array();

    /** Fragmentation regexp: alternation of all opening and closing delimiters. */
    var $_split = '';

    /** Fragment opening regexp: one capturing group per opening delimiter. */
    var $_open = '';

    /** Tab width used when expanding tabs to spaces. */
    var $_tabwidth = 8;

    /**
     * Constructor for the source code parser.
     *
     * @param resource $fileh  The file handle of the file to parse.
     *
     * @param int $tabhint     Not used by this class.
     *
     * @param array $blksep    A flat list of block separators for this
     *                         file's type, three entries per block:
     *                         body type id, opening delimiter regexp,
     *                         closing delimiter regexp. A missing closing
     *                         delimiter is treated as an empty one; a
     *                         trailing entry without an opening delimiter
     *                         is ignored.
     *                         NOTE(review): the delimiter regexps are
     *                         assumed not to contain capturing groups of
     *                         their own, since group positions are mapped
     *                         back to body type ids.
     */
    function __construct(&$fileh, $tabhint, $blksep)
    {
        $this->_fileh = $fileh;
        $this->_bodyid = array();
        $this->_term = array();

        /* Get possible block opening and closing delimiters and their
         * meaning. */
        $groups = array();
        $alternatives = array();
        foreach (array_chunk(array_values((array)$blksep), 3) as $spec) {
            if (count($spec) < 2) {
                continue;
            }
            $this->_bodyid[] = $spec[0];
            $this->_term[] = isset($spec[2]) ? $spec[2] : '';
            $groups[] = '(' . $spec[1] . ')';
            $alternatives[] = $spec[1];
        }

        /* Closing delimiters take part in line splitting as well; empty
         * ones would match everywhere and are left out. */
        foreach ($this->_term as $term) {
            if ((string)$term !== '') {
                $alternatives[] = $term;
            }
        }

        /* Build the regexps; '/' is the pattern delimiter used everywhere
         * in this class, so it has to be escaped. */
        $this->_open = str_replace('/', '\\/', implode('|', $groups));
        $this->_split = str_replace('/', '\\/', implode('|', $alternatives));
    }

    /**
     * Old-style constructor name, kept so that explicit calls such as
     * parent::Luxor_SimpleParse() keep working. PHP 8 no longer invokes a
     * method named like the class on instantiation, which is why the real
     * initialisation lives in __construct().
     */
    function Luxor_SimpleParse(&$fileh, $tabhint, $blksep)
    {
        $this->__construct($fileh, $tabhint, $blksep);
    }

    /**
     * Returns the content and type of the next code fragment.
     *
     * @return array  array($btype, $frag): $btype is the body type id of
     *                the block, or null for text outside of any block;
     *                $frag is the fragment text. Both are null once the
     *                end of the file has been reached.
     */
    function nextFrag()
    {
        $btype = null;
        $frag = null;

        while (true) {
            // Read one more line if all of the previously read line has
            // been processed; stop at the end of the file.
            if (!count($this->_frags) && !$this->_readLine()) {
                break;
            }

            // Skip empty fragments. Only the empty string counts as empty:
            // a fragment consisting of "0" is real content.
            if ((string)$this->_frags[0] === '') {
                array_shift($this->_frags);
                continue;
            }

            if (is_null($frag)) {
                // Start of a new fragment: find the block type, if any.
                $frag = array_shift($this->_frags);
                $btype = $this->_blockType($frag);
                continue;
            }

            if (is_null($btype)) {
                // Plain text: return what we have as soon as the next
                // piece opens a block, otherwise keep collecting.
                if (!is_null($this->_blockType($this->_frags[0]))) {
                    break;
                }
                $frag .= array_shift($this->_frags);
                continue;
            }

            // Inside a block.
            $next = array_shift($this->_frags);

            // Some special casing for escaped quotes: a quote preceded by
            // a single backslash must not be taken for the terminator, so
            // it is stripped from the text tested against the terminator.
            $escaped = substr($frag, -1, 1) === '\\' &&
                substr($frag, -2, 2) !== '\\\\' &&
                (substr($next, 0, 1) === '"' || substr($next, 0, 1) === "'");

            $frag .= $next;
            if ($escaped) {
                $next = (string)substr($next, 1);
            }

            // We are done if this was the terminator.
            $term = str_replace('/', '\\/', (string)$this->_term[$btype]);
            if (preg_match('/^(?:' . $term . ')$/', $next)) {
                break;
            }
        }

        // Translate the delimiter index into the body type id.
        if (!is_null($btype)) {
            $btype = $this->_bodyid[$btype];
        }
        return array($btype, $frag);
    }

    /**
     * Reads the next line from the file, expands its tabs and splits it
     * into the fragment queue.
     *
     * An Emacs style "-*- tab-width: N -*-" marker on one of the first two
     * lines sets the tab width, provided N is greater than zero.
     *
     * @return boolean  False if no further line could be read.
     */
    function _readLine()
    {
        $line = fgets($this->_fileh);
        if ($line === false) {
            return false;
        }
        $this->_line++;

        if ($this->_line <= 2 &&
            preg_match('/^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/',
                       $line, $match) &&
            (int)$match[1] > 0) {
            $this->_tabwidth = (int)$match[1];
        }

        $line = $this->_expandTabs($line);

        // Split the line into fragments, keeping the delimiters. Without
        // any delimiters, or if the regexp is broken, the whole line is a
        // single fragment.
        $frags = false;
        if ($this->_split !== '') {
            $frags = preg_split('/(' . $this->_split . ')/', $line, -1,
                                PREG_SPLIT_DELIM_CAPTURE);
        }
        $this->_frags = is_array($frags) ? $frags : array($line);

        return true;
    }

    /**
     * Replaces every tab in a single line by the number of spaces needed
     * to reach the next tab stop. Columns are counted in bytes.
     *
     * @param string $line  The line to expand.
     *
     * @return string  The line without tabs.
     */
    function _expandTabs($line)
    {
        $width = (int)$this->_tabwidth;
        if ($width < 1 || strpos($line, "\t") === false) {
            return $line;
        }

        $pieces = explode("\t", $line);
        $tail = array_pop($pieces);
        $expanded = '';
        foreach ($pieces as $piece) {
            $expanded .= $piece;
            // The current length is the column the tab starts at.
            $expanded .= str_repeat(' ', $width - (strlen($expanded) % $width));
        }

        return $expanded . $tail;
    }

    /**
     * Determines which opening delimiter, if any, a fragment consists of.
     *
     * @param string $frag  The fragment to test.
     *
     * @return integer|null  The index of the opening delimiter whose
     *                       pattern matches the whole fragment, or null if
     *                       the fragment does not open a block.
     */
    function _blockType($frag)
    {
        // The non-capturing group makes the anchors apply to every
        // alternative instead of only the first and the last one.
        if ($this->_open === '' ||
            !preg_match('/^(?:' . $this->_open . ')$/', $frag, $match)) {
            return null;
        }

        foreach (array_slice($match, 1) as $id => $text) {
            if ($text === $frag) {
                return $id;
            }
        }

        return null;
    }

}
179: