1: <?php
2: /**
3: * The Text_Flowed:: class provides common methods for manipulating text
4: * using the encoding described in RFC 3676 ('flowed' text).
5: *
6: * This class is based on the Text::Flowed perl module (Version 0.14) found
7: * in the CPAN perl repository. This module is released under the Perl
8: * license, which is compatible with the LGPL.
9: *
10: * Copyright 2002-2003 Philip Mak
11: * Copyright 2004-2012 Horde LLC (http://www.horde.org/)
12: *
13: * See the enclosed file COPYING for license information (LGPL). If you
14: * did not receive this file, see http://www.horde.org/licenses/lgpl21.
15: *
16: * @author Michael Slusarz <slusarz@horde.org>
17: * @category Horde
18: * @license http://www.horde.org/licenses/lgpl21 LGPL 2.1
19: * @package Text_Flowed
20: */
class Horde_Text_Flowed
{
    /**
     * The maximum length that a line is allowed to be (unless faced with
     * a word that is unreasonably long). This class will re-wrap a line if
     * it exceeds this length. An empty value (0, null) disables re-wrapping.
     *
     * @var integer
     */
    protected $_maxlength = 78;

    /**
     * When this class wraps a line, the newly created lines will be split
     * at this length.
     *
     * @var integer
     */
    protected $_optlength = 72;

    /**
     * The text to be formatted.
     *
     * @var string
     */
    protected $_text;

    /**
     * The cached output of the formatting. Each element is an array with
     * the keys 'text' and 'level'.
     *
     * @var array
     */
    protected $_output = array();

    /**
     * Cache key describing the parameters and settings that were used to
     * build the data in $_output (null if nothing has been formatted yet).
     *
     * @var string
     */
    protected $_formattype = null;

    /**
     * The character set of the text.
     *
     * @var string
     */
    protected $_charset;

    /**
     * Convert text using DelSp?
     *
     * @var boolean
     */
    protected $_delsp = false;

    /**
     * Constructor.
     *
     * @param string $text     The text to process.
     * @param string $charset  The character set of $text.
     */
    public function __construct($text, $charset = 'UTF-8')
    {
        $this->_text = $text;
        $this->_charset = $charset;
    }

    /**
     * Set the maximum length of a line of text.
     *
     * @param integer $max  A new value for $_maxlength.
     */
    public function setMaxLength($max)
    {
        $this->_maxlength = $max;
    }

    /**
     * Set the optimal length of a line of text.
     *
     * @param integer $opt  A new value for $_optlength.
     */
    public function setOptLength($opt)
    {
        $this->_optlength = $opt;
    }

    /**
     * Set whether to format text using DelSp.
     *
     * @param boolean $delsp  Use DelSp?
     */
    public function setDelSp($delsp)
    {
        $this->_delsp = (bool)$delsp;
    }

    /**
     * Reformats the input string, where the string is 'format=flowed' plain
     * text as described in RFC 2646.
     *
     * @param boolean $quote  Add level of quoting to each line?
     *
     * @return string  The text converted to RFC 2646 'fixed' format. Lines
     *                 are separated by "\n"; no trailing newline is added.
     */
    public function toFixed($quote = false)
    {
        $this->_reformat(false, $quote);

        $lines = array();
        foreach ($this->_output as $line) {
            $lines[] = $line['text'];
        }

        return implode("\n", $lines);
    }

    /**
     * Reformats the input string, and returns the output in an array format
     * with quote level information.
     *
     * @param boolean $quote  Add level of quoting to each line?
     *
     * @return array  An array of arrays with the following elements:
     * <pre>
     * 'level' - The quote level of the current line.
     * 'text'  - The text for the current line.
     * </pre>
     */
    public function toFixedArray($quote = false)
    {
        $this->_reformat(false, $quote);
        return $this->_output;
    }

    /**
     * Reformats the input string, where the string is 'format=fixed' plain
     * text as described in RFC 2646.
     *
     * @param boolean $quote  Add level of quoting to each line?
     * @param array $opts     Additional options:
     * <pre>
     * 'nowrap' - (boolean) If true, does not wrap unquoted lines.
     *            DEFAULT: false
     * </pre>
     *
     * @return string  The text converted to RFC 2646 'flowed' format. Every
     *                 line, including the last one, is terminated by "\n".
     */
    public function toFlowed($quote = false, array $opts = array())
    {
        $this->_reformat(true, $quote, empty($opts['nowrap']));

        $txt = '';
        foreach ($this->_output as $line) {
            $txt .= $line['text'] . "\n";
        }

        return $txt;
    }

    /**
     * Rebuilds $_output from $_text: joins flowed lines, optionally adds a
     * quote level, and re-wraps lines that are longer than $_maxlength.
     * The result is cached; the work is skipped if the previous call used
     * the same parameters and the same length/DelSp settings.
     *
     * @param boolean $toflowed  Convert to flowed?
     * @param boolean $quote     Add level of quoting to each line?
     * @param boolean $wrap      Wrap unquoted lines?
     */
    protected function _reformat($toflowed, $quote, $wrap = true)
    {
        /* The cache key must cover everything that influences the output,
         * otherwise changing e.g. 'nowrap' or DelSp between two calls would
         * hand back stale data. */
        $format_type = implode('|', array(
            (int)(bool)$toflowed,
            (int)(bool)$quote,
            (int)(bool)$wrap,
            (int)(bool)$this->_delsp,
            $this->_maxlength,
            $this->_optlength
        ));
        if ($format_type === $this->_formattype) {
            return;
        }

        $this->_output = array();
        $this->_formattype = $format_type;

        /* Set variables used in regexps. */
        $delsp = ($toflowed && $this->_delsp) ? 1 : 0;
        $opt = $this->_optlength - 1 - $delsp;

        /* Process message line by line. */
        $text = preg_split("/\r?\n/", (string)$this->_text);
        $text_count = count($text) - 1;

        for ($no = 0; $no <= $text_count; ++$no) {
            $line = $text[$no];

            /* Per RFC 2646 [4.3], the 'Usenet Signature Convention' line
             * (DASH DASH SP) is not considered flowed. Watch for this when
             * dealing with potentially flowed lines. */

            /* The next three steps come from RFC 2646 [4.2]. */
            /* STEP 1: Determine quote level for line. */
            $num_quotes = $this->_numquotes($line);
            if ($num_quotes) {
                $line = (string)substr($line, $num_quotes);
            }

            /* Only combine lines if we are converting to fixed or if the
             * current line is quoted. */
            if (!$toflowed || $num_quotes) {
                /* STEP 2: Remove space stuffing from line. */
                $line = $this->_unstuff($line);

                /* STEP 3: Should we interpret this line as flowed?
                 * While line is flowed (not empty and there is a space
                 * at the end of the line), and there is a next line, and the
                 * next line has the same quote depth, add to the current
                 * line. A line is not flowed if it is a signature line. */
                if ($line !== '-- ') {
                    while (($line !== '') &&
                           (substr($line, -1) === ' ') &&
                           ($no < $text_count) &&
                           ($this->_numquotes($text[$no + 1]) == $num_quotes)) {
                        /* If DelSp is yes and this is flowed input, we need
                         * to remove the trailing space. */
                        if (!$toflowed && $this->_delsp) {
                            $line = (string)substr($line, 0, -1);
                        }
                        /* Consume the next input line; advancing $no here
                         * makes the outer loop skip it. */
                        $line .= $this->_unstuff((string)substr($text[++$no], $num_quotes));
                    }
                }
            }

            /* Ensure line is fixed, since we already joined all flowed
             * lines. Remove all trailing ' ' from the line. */
            if ($line !== '-- ') {
                $line = rtrim($line);
            }

            /* Increment quote depth if we're quoting. */
            if ($quote) {
                $num_quotes++;
            }

            /* The quote prefix for the line. */
            $quotestr = str_repeat('>', $num_quotes);

            /* Compare against '' explicitly: empty() would also swallow a
             * line that consists of the single character "0". */
            if ($line === '') {
                /* Line is empty. */
                $this->_output[] = array('text' => $quotestr, 'level' => $num_quotes);
                continue;
            }

            if ((!$wrap && !$num_quotes) ||
                empty($this->_maxlength) ||
                ((Horde_String::length($line, $this->_charset) + $num_quotes) <= $this->_maxlength)) {
                /* Line does not require rewrapping. */
                $this->_output[] = array('text' => $quotestr . $this->_stuff($line, $num_quotes, $toflowed), 'level' => $num_quotes);
                continue;
            }

            /* Rewrap this paragraph. */
            $min = $num_quotes + 1;

            /* Loop until nothing is left; a remainder of "0" is still text
             * and must not terminate the loop. */
            while ($line !== '') {
                /* Stuff and re-quote the line. */
                $line = $quotestr . $this->_stuff($line, $num_quotes, $toflowed);
                $line_length = Horde_String::length($line, $this->_charset);
                if ($line_length <= $this->_optlength) {
                    /* Remaining section of line is short enough. */
                    $this->_output[] = array('text' => $line, 'level' => $num_quotes);
                    break;
                }

                /* We need to wrap text at a certain number of
                 * *characters*, not a certain number of *bytes*;
                 * thus the need for a multibyte capable regex.
                 *
                 * 1. Try to find a string as long as _optlength.
                 * 2. Try to find a string as long as _maxlength.
                 * 3. Take the first word. */
                $regex = array();
                if ($min <= $opt) {
                    $regex[] = '^(.{' . $min . ',' . $opt . '}) (.*)';
                }
                if ($min <= $this->_maxlength) {
                    $regex[] = '^(.{' . $min . ',' . $this->_maxlength . '}) (.*)';
                }
                $regex[] = '^(.{' . $min . ',})? (.*)';

                $m = Horde_String::regexMatch($line, $regex, $this->_charset);
                if ($m) {
                    $head = isset($m[1]) ? (string)$m[1] : '';
                    $tail = isset($m[2]) ? (string)$m[2] : '';
                    /* Only a truly empty first group means "no head"; a
                     * head of "0" is real content. */
                    if ($head === '') {
                        $head = $tail;
                        $tail = '';
                    }
                    $this->_output[] = array('text' => $head . ' ' . (($delsp) ? ' ' : ''), 'level' => $num_quotes);
                    $line = $tail;
                } elseif ($line_length > 998) {
                    /* One excessively long word left on line. Be
                     * absolutely sure it does not exceed 998
                     * characters in length or else we must
                     * truncate. */
                    $this->_output[] = array('text' => Horde_String::substr($line, 0, 998, $this->_charset), 'level' => $num_quotes);
                    $line = (string)Horde_String::substr($line, 998, null, $this->_charset);
                } else {
                    $this->_output[] = array('text' => $line, 'level' => $num_quotes);
                    break;
                }
            }
        }
    }

    /**
     * Returns the number of leading '>' characters in the text input.
     * '>' characters are defined by RFC 2646 to indicate a quoted line.
     *
     * @param string $text  The text to analyze.
     *
     * @return integer  The number of leading quote characters.
     */
    protected function _numquotes($text)
    {
        return strspn($text, '>');
    }

    /**
     * Space-stuffs if it starts with ' ' or '>' or 'From ', or if
     * quote depth is non-zero (for aesthetic reasons so that there is a
     * space after the '>'). Nothing is stuffed unless converting to flowed.
     *
     * @param string $text         The text to stuff.
     * @param integer $num_quotes  The quote-level of this line.
     * @param boolean $toflowed    Are we converting to flowed text?
     *
     * @return string  The stuffed text.
     */
    protected function _stuff($text, $num_quotes, $toflowed)
    {
        if ($toflowed &&
            ($num_quotes || preg_match("/^(?: |>|From |From$)/", $text))) {
            return ' ' . $text;
        }

        return $text;
    }

    /**
     * Unstuffs a space stuffed line, i.e. removes one leading space if
     * there is one.
     *
     * @param string $text  The text to unstuff.
     *
     * @return string  The unstuffed text.
     */
    protected function _unstuff($text)
    {
        $text = (string)$text;

        return (isset($text[0]) && ($text[0] === ' '))
            ? (string)substr($text, 1)
            : $text;
    }

}
380: