Overview

Packages

  • Text
    • Flowed

Classes

  • Horde_Text_Flowed
  • Overview
  • Package
  • Class
  • Tree
  1: <?php
  2: /**
  3:  * The Text_Flowed:: class provides common methods for manipulating text
  4:  * using the encoding described in RFC 3676 ('flowed' text).
  5:  *
  6:  * This class is based on the Text::Flowed perl module (Version 0.14) found
  7:  * in the CPAN perl repository.  This module is released under the Perl
  8:  * license, which is compatible with the LGPL.
  9:  *
 10:  * Copyright 2002-2003 Philip Mak
 11:  * Copyright 2004-2012 Horde LLC (http://www.horde.org/)
 12:  *
 13:  * See the enclosed file COPYING for license information (LGPL). If you
 14:  * did not receive this file, see http://www.horde.org/licenses/lgpl21.
 15:  *
 16:  * @author   Michael Slusarz <slusarz@horde.org>
 17:  * @category Horde
 18:  * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
 19:  * @package  Text_Flowed
 20:  */
 21: class Horde_Text_Flowed
 22: {
 23:     /**
 24:      * The maximum length that a line is allowed to be (unless faced with
 25:      * with a word that is unreasonably long). This class will re-wrap a
 26:      * line if it exceeds this length.
 27:      *
 28:      * @var integer
 29:      */
 30:     protected $_maxlength = 78;
 31: 
 32:     /**
 33:      * When this class wraps a line, the newly created lines will be split
 34:      * at this length.
 35:      *
 36:      * @var integer
 37:      */
 38:     protected $_optlength = 72;
 39: 
 40:     /**
 41:      * The text to be formatted.
 42:      *
 43:      * @var string
 44:      */
 45:     protected $_text;
 46: 
 47:     /**
 48:      * The cached output of the formatting.
 49:      *
 50:      * @var array
 51:      */
 52:     protected $_output = array();
 53: 
 54:     /**
 55:      * The format of the data in $_output.
 56:      *
 57:      * @var string
 58:      */
 59:     protected $_formattype = null;
 60: 
 61:     /**
 62:      * The character set of the text.
 63:      *
 64:      * @var string
 65:      */
 66:     protected $_charset;
 67: 
 68:     /**
 69:      * Convert text using DelSp?
 70:      *
 71:      * @var boolean
 72:      */
 73:     protected $_delsp = false;
 74: 
 75:     /**
 76:      * Constructor.
 77:      *
 78:      * @param string $text     The text to process.
 79:      * @param string $charset  The character set of $text.
 80:      */
 81:     public function __construct($text, $charset = 'UTF-8')
 82:     {
 83:         $this->_text = $text;
 84:         $this->_charset = $charset;
 85:     }
 86: 
 87:     /**
 88:      * Set the maximum length of a line of text.
 89:      *
 90:      * @param integer $max  A new value for $_maxlength.
 91:      */
 92:     public function setMaxLength($max)
 93:     {
 94:         $this->_maxlength = $max;
 95:     }
 96: 
 97:     /**
 98:      * Set the optimal length of a line of text.
 99:      *
100:      * @param integer $max  A new value for $_optlength.
101:      */
102:     public function setOptLength($opt)
103:     {
104:         $this->_optlength = $opt;
105:     }
106: 
107:     /**
108:      * Set whether to format text using DelSp.
109:      *
110:      * @param boolean $delsp  Use DelSp?
111:      */
112:     public function setDelSp($delsp)
113:     {
114:         $this->_delsp = (bool)$delsp;
115:     }
116: 
117:     /**
118:      * Reformats the input string, where the string is 'format=flowed' plain
119:      * text as described in RFC 2646.
120:      *
121:      * @param boolean $quote  Add level of quoting to each line?
122:      *
123:      * @return string  The text converted to RFC 2646 'fixed' format.
124:      */
125:     public function toFixed($quote = false)
126:     {
127:         $txt = '';
128: 
129:         $this->_reformat(false, $quote);
130:         reset($this->_output);
131:         $lines = count($this->_output) - 1;
132:         while (list($no, $line) = each($this->_output)) {
133:             $txt .= $line['text'] . (($lines == $no) ? '' : "\n");
134:         }
135: 
136:         return $txt;
137:     }
138: 
139:     /**
140:      * Reformats the input string, and returns the output in an array format
141:      * with quote level information.
142:      *
143:      * @param boolean $quote  Add level of quoting to each line?
144:      *
145:      * @return array  An array of arrays with the following elements:
146:      * <pre>
147:      * 'level' - The quote level of the current line.
148:      * 'text'  - The text for the current line.
149:      * </pre>
150:      */
151:     public function toFixedArray($quote = false)
152:     {
153:         $this->_reformat(false, $quote);
154:         return $this->_output;
155:     }
156: 
157:     /**
158:      * Reformats the input string, where the string is 'format=fixed' plain
159:      * text as described in RFC 2646.
160:      *
161:      * @param boolean $quote  Add level of quoting to each line?
162:      * @param array $opts     Additional options:
163:      * <pre>
164:      * 'nowrap' - (boolean) If true, does not wrap unquoted lines.
165:      *            DEFAULT: false
166:      * </pre>
167:      *
168:      * @return string  The text converted to RFC 2646 'flowed' format.
169:      */
170:     public function toFlowed($quote = false, array $opts = array())
171:     {
172:         $txt = '';
173: 
174:         $this->_reformat(true, $quote, empty($opts['nowrap']));
175:         reset($this->_output);
176:         while (list(,$line) = each($this->_output)) {
177:             $txt .= $line['text'] . "\n";
178:         }
179: 
180:         return $txt;
181:     }
182: 
183:     /**
184:      * Reformats the input string, where the string is 'format=flowed' plain
185:      * text as described in RFC 2646.
186:      *
187:      * @param boolean $toflowed  Convert to flowed?
188:      * @param boolean $quote     Add level of quoting to each line?
189:      * @param boolean $wrap      Wrap unquoted lines?
190:      */
191:     protected function _reformat($toflowed, $quote, $wrap = true)
192:     {
193:         $format_type = implode('|', array($toflowed, $quote));
194:         if ($format_type == $this->_formattype) {
195:             return;
196:         }
197: 
198:         $this->_output = array();
199:         $this->_formattype = $format_type;
200: 
201:         /* Set variables used in regexps. */
202:         $delsp = ($toflowed && $this->_delsp) ? 1 : 0;
203:         $opt = $this->_optlength - 1 - $delsp;
204: 
205:         /* Process message line by line. */
206:         $text = preg_split("/\r?\n/", $this->_text);
207:         $text_count = count($text) - 1;
208:         $skip = 0;
209:         reset($text);
210: 
211:         while (list($no, $line) = each($text)) {
212:             if ($skip) {
213:                 --$skip;
214:                 continue;
215:             }
216: 
217:             /* Per RFC 2646 [4.3], the 'Usenet Signature Convention' line
218:              * (DASH DASH SP) is not considered flowed.  Watch for this when
219:              * dealing with potentially flowed lines. */
220: 
221:             /* The next three steps come from RFC 2646 [4.2]. */
222:             /* STEP 1: Determine quote level for line. */
223:             if (($num_quotes = $this->_numquotes($line))) {
224:                 $line = substr($line, $num_quotes);
225:             }
226: 
227:             /* Only combine lines if we are converting to flowed or if the
228:              * current line is quoted. */
229:             if (!$toflowed || $num_quotes) {
230:                 /* STEP 2: Remove space stuffing from line. */
231:                 $line = $this->_unstuff($line);
232: 
233:                 /* STEP 3: Should we interpret this line as flowed?
234:                  * While line is flowed (not empty and there is a space
235:                  * at the end of the line), and there is a next line, and the
236:                  * next line has the same quote depth, add to the current
237:                  * line. A line is not flowed if it is a signature line. */
238:                 if ($line != '-- ') {
239:                     while (!empty($line) &&
240:                            (substr($line, -1) == ' ') &&
241:                            ($text_count != $no) &&
242:                            ($this->_numquotes($text[$no + 1]) == $num_quotes)) {
243:                         /* If DelSp is yes and this is flowed input, we need to
244:                          * remove the trailing space. */
245:                         if (!$toflowed && $this->_delsp) {
246:                             $line = substr($line, 0, -1);
247:                         }
248:                         $line .= $this->_unstuff(substr($text[++$no], $num_quotes));
249:                         ++$skip;
250:                     }
251:                 }
252:             }
253: 
254:             /* Ensure line is fixed, since we already joined all flowed
255:              * lines. Remove all trailing ' ' from the line. */
256:             if ($line != '-- ') {
257:                 $line = rtrim($line);
258:             }
259: 
260:             /* Increment quote depth if we're quoting. */
261:             if ($quote) {
262:                 $num_quotes++;
263:             }
264: 
265:             /* The quote prefix for the line. */
266:             $quotestr = str_repeat('>', $num_quotes);
267: 
268:             if (empty($line)) {
269:                 /* Line is empty. */
270:                 $this->_output[] = array('text' => $quotestr, 'level' => $num_quotes);
271:             } elseif ((!$wrap && !$num_quotes) ||
272:                       empty($this->_maxlength) ||
273:                       ((Horde_String::length($line, $this->_charset) + $num_quotes) <= $this->_maxlength)) {
274:                 /* Line does not require rewrapping. */
275:                 $this->_output[] = array('text' => $quotestr . $this->_stuff($line, $num_quotes, $toflowed), 'level' => $num_quotes);
276:             } else {
277:                 $min = $num_quotes + 1;
278: 
279:                 /* Rewrap this paragraph. */
280:                 while ($line) {
281:                     /* Stuff and re-quote the line. */
282:                     $line = $quotestr . $this->_stuff($line, $num_quotes, $toflowed);
283:                     $line_length = Horde_String::length($line, $this->_charset);
284:                     if ($line_length <= $this->_optlength) {
285:                         /* Remaining section of line is short enough. */
286:                         $this->_output[] = array('text' => $line, 'level' => $num_quotes);
287:                         break;
288:                     } else {
289:                         $regex = array();
290:                         if ($min <= $opt) {
291:                             $regex[] = '^(.{' . $min . ',' . $opt . '}) (.*)';
292:                         }
293:                         if ($min <= $this->_maxlength) {
294:                             $regex[] = '^(.{' . $min . ',' . $this->_maxlength . '}) (.*)';
295:                         }
296:                         $regex[] = '^(.{' . $min . ',})? (.*)';
297: 
298:                         if ($m = Horde_String::regexMatch($line, $regex, $this->_charset)) {
299:                             /* We need to wrap text at a certain number of
300:                              * *characters*, not a certain number of *bytes*;
301:                              * thus the need for a multibyte capable regex.
302:                              * If a multibyte regex isn't available, we are
303:                              * stuck with preg_match() (the function will
304:                              * still work - are just left with shorter rows
305:                              * than expected if multibyte characters exist in
306:                              * the row).
307:                              *
308:                              * 1. Try to find a string as long as _optlength.
309:                              * 2. Try to find a string as long as _maxlength.
310:                              * 3. Take the first word. */
311:                             if (empty($m[1])) {
312:                                 $m[1] = $m[2];
313:                                 $m[2] = '';
314:                             }
315:                             $this->_output[] = array('text' => $m[1] . ' ' . (($delsp) ? ' ' : ''), 'level' => $num_quotes);
316:                             $line = $m[2];
317:                         } elseif ($line_length > 998) {
318:                             /* One excessively long word left on line.  Be
319:                              * absolutely sure it does not exceed 998
320:                              * characters in length or else we must
321:                              * truncate. */
322:                             $this->_output[] = array('text' => Horde_String::substr($line, 0, 998, $this->_charset), 'level' => $num_quotes);
323:                             $line = Horde_String::substr($line, 998, null, $this->_charset);
324:                         } else {
325:                             $this->_output[] = array('text' => $line, 'level' => $num_quotes);
326:                             break;
327:                         }
328:                     }
329:                 }
330:             }
331:         }
332:     }
333: 
334:     /**
335:      * Returns the number of leading '>' characters in the text input.
336:      * '>' characters are defined by RFC 2646 to indicate a quoted line.
337:      *
338:      * @param string $text  The text to analyze.
339:      *
340:      * @return integer  The number of leading quote characters.
341:      */
342:     protected function _numquotes($text)
343:     {
344:         return strspn($text, '>');
345:     }
346: 
347:     /**
348:      * Space-stuffs if it starts with ' ' or '>' or 'From ', or if
349:      * quote depth is non-zero (for aesthetic reasons so that there is a
350:      * space after the '>').
351:      *
352:      * @param string $text        The text to stuff.
353:      * @param string $num_quotes  The quote-level of this line.
354:      * @param boolean $toflowed   Are we converting to flowed text?
355:      *
356:      * @return string  The stuffed text.
357:      */
358:     protected function _stuff($text, $num_quotes, $toflowed)
359:     {
360:         return ($toflowed && ($num_quotes || preg_match("/^(?: |>|From |From$)/", $text)))
361:             ? ' ' . $text
362:             : $text;
363:     }
364: 
365:     /**
366:      * Unstuffs a space stuffed line.
367:      *
368:      * @param string $text  The text to unstuff.
369:      *
370:      * @return string  The unstuffed text.
371:      */
372:     protected function _unstuff($text)
373:     {
374:         return (!empty($text) && ($text[0] == ' '))
375:             ? substr($text, 1)
376:             : $text;
377:     }
378: 
379: }
380: 
API documentation generated by ApiGen