1: <?php
2: /**
3: * This filter attempts to sanitize HTML by cleaning up malformed HTML tags.
4: *
5: * Parameters:
6: * <pre>
7: * body_only - (boolean) Only return the body data?
8: * DEFAULT: Return the whole HTML document
9: * charset - (string) Charset of the data.
10: * DEFAULT: UTF-8
11: * size - (integer) Only filter if data is below this size.
12: * DEFAULT: No default
13: * </pre>
14: *
15: * Copyright 2009-2012 Horde LLC (http://www.horde.org/)
16: *
17: * See the enclosed file COPYING for license information (LGPL). If you
18: * did not receive this file, see http://www.horde.org/licenses/lgpl21.
19: *
20: * @author Michael Slusarz <jan@horde.org>
21: * @category Horde
22: * @license http://www.horde.org/licenses/lgpl21 LGPL 2.1
23: * @package Text_Filter
24: */
class Horde_Text_Filter_Cleanhtml extends Horde_Text_Filter_Base
{
    /**
     * Filter parameters.
     *
     * - body_only: (boolean) When non-empty, tidy is run with
     *              'show-body-only' enabled.
     * - charset:   (string) Charset of the text handed to postProcess().
     *              'us-ascii' (case-insensitive) is parsed by tidy as
     *              ASCII; any other charset is converted to UTF-8 before
     *              parsing and converted back afterwards.
     * - size:      (integer|boolean) Text whose byte length exceeds this
     *              value is returned unfiltered. False disables the
     *              size check.
     *
     * @var array
     */
    protected $_params = array(
        'body_only' => false,
        'charset' => 'UTF-8',
        'size' => false
    );

    /**
     * Runs the text through the tidy extension to repair malformed HTML.
     *
     * The text is returned unchanged when the tidy extension is not
     * available, when the text is longer (in bytes) than the 'size'
     * parameter, or when tidy fails to parse it.
     *
     * @param string $text  The text after the filtering.
     *
     * @return string  The modified text.
     */
    public function postProcess($text)
    {
        if (!Horde_Util::extensionExists('tidy')) {
            return $text;
        }

        $max_size = $this->_params['size'];
        if (($max_size !== false) && (strlen($text) > $max_size)) {
            return $text;
        }

        $tidy_config = array(
            'enclose-block-text' => true,
            'hide-comments' => true,
            'indent' => false,
            'numeric-entities' => true,
            'preserve-entities' => true,
            'show-body-only' => !empty($this->_params['body_only']),
            'tab-size' => 0,
            'wrap' => 0
        );

        $charset = $this->_params['charset'];
        $is_ascii = (strtolower($charset) === 'us-ascii');

        // US-ASCII input is handed to tidy as-is; everything else goes
        // through UTF-8 so a single tidy encoding covers all charsets.
        $input = $is_ascii
            ? $text
            : Horde_String::convertCharset($text, $charset, 'UTF-8');

        $tidy = new tidy();
        if (!$tidy->parseString($input, $tidy_config, $is_ascii ? 'ascii' : 'utf8')) {
            return $text;
        }
        $tidy->cleanRepair();

        // tidy::$value is nullable; cast so this method always honors its
        // documented string return type even when tidy yields no output.
        $output = (string)$tidy->value;

        return $is_ascii
            ? $output
            : Horde_String::convertCharset($output, 'UTF-8', $charset);
    }

}
80: