1: <?php
2:
3: /**
4: * Definition for tables. The general idea is to extract out all of the
5: * essential bits, and then reconstruct it later.
6: *
7: * This is a bit confusing, because the DTDs and the W3C
8: * validators seem to disagree on the appropriate definition. The
9: * DTD claims:
10: *
11: * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
12: *
13: * But actually, the HTML4 spec then has this to say:
14: *
15: * The TBODY start tag is always required except when the table
16: * contains only one table body and no table head or foot sections.
17: * The TBODY end tag may always be safely omitted.
18: *
19: * So the DTD is kind of wrong. The validator is, unfortunately, kind
20: * of on crack.
21: *
22: * The definition changed again in XHTML1.1; and in my opinion, this
23: * formulation makes the most sense.
24: *
25: * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
26: *
27: * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
28: * If we encounter a thead, tfoot or tbody, we are placed in the former
29: * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, having just tr tags is OK.
31: */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * Names of the child elements that validateChildren() knows how to place.
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    /**
     * No construction-time setup is performed.
     */
    public function __construct()
    {
    }

    /**
     * Sorts the children of a table into the order
     * caption, cols, thead, tfoot, body content, keeping whitespace and
     * comments attached to the element they followed.
     *
     * Only the first caption, thead and tfoot are kept in their dedicated
     * slots: an extra caption is discarded, an extra thead/tfoot is renamed
     * to tbody and treated as body content. Once any thead, tfoot or tbody
     * has been seen, stray tr elements are wrapped in a generated tbody.
     * Non-whitespace text and elements with any other name are discarded.
     *
     * @param array $children Child nodes of the table, in document order
     * @param HTMLPurifier_Config $config Not used by this implementation
     * @param HTMLPurifier_Context $context Not used by this implementation
     * @return array|bool Reordered list of children, or false when the list
     *         is empty or contains no tr, tbody, thead or tfoot at all
     *         (presumably the caller then drops the table; confirm against
     *         HTMLPurifier_ChildDef)
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace (and comments) that trailed each of the elements above
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        // if true, then we need to wrap any stray <tr>s with a <tbody>.
        $tbody_mode = false;

        // Always aliases the bucket belonging to the most recently kept
        // element, so trailing whitespace/comments travel with it.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            // Comments are recognized by type, before $node->name is read.
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption! Later ones are dropped
                    // and the whitespace bucket is left where it was.
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever.  (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do?  Current behavior is to
                        // transmutate the extra entries into
                        // tbody tags, and then put into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        // Note: this renames the caller's node in place.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together. Non-whitespace text is discarded.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        // Nothing row-like survived: there is no table to rebuild.
        if (empty($content) && $thead === false && $tfoot === false) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (possibly empty), so no guard is needed.
        $ret = array_merge($ret, $cols);
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            foreach ($content as $node) {
                // $content can hold comment nodes collected by the first
                // pass. Detect them by type, exactly as that pass does, and
                // place them like whitespace so they are kept in tbody mode
                // just as they are kept in tr mode.
                if ($node instanceof HTMLPurifier_Node_Comment) {
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
                        $current_tr_tbody->children[] = $node;
                    }
                    continue;
                }
                switch ($node->name) {
                    case 'tbody':
                        // A real tbody ends any run of wrapped stray rows.
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        // Consecutive stray rows share one generated tbody.
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        // Only whitespace text reaches $content; keep it
                        // inside the generated tbody when one is open.
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;
    }
}
223:
224: // vim: et sw=4 sts=4
225: