File xoops_lib\modules\protector\library\HTMLPurifier\Injector\AutoParagraph.php

1:	<?php
2:
/**
 * Injector that automatically wraps text in paragraph tags, treating a
 * double newline ("\n\n") as the boundary between paragraphs. It acts on
 * text in the anonymous root node and on text inside elements that
 * themselves allow <p> children.
 *
 * @todo Ensure all states are unit tested, including variations as well.
 * @todo Make a graph of the flow control for this Injector.
 */
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'AutoParagraph';

    /**
     * Elements this injector relies on.
     * @type array
     */
    public $needed = array('p');

    /**
     * Builds a fresh <p> start token flagged with the
     * 'MakeWellFormed_TagClosedError' armor entry.
     * NOTE(review): presumably the armor keeps MakeWellFormed from reporting
     * an error when it auto-closes this generated tag -- confirm there.
     * @return HTMLPurifier_Token_Start
     */
    private function _pStart()
    {
        $start = new HTMLPurifier_Token_Start('p');
        $start->armor['MakeWellFormed_TagClosedError'] = true;
        return $start;
    }

    /**
     * Handles a text token, replacing it (through the reference) with an
     * array of tokens whenever paragraph tags have to be introduced.
     * @param HTMLPurifier_Token_Text $token
     */
    public function handleText(&$token)
    {
        $raw = $token->data;

        if (!$this->allowsElement('p')) {
            // The parent does not accept <p>. The only case we act on is
            // being directly inside a <p> already.
            if (!empty($this->currentNesting)) {
                $depth = count($this->currentNesting);
                $parent = $this->currentNesting[$depth - 1];
                if ($parent->name == 'p') {
                    // State 3.1: ...<p>PAR1
                    // State 3.2: ...<p>PAR1\n\nPAR2
                    // Start from an empty result so _splitText knows no
                    // start tag was emitted by us.
                    $token = array();
                    $this->_splitText($raw, $token);
                }
            }
            // State 4.1: ...<b>PAR1
            // State 4.2: ...<b>PAR1\n\nPAR2
            // Nothing to do for those.
            return;
        }

        $hasBreak = strpos($raw, "\n\n") !== false;

        if (!empty($this->currentNesting) && !$hasBreak) {
            // State 2: <div>PAR1... (similar to 1.4)
            // We are inside an element that allows paragraphs, but this
            // text alone does not tell us whether they are needed.
            if ($this->_pLookAhead()) {
                // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
                // This will always be the first child, since any previous
                // inline element would have run this same routine and found
                // the double newline. A comment is one possible exception.
                $token = array($this->_pStart(), $token);
            }
            // State 2.2.1: <div>PAR1<div>
            // State 2.2.2: <div>PAR1<b>PAR1</b></div>
            return;
        }

        // From here on: text in the anonymous root node, or text anywhere
        // that contains a double newline. Both are treated the same way.
        $i = null;
        $nesting = null;
        $hasNext = $this->forwardUntilEndToken($i, $next, $nesting);

        if ($token->is_whitespace) {
            if (!$hasNext) {
                // State 1.1: ... ^ (whitespace, then document end)
                // Degenerate case, leave it alone.
                return;
            }
            if (!$this->_isInline($next)) {
                // State 1.5: \n<hr />
                return;
            }
        }

        // State 1.2: PAR1
        // State 1.3: PAR1\n\nPAR2
        // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
        $token = array($this->_pStart());
        $this->_splitText($raw, $token);
    }

    /**
     * Handles an element token, prefixing it (through the reference) with a
     * paragraph start and/or a double-newline text token when required.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        // We don't have to check if we're already in a <p> tag for block
        // tokens, because the tag would have been autoclosed by
        // MakeWellFormed.
        if (!$this->allowsElement('p')) {
            // State 2.2: <ul><li>
            // State 2.4: <p><b>
            return;
        }

        if (!empty($this->currentNesting)) {
            if (!$this->_isInline($token)) {
                // State 2.3: ...<div>
                return;
            }

            // State 1: <div>...<b>
            // Find out whether this token directly follows the parent's
            // start tag by looking at the preceding token.
            $i = null;
            $this->backward($i, $prev);

            if ($prev instanceof HTMLPurifier_Token_Start) {
                // State 1.2.1: <div><b>
                // State 1.3.1: <div><b>PAR1\n\nPAR2   -> needs <p>
                // State 1.3.2: <div><b>PAR1</b></div> -> no <p>
                // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> -> no <p>
                $wrap = $this->_pLookAhead();
            } else {
                // Token wasn't adjacent to the parent.
                // State 1.1.4: <div><p>PAR1</p>\n\n<b> -> needs <p>
                //   (quite frankly, this should be handled by splitText)
                // State 1.1.1: <div><p>PAR1</p><b>
                // State 1.1.2: <div><br /><b>
                // State 1.1.3: <div>PAR<b>
                $wrap = $prev instanceof HTMLPurifier_Token_Text
                    && substr($prev->data, -2) === "\n\n";
            }

            if ($wrap) {
                $token = array($this->_pStart(), $token);
            }
            return;
        }

        // We are in the anonymous root node.
        if ($this->_isInline($token)) {
            // State 3.1: <b>
            // This is where the {p} tag is inserted, not reflected in
            // inputTokens yet, however.
            $token = array($this->_pStart(), $token);
        }
        // State 3.2: <div> -- no paragraph is opened for block elements.

        $i = null;
        if (!$this->backward($i, $prev)) {
            // Nothing precedes this token.
            return;
        }
        if ($prev instanceof HTMLPurifier_Token_Text) {
            // State 3.1.2: ...</p>\n\n{p}<b>
            // State 3.2.2: ...</p>\n\n<div>
            // Note: PAR<ELEM> cannot occur because PAR would have been
            // wrapped in <p> tags.
            return;
        }

        // State 3.1.1: ...</p>{p}<b>
        // State 3.2.1: ...</p><div>
        // Put a double newline in front of whatever we are emitting.
        if (!is_array($token)) {
            $token = array($token);
        }
        array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
    }

    /**
     * Splits text into paragraph tokens and adds them to the result stream
     * that will replace the original token.
     * @param string $data String text data that will be processed
     *          into paragraphs
     * @param HTMLPurifier_Token[] $result Reference to array of tokens that
     *          the tags will be added to
     */
    private function _splitText($data, &$result)
    {
        $chunks = explode("\n\n", $data);
        $last = count($chunks) - 1;

        if ($last === 0) {
            // There were no double-newlines, abort quickly. In theory this
            // should never happen.
            $result[] = new HTMLPurifier_Token_Text($data);
            return;
        }

        $needsStart = false;

        // Double newline at the front of the text.
        if (trim($chunks[0]) === '') {
            if (empty($result)) {
                // An empty result means the caller did not add a start
                // paragraph token, i.e. we have been inside a paragraph for
                // a while and the newline asks for a new one: close the
                // current paragraph. The matching start token is only added
                // further down if real paragraphs follow; otherwise the next
                // call to the injector deals with it.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                $needsStart = true;
            } else {
                // We just started a new paragraph. Reinstate the double
                // newline for presentation's sake, since it was in the
                // source code.
                array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
            }
        }

        // Double newline at the end of the text: the final paragraph gets an
        // explicit </p>. ($last is never 0 here, so this cannot be the same
        // chunk as the leading one.)
        $needsEnd = trim($chunks[$last]) === '';

        // Keep only chunks holding something other than whitespace.
        $bodies = array();
        foreach ($chunks as $chunk) {
            if (trim($chunk) !== '') {
                $bodies[] = $chunk;
            }
        }

        // The text was just a giant blob of whitespace.
        if (empty($bodies)) {
            return;
        }

        // Add the start tag indicated by \n\n at the beginning of $data.
        if ($needsStart) {
            $result[] = $this->_pStart();
        }

        // Emit the paragraphs. Every paragraph but the last is closed and
        // followed by a double newline plus the start of the next one. The
        // last paragraph is only closed when the text ended in a double
        // newline; otherwise it stays open so MakeWellFormed can close it
        // later. No trailing start token is emitted; the injector handles
        // that on a later call if it turns out to be needed.
        $total = count($bodies);
        foreach ($bodies as $index => $body) {
            $isLast = ($index + 1 === $total);
            $result[] = new HTMLPurifier_Token_Text($body);
            if (!$isLast || $needsEnd) {
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
            }
            if (!$isLast) {
                $result[] = $this->_pStart();
            }
        }
    }

    /**
     * Returns true if the passed token's element name is among the child
     * elements of <p> (and is, ergo, allowed in paragraph tags).
     * @param HTMLPurifier_Token $token
     * @return bool
     */
    private function _isInline($token)
    {
        // Kept as one isset() chain so that a missing intermediate entry
        // simply yields false.
        return isset(
            $this->htmlDefinition->info['p']->child->elements[$token->name]
        );
    }

    /**
     * Looks ahead in the token list and determines whether or not we need
     * to insert a <p> tag.
     * @return bool
     */
    private function _pLookAhead()
    {
        // When the current token is itself a start tag, begin with a nesting
        // count of one; otherwise begin at zero.
        $nesting = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;
        $i = null;

        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
            $verdict = $this->_checkNeedsP($current);
            if ($verdict !== null) {
                // The first decisive token settles the question.
                return $verdict;
            }
        }

        // Ran out of tokens without finding a reason to add a paragraph.
        return false;
    }

    /**
     * Determines if a particular token requires an earlier inline token
     * to get a paragraph. Intended to be used together with
     * forwardUntilEndToken().
     * @param HTMLPurifier_Token $current
     * @return bool|null True if a paragraph is needed, false if it is
     *          definitely not, null if this token does not decide it
     */
    private function _checkNeedsP($current)
    {
        if ($current instanceof HTMLPurifier_Token_Start) {
            // <div>PAR1<div>
            // A block element terminates the search early; an inline start
            // tag decides nothing.
            return $this->_isInline($current) ? null : false;
        }

        if ($current instanceof HTMLPurifier_Token_Text
            && strpos($current->data, "\n\n") !== false
        ) {
            // <div>PAR1<b>PAR1\n\nPAR2
            return true;
        }

        // <div>PAR1<b>PAR1...
        return null;
    }
}
355:
356:	// vim: et sw=4 sts=4
357:

Namespaces

Classes

Interfaces

Exceptions

Functions