File xoops_lib\modules\protector\library\HTMLPurifier\AttrDef\CSS\FontFamily.php

1:	<?php
2:
/**
 * Validates a font family list according to CSS spec.
 *
 * The input is a comma-separated list of font names. Each name is checked
 * individually; names that fail are dropped and the surviving names are
 * re-serialized into a normalized, comma-separated list.
 */
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{

    /**
     * Set of bytes a non-trivial font name may consist of, stored as a
     * string suitable for use as the mask argument of strspn().
     *
     * @type string|null
     */
    protected $mask = null;

    /**
     * Builds the byte mask of characters permitted in a font name:
     * underscore, dash, space, ASCII letters, ASCII digits and every byte
     * in the range 0x80-0xFF (the bytes used by multi-byte UTF-8 sequences).
     */
    public function __construct()
    {
        // NOTE: do not build the letter runs with
        //     for ($c = 'a'; $c <= 'z'; $c++)
        // PHP's string increment turns 'z' into 'aa', and 'aa' <= 'z' is
        // true under string comparison, so that loop keeps going until 'za'
        // and appends every two-letter combination to the mask. The set of
        // accepted characters would be the same, but the mask would be
        // roughly fifty times longer, and strspn() cost grows with the
        // length of the mask (see below).
        $this->mask = '_- '
            . implode('', range('a', 'z'))
            . implode('', range('A', 'Z'))
            . implode('', range('0', '9'));

        // special bytes used by UTF-8
        for ($i = 0x80; $i <= 0xFF; $i++) {
            // We don't bother excluding invalid bytes in this range,
            // because our restriction to well-formed UTF-8 will
            // prevent these from ever occurring.
            $this->mask .= chr($i);
        }

        /*
            PHP's internal strcspn implementation is
            O(length of string * length of mask), making it inefficient
            for large masks.  However, it's still faster than
            preg_match 8)
              for (p = s1;;) {
                spanp = s2;
                do {
                  if (*spanp == c || p == s1_end) {
                    return p - s1;
                  }
                } while (spanp++ < (s2_end - 1));
                c = *++p;
              }
         */
        // possible optimization: invert the mask.
    }

    /**
     * Filters a comma-separated font-family list down to the names that are
     * considered safe (and, if CSS.AllowedFonts is set, explicitly allowed).
     *
     * Generic family names and purely alphanumeric names are emitted
     * unquoted; every other accepted name is emitted wrapped in single
     * quotes.
     *
     * @param string $string Raw font-family value.
     * @param HTMLPurifier_Config $config Read for the 'CSS.AllowedFonts'
     *        directive: null allows any font, otherwise a lookup array
     *        keyed by font name.
     * @param HTMLPurifier_Context $context Not used by this method.
     * @return bool|string The normalized list, or false when no font name
     *         survives validation.
     */
    public function validate($string, $config, $context)
    {
        static $generic_names = array(
            'serif' => true,
            'sans-serif' => true,
            'monospace' => true,
            'fantasy' => true,
            'cursive' => true
        );
        $allowed_fonts = $config->get('CSS.AllowedFonts');

        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        foreach ($fonts as $font) {
            $font = trim($font);
            if ($font === '') {
                continue;
            }
            // match a generic name
            if (isset($generic_names[$font])) {
                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
                    $final .= $font . ', ';
                }
                continue;
            }
            // match a quoted name
            if ($font[0] === '"' || $font[0] === "'") {
                $length = strlen($font);
                // a lone quote or an empty quoted string carries no name
                if ($length <= 2) {
                    continue;
                }
                $quote = $font[0];
                // the closing quote must match the opening one
                if ($font[$length - 1] !== $quote) {
                    continue;
                }
                $font = substr($font, 1, $length - 2);
            }

            $font = $this->expandCSSEscape($font);

            // $font is a pure representation of the font name

            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
                continue;
            }

            if (ctype_alnum($font) && $font !== '') {
                // very simple font, allow it in unharmed
                $final .= $font . ', ';
                continue;
            }

            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

            // Here, there are various classes of characters which need
            // to be treated differently:
            //  - Alphanumeric characters are essentially safe.  We
            //    handled these above.
            //  - Spaces require quoting, though most parsers will do
            //    the right thing if there aren't any characters that
            //    can be misinterpreted
            //  - Dashes rarely occur, but they are fairly unproblematic
            //    for parsing/rendering purposes.
            //  The above characters cover the majority of Western font
            //  names.
            //  - Arbitrary Unicode characters not in ASCII.  Because
            //    most parsers give little thought to Unicode, treatment
            //    of these codepoints is basically uniform, even for
            //    punctuation-like codepoints.  These characters can
            //    show up in non-Western pages and are supported by most
            //    major browsers, for example: "ＭＳ明朝" is a
            //    legitimate font-name
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+ＭＳ+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
            //    Unicode font names.  Thanks Takeshi Terada for
            //    providing this information.
            //  The following characters, to my knowledge, have not been
            //  used to name font names.
            //  - Single quote.  While theoretically you might find a
            //    font name that has a single quote in its name (serving
            //    as an apostrophe, e.g. Dave's Scribble), I haven't
            //    been able to find any actual examples of this.
            //    Internet Explorer's cssText translation (which I
            //    believe is invoked by innerHTML) normalizes any
            //    quoting to single quotes, and fails to escape single
            //    quotes.  (Note that this is not IE's behavior for all
            //    CSS properties, just some sort of special casing for
            //    font-family).  So a single quote cannot be used
            //    safely in the font-family context if there will be an
            //    innerHTML/cssText translation.  Note that Firefox 3.x
            //    does this too.
            //  - Double quote.  In IE, these get normalized to
            //    single-quotes, no matter what the encoding.  (Fun
            //    fact, in IE8, the 'content' CSS property gained
            //    support, where they special cased to preserve encoded
            //    double quotes, but still translate unadorned double
            //    quotes into single quotes.)  So, because their
            //    fixpoint behavior is identical to single quotes, they
            //    cannot be allowed either.  Firefox 3.x displays
            //    single-quote style behavior.
            //  - Backslashes are reduced by one (so \\ -> \) every
            //    iteration, so they cannot be used safely.  This shows
            //    up in IE7, IE8 and FF3
            //  - Semicolons, commas and backticks are handled properly.
            //  - The rest of the ASCII punctuation is handled properly.
            // We haven't checked what browsers do to unadorned
            // versions, but this is not important as long as the
            // browser doesn't /remove/ surrounding quotes (as IE does
            // for HTML).
            //
            // With these results in hand, we conclude that there are
            // various levels of safety:
            //  - Paranoid: alphanumeric, spaces and dashes(?)
            //  - International: Paranoid + non-ASCII Unicode
            //  - Edgy: Everything except quotes, backslashes
            //  - NoJS: Standards compliance, e.g. sod IE. Note that
            //    with some judicious character escaping (since certain
            //    types of escaping doesn't work) this is theoretically
            //    OK as long as innerHTML/cssText is not called.
            // We believe that international is a reasonable default
            // (that we will implement now), and once we do more
            // extensive research, we may feel comfortable with dropping
            // it down to edgy.

            // International (plus underscores): alphanumeric, spaces,
            // dashes, underscores and non-ASCII Unicode.  Use of
            // str(c)spn assumes that the string was already well formed
            // Unicode (which of course it is).
            if (strspn($font, $this->mask) !== strlen($font)) {
                continue;
            }

            // Historical:
            // In the absence of innerHTML/cssText, these ugly
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
            // $font = str_replace('"',  '\\22 ', $font);
            // $font = str_replace("'",  '\\27 ', $font);

            // font possibly with spaces, requires quoting
            $final .= "'$font', ";
        }
        // drop the trailing ", " separator left by the last accepted font
        $final = rtrim($final, ', ');
        if ($final === '') {
            return false;
        }
        return $final;
    }

}
218:
219:	// vim: et sw=4 sts=4
220:

Namespaces

Classes

Interfaces

Exceptions

Functions