1: <?php
2: /**
3: * Zend Framework (http://framework.zend.com/)
4: *
5: * @link http://github.com/zendframework/zf2 for the canonical source repository
6: * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com)
7: * @license http://framework.zend.com/license/new-bsd New BSD License
8: */
9:
10: namespace Wei;
11:
12: /**
13: * Context specific methods for use in secure output escaping
14: *
 * This escaper is derived from the Zend Framework Escaper component (versions 2.1.5 - 2.2.2)
16: *
17: * @link https://github.com/zendframework/zf2/blob/master/library/Zend/Escaper/Escaper.php
18: */
class E extends Base
{
    /**
     * Maps Unicode code points to the named HTML entities emitted for them.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation, which is restricted to those named
     * entities that XML supports. Using other HTML entities would result in this
     * error: XML Parsing Error: undefined entity
     *
     * @var array
     */
    protected static $htmlNamedEntityMap = array(
        34 => 'quot', // quotation mark
        38 => 'amp',  // ampersand
        60 => 'lt',   // less-than sign
        62 => 'gt',   // greater-than sign
    );

    /**
     * Current encoding for escaping. If not UTF-8, strings are converted from this
     * encoding before escaping and back to this encoding after escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Bitmask passed as the second argument to htmlspecialchars(). The constructor
     * adds ENT_SUBSTITUTE when that constant is defined (PHP 5.4+) so that invalid
     * UTF-8 sequences are handled correctly.
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags = ENT_QUOTES;

    /**
     * Callback used by attr() to escape characters for HTML attribute contexts.
     *
     * @var callable
     */
    protected $htmlAttrMatcher;

    /**
     * Callback used by js() to escape characters for Javascript contexts.
     *
     * @var callable
     */
    protected $jsMatcher;

    /**
     * Callback used by css() to escape characters for CSS contexts.
     *
     * @var callable
     */
    protected $cssMatcher;

    /**
     * Lower-case names of every encoding accepted by the constructor.
     *
     * @var array
     */
    protected $supportedEncodings = array(
        'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
        'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
        'ibm866',       '866',          'cp1251',       'windows-1251',
        'win-1251',     '1251',         'cp1252',       'windows-1252',
        '1252',         'koi8-r',       'koi8-ru',      'koi8r',
        'big5',         '950',          'gb2312',       '936',
        'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
        'cp932',        '932',          'euc-jp',       'eucjp',
        'eucjp-win',    'macroman'
    );

    /**
     * Constructor: validates and normalises the configured encoding, enables
     * ENT_SUBSTITUTE for htmlspecialchars() when available (PHP 5.4+) and wires
     * up the matcher callbacks.
     *
     * @param array $options Forwarded to the parent constructor; presumably this is
     *                       how `encoding` gets overridden (confirm in Base)
     * @throws \InvalidArgumentException When the encoding is blank or unsupported
     */
    public function __construct($options = array())
    {
        parent::__construct($options);

        $encoding = $this->encoding;

        if ($encoding !== null) {
            $encoding = (string) $encoding;
            if ($encoding === '') {
                throw new \InvalidArgumentException(
                    get_class($this) . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);
            // Strict comparison: with loose comparison PHP compares numeric strings
            // as numbers, so values such as '0866' would wrongly match '866'.
            if (!in_array($encoding, $this->supportedEncodings, true)) {
                throw new \InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . get_class($this)
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        if (defined('ENT_SUBSTITUTE')) {
            $this->htmlSpecialCharsFlags |= ENT_SUBSTITUTE;
        }

        // set matcher callbacks
        $this->htmlAttrMatcher = array($this, 'htmlAttrMatcher');
        $this->jsMatcher = array($this, 'jsMatcher');
        $this->cssMatcher = array($this, 'cssMatcher');
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this uses htmlspecialchars().
     *
     * Falsy input ('', '0', null, 0, false, ...) is returned unchanged.
     *
     * @param string $string
     * @return string
     */
    public function html($string)
    {
        if (!$string) {
            return $string;
        }

        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. An extended set of characters is
     * escaped beyond what htmlspecialchars() covers, to handle cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Falsy input and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws \RuntimeException When the input is not valid in the configured encoding
     * @return string
     */
    public function attr($string)
    {
        if (!$string) {
            return $string;
        }

        $string = $this->toUtf8($string);
        // '' is reachable here when a non UTF-8 input failed to convert.
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters is escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * Falsy input and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws \RuntimeException When the input is not valid in the configured encoding
     * @return string
     */
    public function js($string)
    {
        if (!$string) {
            return $string;
        }

        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode().
     *
     * Falsy input is returned unchanged.
     *
     * @param string $string
     * @return string
     */
    public function url($string)
    {
        if (!$string) {
            return $string;
        }

        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * Falsy input and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws \RuntimeException When the input is not valid in the configured encoding
     * @return string
     */
    public function css($string)
    {
        if (!$string) {
            return $string;
        }

        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * @param array $matches Match data; $matches[0] is the single UTF-8 character to escape
     * @return string The entity that replaces the character
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];
        $ord = ord($chr);

        /**
         * The following replaces characters undefined in HTML with the
         * hex entity for the Unicode replacement character. The entity (not
         * the literal character) is required: it is plain ASCII and therefore
         * survives the conversion back to a non UTF-8 target encoding.
         */
        if (($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        /**
         * Obtain the code point of the character. Multi-byte characters are
         * converted to UTF-32BE, whose bytes are the code point itself; UTF-16BE
         * would yield a surrogate pair for characters above U+FFFF.
         */
        if (strlen($chr) > 1) {
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            if ($chr === '') {
                // Conversion failed; never emit the invalid reference &#x00;.
                return '&#xFFFD;';
            }
        }

        $ord = hexdec(bin2hex($chr));

        // Prefer a named entity where one is available.
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        /**
         * Per OWASP recommendations, we'll use upper hex entities
         * for any other characters where a named entity does not exist.
         */
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * Single-byte characters become \xHH; all others become \uHHHH, with
     * characters above U+FFFF written as two \uHHHH escapes (surrogate pair).
     *
     * @param array $matches Match data; $matches[0] is the single UTF-8 character to escape
     * @return string The escape sequence that replaces the character
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            return sprintf('\\x%02X', ord($chr));
        }

        $hex = strtoupper(bin2hex($this->convertEncoding($chr, 'UTF-16BE', 'UTF-8')));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }

        // Four UTF-16 bytes form a surrogate pair; each half needs its own \u escape.
        return sprintf('\\u%04s\\u%04s', substr($hex, 0, 4), substr($hex, 4, 4));
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * @param array $matches Match data; $matches[0] is the single UTF-8 character to escape
     * @return string Backslash, upper-case hex code point and a terminating space
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            $ord = ord($chr);
        } else {
            // UTF-32BE bytes are the code point itself, also for characters above U+FFFF.
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding (see getEncoding()).
     *
     * @param string $string
     * @throws \RuntimeException When the result is not valid UTF-8
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (!$this->isUtf8($result)) {
            throw new \RuntimeException(sprintf(
                'String to be escaped was not valid UTF-8 or could not be converted: %s',
                $result
            ));
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 back to the base encoding (see getEncoding()).
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error
        // (e.g. malformed UTF-8 with the /u modifier); only 1 means "valid".
        return $string === '' || preg_match('/^./su', $string) === 1;
    }

    /**
     * Encoding conversion helper which wraps iconv and mbstring where they exist or throws
     * an exception where neither is available.
     *
     * @param string $string
     * @param string $to
     * @param array|string $from
     * @throws \RuntimeException When neither iconv nor mbstring is available
     * @return string The converted string, or '' when the conversion failed
     */
    protected function convertEncoding($string, $to, $from)
    {
        if (function_exists('iconv')) {
            $result = iconv($from, $to, $string);
        } elseif (function_exists('mb_convert_encoding')) {
            $result = mb_convert_encoding($string, $to, $from);
        } else {
            throw new \RuntimeException(
                get_class($this)
                . ' requires either the iconv or mbstring extension to be installed'
                . ' when escaping for non UTF-8 strings.'
            );
        }

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }
        return $result;
    }

    /**
     * Escapes a string by specified type for secure output
     *
     * @param string $string
     * @param string $type One of 'html', 'js', 'css', 'attr' or 'url'
     * @return string
     * @throws \InvalidArgumentException When the type is not supported
     */
    public function __invoke($string, $type = 'html')
    {
        // Strict comparison: under loose comparison non-string values such as
        // true (or 0 before PHP 8) match an entry and lead to a call to a
        // non-existent method instead of the documented exception.
        if (in_array($type, array('html', 'js', 'css', 'attr', 'url'), true)) {
            return $this->$type($string);
        }
        throw new \InvalidArgumentException(sprintf('Unsupported escape type "%s"', $type));
    }
}