1: <?php
2: /**
3: * Washes strings from unwanted noise.
4: *
5: * Helpful methods to make unsafe strings usable.
6: *
7: * PHP 5
8: *
9: * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
10: * Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
11: *
12: * Licensed under The MIT License
13: * For full copyright and license information, please see the LICENSE.txt
14: * Redistributions of files must retain the above copyright notice.
15: *
16: * @copyright Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
17: * @link http://cakephp.org CakePHP(tm) Project
18: * @package Cake.Utility
19: * @since CakePHP(tm) v 0.10.0.1076
20: * @license http://www.opensource.org/licenses/mit-license.php MIT License
21: */
22:
23: App::import('Model', 'ConnectionManager');
24:
/**
 * Data Sanitization.
 *
 * Removal of non-alphanumeric characters, SQL-safe slash-added strings, HTML-friendly strings,
 * and all of the above on arrays.
 *
 * @package Cake.Utility
 */
class Sanitize {

	/**
	 * Removes any non-alphanumeric characters.
	 *
	 * Every entry of `$allowed` is prefixed with a backslash and added to the
	 * character class of characters that are kept, so entries are expected to be
	 * single punctuation/whitespace characters.
	 *
	 * @param string|array $string String to sanitize, or a flat array of strings
	 *   (each value is sanitized and the keys are preserved)
	 * @param array $allowed An array of additional characters that are not to be removed.
	 * @return string|array Sanitized string, or array of sanitized strings
	 */
	public static function paranoid($string, $allowed = array()) {
		$allow = '';
		if (!empty($allowed)) {
			foreach ($allowed as $value) {
				$allow .= "\\$value";
			}
		}

		// Build the pattern once; it is the same for the scalar and array paths.
		$pattern = "/[^{$allow}a-zA-Z0-9]/";

		if (!is_array($string)) {
			return preg_replace($pattern, '', $string);
		}

		$cleaned = array();
		foreach ($string as $key => $clean) {
			$cleaned[$key] = preg_replace($pattern, '', $clean);
		}

		return $cleaned;
	}

	/**
	 * Makes a string SQL-safe.
	 *
	 * Numeric values, null and booleans are returned untouched. Anything else is
	 * passed to the datasource's `value()` method as a string, after which the
	 * first character (or the first two when the result starts with `N`) and the
	 * last character are cut off -- presumably the quotes, and the optional
	 * national-string prefix, that the datasource wraps around the value.
	 *
	 * @param string $string String to sanitize
	 * @param string $connection Database connection being used
	 * @return string SQL safe string
	 */
	public static function escape($string, $connection = 'default') {
		if (is_numeric($string) || $string === null || is_bool($string)) {
			return $string;
		}
		$db = ConnectionManager::getDataSource($connection);
		$string = $db->value($string, 'string');
		$start = 1;
		// Square brackets instead of `$string{0}`: the curly-brace offset syntax
		// was deprecated in PHP 7.4 and is a parse error from PHP 8.0 onwards.
		if ($string[0] === 'N') {
			$start = 2;
		}

		return substr(substr($string, $start), 0, -1);
	}

	/**
	 * Returns given string safe for display as HTML. Renders entities.
	 *
	 * strip_tags() does not validate HTML syntax or structure, so it might strip whole passages
	 * with broken HTML.
	 *
	 * ### Options:
	 *
	 * - remove (boolean) if true strips all HTML tags before encoding
	 * - charset (string) the charset used to encode the string
	 * - quotes (int) see http://php.net/manual/en/function.htmlentities.php
	 * - double (boolean) double encode html entities
	 *
	 * @param string $string String from where to strip tags
	 * @param array $options Array of options to use.
	 * @return string Sanitized string
	 */
	public static function html($string, $options = array()) {
		// Resolved on first use and then reused for every later call.
		static $defaultCharset = false;
		if ($defaultCharset === false) {
			$defaultCharset = Configure::read('App.encoding');
			if ($defaultCharset === null) {
				$defaultCharset = 'UTF-8';
			}
		}
		$default = array(
			'remove' => false,
			'charset' => $defaultCharset,
			'quotes' => ENT_QUOTES,
			'double' => true
		);

		$options = array_merge($default, $options);

		if ($options['remove']) {
			$string = strip_tags($string);
		}

		return htmlentities($string, $options['quotes'], $options['charset'], $options['double']);
	}

	/**
	 * Strips extra whitespace from output
	 *
	 * Newlines, carriage returns and tabs are removed outright; any remaining run
	 * of two or more whitespace characters is then collapsed to a single space.
	 *
	 * @param string $str String to sanitize
	 * @return string whitespace sanitized string
	 */
	public static function stripWhitespace($str) {
		$withoutBreaks = preg_replace('/[\n\r\t]+/', '', $str);
		return preg_replace('/\s{2,}/u', ' ', $withoutBreaks);
	}

	/**
	 * Strips image tags from output
	 *
	 * An image with an `alt` attribute is replaced by its alt text followed by
	 * `<br />` (keeping a directly surrounding link); any other image is removed.
	 *
	 * @param string $str String to sanitize
	 * @return string String with images stripped.
	 */
	public static function stripImages($str) {
		$preg = array(
			'/(<a[^>]*>)(<img[^>]+alt=")([^"]*)("[^>]*>)(<\/a>)/i' => '$1$3$5<br />',
			'/(<img[^>]+alt=")([^"]*)("[^>]*>)/i' => '$2<br />',
			'/<img[^>]*>/i' => ''
		);

		return preg_replace(array_keys($preg), array_values($preg), $str);
	}

	/**
	 * Strips scripts and stylesheets from output
	 *
	 * Also removes inline `style="..."` attributes.
	 *
	 * @param string $str String to sanitize
	 * @return string String with <link>, <img>, <script>, <style> elements and html comments removed.
	 */
	public static function stripScripts($str) {
		$regex =
			'/(<link[^>]+rel="[^"]*stylesheet"[^>]*>|' .
			'<img[^>]*>|style="[^"]*")|' .
			'<script[^>]*>.*?<\/script>|' .
			'<style[^>]*>.*?<\/style>|' .
			'<!--.*?-->/is';
		return preg_replace($regex, '', $str);
	}

	/**
	 * Strips extra whitespace, images, scripts and stylesheets from output
	 *
	 * Applies stripWhitespace(), then stripImages(), then stripScripts().
	 *
	 * @param string $str String to sanitize
	 * @return string sanitized string
	 */
	public static function stripAll($str) {
		$str = self::stripWhitespace($str);
		$str = self::stripImages($str);
		return self::stripScripts($str);
	}

	/**
	 * Strips the specified tags from output. First parameter is string from
	 * where to remove tags. All subsequent parameters are tags.
	 *
	 * Ex.`$clean = Sanitize::stripTags($dirty, 'b', 'p', 'div');`
	 *
	 * Will remove all `<b>`, `<p>`, and `<div>` tags from the $dirty string.
	 * Only the tags themselves are removed; the text between them is kept.
	 * Tag names are inserted into the regular expressions verbatim.
	 *
	 * @param string $str,... String to sanitize
	 * @return string sanitized String
	 */
	public static function stripTags($str) {
		$params = func_get_args();

		for ($i = 1, $count = count($params); $i < $count; $i++) {
			$str = preg_replace('/<' . $params[$i] . '\b[^>]*>/i', '', $str);
			// The word boundary keeps e.g. 'b' from also matching </body> or
			// </blockquote>, mirroring the opening-tag pattern above.
			$str = preg_replace('/<\/' . $params[$i] . '\b[^>]*>/i', '', $str);
		}
		return $str;
	}

	/**
	 * Sanitizes given array or value for safe input. Use the options to specify
	 * the connection to use, and what filters should be applied (with a boolean
	 * value). Arrays are sanitized recursively, value by value. Empty data is
	 * returned unchanged.
	 *
	 * Valid options (filters run in the order listed in the method body):
	 *
	 * - connection - Name of the DB connection handed to escape(). Defaults to `default`.
	 * - odd_spaces - Remove every 0xCA byte.
	 * - encode - Encode any html entities. Encode must be true for the `remove_html` to work.
	 * - dollar - Replace `\$` with `$`.
	 * - carriage - Remove `\r`
	 * - unicode - Turn double-encoded numeric entities (`&amp;#123;`) back into `&#123;`.
	 * - escape - Should the string be SQL escaped.
	 * - backslash - Rewrites backslashes that are not followed by `&#` or `?#`.
	 * - remove_html - Strip HTML with strip_tags. `encode` must be true for this option to work.
	 *
	 * @param string|array $data Data to sanitize
	 * @param string|array $options If string, DB connection being used, otherwise set of options
	 * @return mixed Sanitized data
	 */
	public static function clean($data, $options = array()) {
		if (empty($data)) {
			return $data;
		}

		if (!is_array($options)) {
			$options = array('connection' => $options);
		}

		$options = array_merge(array(
			'connection' => 'default',
			'odd_spaces' => true,
			'remove_html' => false,
			'encode' => true,
			'dollar' => true,
			'carriage' => true,
			'unicode' => true,
			'escape' => true,
			'backslash' => true
		), $options);

		if (is_array($data)) {
			foreach ($data as $key => $val) {
				$data[$key] = self::clean($val, $options);
			}
			return $data;
		}

		if ($options['odd_spaces']) {
			$data = str_replace(chr(0xCA), '', $data);
		}
		if ($options['encode']) {
			$data = self::html($data, array('remove' => $options['remove_html']));
		}
		if ($options['dollar']) {
			$data = str_replace("\\\$", "$", $data);
		}
		if ($options['carriage']) {
			$data = str_replace("\r", "", $data);
		}
		if ($options['unicode']) {
			// html() has already turned the `&` of `&#123;` into `&amp;`, so the
			// pattern must look for the encoded form; matching `&#123;` itself
			// would replace the entity with an identical copy and do nothing.
			$data = preg_replace("/&amp;#([0-9]+);/s", "&#\\1;", $data);
		}
		if ($options['escape']) {
			$data = self::escape($data, $options['connection']);
		}
		if ($options['backslash']) {
			// NOTE(review): this substitutes a single backslash for each matched
			// backslash, so it looks like it leaves the data unchanged -- kept
			// as-is for compatibility; confirm the intended behavior.
			$data = preg_replace("/\\\(?!&#|\?#)/", "\\", $data);
		}
		return $data;
	}
}
271: