1: <?php
2: /**
3: * Washes strings from unwanted noise.
4: *
5: * Helpful methods to make unsafe strings usable.
6: *
7: * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
8: * Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
9: *
10: * Licensed under The MIT License
11: * For full copyright and license information, please see the LICENSE.txt
12: * Redistributions of files must retain the above copyright notice.
13: *
14: * @copyright Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
15: * @link http://cakephp.org CakePHP(tm) Project
16: * @package Cake.Utility
17: * @since CakePHP(tm) v 0.10.0.1076
18: * @license http://www.opensource.org/licenses/mit-license.php MIT License
19: */
20:
21: App::uses('ConnectionManager', 'Model');
22:
/**
 * Data Sanitization.
 *
 * Removal of non-alphanumeric characters, SQL-safe slash-added strings, HTML-friendly strings,
 * and all of the above on arrays.
 *
 * @package Cake.Utility
 * @deprecated 3.0.0 Deprecated since version 2.4
 */
class Sanitize {

	/**
	 * Removes any non-alphanumeric characters.
	 *
	 * Every entry of `$allowed` is prefixed with a backslash and placed inside the
	 * negated character class, so entries are expected to be single punctuation or
	 * whitespace characters.
	 *
	 * @param string|array $string String (or array of strings) to sanitize
	 * @param array $allowed An array of additional characters that are not to be removed.
	 * @return string|array Sanitized string, or an array of sanitized strings with the
	 *   original keys when an array was given
	 */
	public static function paranoid($string, $allowed = array()) {
		$allow = '';
		if (!empty($allowed)) {
			foreach ($allowed as $value) {
				$allow .= "\\$value";
			}
		}

		// Built once and shared by the scalar and the array code paths.
		$pattern = "/[^{$allow}a-zA-Z0-9]/";

		if (!is_array($string)) {
			return preg_replace($pattern, '', $string);
		}

		$cleaned = array();
		foreach ($string as $key => $clean) {
			$cleaned[$key] = preg_replace($pattern, '', $clean);
		}

		return $cleaned;
	}

	/**
	 * Makes a string SQL-safe.
	 *
	 * The value is quoted by the datasource of the given connection and the
	 * surrounding quote characters are then removed again. Numeric, boolean and
	 * null values are returned untouched.
	 *
	 * @param string $string String to sanitize
	 * @param string $connection Database connection being used
	 * @return string SQL safe string
	 */
	public static function escape($string, $connection = 'default') {
		if (is_numeric($string) || $string === null || is_bool($string)) {
			return $string;
		}
		$db = ConnectionManager::getDataSource($connection);
		$string = $db->value($string, 'string');

		// Skip the opening quote. A leading "N" is skipped as well -- presumably a
		// national-character literal prefix (N'...'); confirm against the datasource.
		// Square-bracket offsets are used because the `$string{0}` form was removed in PHP 8.
		$start = 1;
		if (isset($string[0]) && $string[0] === 'N') {
			$start = 2;
		}

		// Drop the leading quote (and prefix) and the trailing quote in one step.
		return substr($string, $start, -1);
	}

	/**
	 * Returns given string safe for display as HTML. Renders entities.
	 *
	 * strip_tags() does not validate HTML syntax or structure, so it might strip whole passages
	 * with broken HTML.
	 *
	 * ### Options:
	 *
	 * - remove (boolean) if true strips all HTML tags before encoding
	 * - charset (string) the charset used to encode the string
	 * - quotes (int) see http://php.net/manual/en/function.htmlentities.php
	 * - double (boolean) double encode html entities
	 *
	 * @param string $string String from where to strip tags
	 * @param array $options Array of options to use.
	 * @return string Sanitized string
	 */
	public static function html($string, $options = array()) {
		// Resolved on the first call only and reused afterwards.
		static $defaultCharset = false;
		if ($defaultCharset === false) {
			$defaultCharset = Configure::read('App.encoding');
			if ($defaultCharset === null) {
				$defaultCharset = 'UTF-8';
			}
		}
		$defaults = array(
			'remove' => false,
			'charset' => $defaultCharset,
			'quotes' => ENT_QUOTES,
			'double' => true
		);

		// Caller supplied options win; missing keys fall back to the defaults.
		$options += $defaults;

		if ($options['remove']) {
			$string = strip_tags($string);
		}

		return htmlentities($string, $options['quotes'], $options['charset'], $options['double']);
	}

	/**
	 * Strips extra whitespace from output
	 *
	 * Newlines, carriage returns and tabs are removed first, then every run of two
	 * or more whitespace characters is collapsed into a single space.
	 *
	 * @param string $str String to sanitize
	 * @return string whitespace sanitized string
	 */
	public static function stripWhitespace($str) {
		$withoutBreaks = preg_replace('/[\n\r\t]+/', '', $str);
		return preg_replace('/\s{2,}/u', ' ', $withoutBreaks);
	}

	/**
	 * Strips image tags from output
	 *
	 * Images carrying an `alt` attribute are replaced by their alt text followed by
	 * `<br />` (a directly wrapping link is kept around the text); all remaining
	 * image tags are removed.
	 *
	 * @param string $str String to sanitize
	 * @return string String with images stripped.
	 */
	public static function stripImages($str) {
		$preg = array(
			'/(<a[^>]*>)(<img[^>]+alt=")([^"]*)("[^>]*>)(<\/a>)/i' => '$1$3$5<br />',
			'/(<img[^>]+alt=")([^"]*)("[^>]*>)/i' => '$2<br />',
			'/<img[^>]*>/i' => ''
		);

		return preg_replace(array_keys($preg), array_values($preg), $str);
	}

	/**
	 * Strips scripts and stylesheets from output
	 *
	 * @param string $str String to sanitize
	 * @return string String with stylesheet <link> tags, <img> tags, inline `style="..."` attributes,
	 *   <script> and <style> elements and html comments removed.
	 */
	public static function stripScripts($str) {
		$regex =
			'/(<link[^>]+rel="[^"]*stylesheet"[^>]*>|' .
			'<img[^>]*>|style="[^"]*")|' .
			'<script[^>]*>.*?<\/script>|' .
			'<style[^>]*>.*?<\/style>|' .
			'<!--.*?-->/is';
		return preg_replace($regex, '', $str);
	}

	/**
	 * Strips extra whitespace, images, scripts and stylesheets from output
	 *
	 * Applies stripWhitespace(), then stripImages(), then stripScripts().
	 *
	 * @param string $str String to sanitize
	 * @return string sanitized string
	 */
	public static function stripAll($str) {
		return self::stripScripts(
			self::stripImages(
				self::stripWhitespace($str)
			)
		);
	}

	/**
	 * Strips the specified tags from output. First parameter is string from
	 * where to remove tags. All subsequent parameters are tags.
	 *
	 * Ex.`$clean = Sanitize::stripTags($dirty, 'b', 'p', 'div');`
	 *
	 * Will remove all `<b>`, `<p>`, and `<div>` tags from the $dirty string.
	 * Only the tags themselves are removed; the content between them is kept.
	 * Tag names are inserted into the regular expressions as given (not quoted).
	 *
	 * @param string $str String to sanitize.
	 * @return string sanitized String
	 */
	public static function stripTags($str) {
		$params = func_get_args();

		for ($i = 1, $count = count($params); $i < $count; $i++) {
			$tag = $params[$i];
			$str = preg_replace('/<' . $tag . '\b[^>]*>/i', '', $str);
			// The word boundary keeps e.g. 'b' from also removing `</body>` or `</blockquote>`.
			$str = preg_replace('/<\/' . $tag . '\b[^>]*>/i', '', $str);
		}
		return $str;
	}

	/**
	 * Sanitizes given array or value for safe input. Use the options to specify
	 * the connection to use, and what filters should be applied (with a boolean
	 * value). Arrays are cleaned recursively, empty values are returned as is.
	 * Valid filters (all enabled by default except `remove_html`), applied in this order:
	 *
	 * - odd_spaces - Removes every 0xCA byte (`chr(0xCA)`). Note that in UTF-8 data this byte
	 *   is also the lead byte of some two-byte characters.
	 * - encode - Encode any html entities. Encode must be true for the `remove_html` to work.
	 * - dollar - Replace `\$` with `$`
	 * - carriage - Remove `\r`
	 * - unicode - Rewrites numeric entities (`&#NNN;`); as written the replacement reproduces
	 *   the matched entity.
	 * - escape - Should the string be SQL escaped.
	 * - backslash - Rewrites backslashes that are not followed by `&#` or `?#`; as written the
	 *   replacement is a single backslash.
	 * - remove_html - Strip HTML with strip_tags. `encode` must be true for this option to work.
	 *
	 * @param string|array $data Data to sanitize
	 * @param string|array $options If string, DB connection being used, otherwise set of options
	 * @return mixed Sanitized data
	 */
	public static function clean($data, $options = array()) {
		if (empty($data)) {
			return $data;
		}

		if (!is_array($options)) {
			$options = array('connection' => $options);
		}

		$options += array(
			'connection' => 'default',
			'odd_spaces' => true,
			'remove_html' => false,
			'encode' => true,
			'dollar' => true,
			'carriage' => true,
			'unicode' => true,
			'escape' => true,
			'backslash' => true
		);

		if (is_array($data)) {
			foreach ($data as $key => $val) {
				$data[$key] = self::clean($val, $options);
			}
			return $data;
		}

		if ($options['odd_spaces']) {
			$data = str_replace(chr(0xCA), '', $data);
		}
		if ($options['encode']) {
			$data = self::html($data, array('remove' => $options['remove_html']));
		}
		if ($options['dollar']) {
			$data = str_replace("\\\$", "$", $data);
		}
		if ($options['carriage']) {
			$data = str_replace("\r", "", $data);
		}
		if ($options['unicode']) {
			$data = preg_replace("/&#([0-9]+);/s", "&#\\1;", $data);
		}
		if ($options['escape']) {
			$data = self::escape($data, $options['connection']);
		}
		if ($options['backslash']) {
			$data = preg_replace("/\\\(?!&#|\?#)/", "\\", $data);
		}
		return $data;
	}
}
270: