Summery Summery
Used for data cleanup and post-processing
Syntax Syntax
Description Description
This class can be overloaded with SimplePie::set_sanitize_class()
Source Source
File: wp-includes/SimplePie/Sanitize.php
{ // Private vars var $base; // Options var $remove_div = true; var $image_handler = ''; var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); var $encode_instead_of_strip = false; var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')); var $strip_comments = false; var $output_encoding = 'UTF-8'; var $enable_cache = true; var $cache_location = './cache'; var $cache_name_function = 'md5'; var $timeout = 10; var $useragent = ''; var $force_fsockopen = false; var $replace_url_attributes = null; public function __construct() { // Set defaults $this->set_url_replacements(null); } public function remove_div($enable = true) { $this->remove_div = (bool) $enable; } public function set_image_handler($page = false) { if ($page) { $this->image_handler = (string) $page; } else { $this->image_handler = false; } } public function set_registry(SimplePie_Registry $registry) { $this->registry = $registry; } public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache') { if (isset($enable_cache)) { $this->enable_cache = (bool) $enable_cache; } if ($cache_location) { $this->cache_location = (string) $cache_location; } if ($cache_name_function) { $this->cache_name_function = (string) $cache_name_function; } } public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false) { if ($timeout) { $this->timeout = (string) $timeout; } if ($useragent) { $this->useragent = (string) $useragent; } if ($force_fsockopen) { $this->force_fsockopen = (string) $force_fsockopen; } } public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style')) { if ($tags) { if (is_array($tags)) { $this->strip_htmltags = $tags; } else { $this->strip_htmltags = explode(',', $tags); } } else { $this->strip_htmltags = false; } } public function encode_instead_of_strip($encode = false) { $this->encode_instead_of_strip = (bool) $encode; } public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) { if ($attribs) { if (is_array($attribs)) { $this->strip_attributes = $attribs; } else { $this->strip_attributes = explode(',', $attribs); } } else { $this->strip_attributes = false; } } public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'))) { if ($attribs) { if (is_array($attribs)) { $this->add_attributes = $attribs; } else { $this->add_attributes = explode(',', $attribs); } } else { $this->add_attributes = false; } } public function strip_comments($strip = false) { $this->strip_comments = (bool) $strip; } public function set_output_encoding($encoding = 'UTF-8') { $this->output_encoding = (string) $encoding; } /** * Set element/attribute key/value pairs of HTML attributes * containing URLs that need to be resolved relative to the feed * * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite, * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite, * |q|@cite * * @since 1.0 * @param array|null $element_attribute Element/attribute key/value pairs, null for default */ public function set_url_replacements($element_attribute = null) { if ($element_attribute === null) { $element_attribute = array( 'a' => 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array( 'longdesc', 'src' ), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite' ); } $this->replace_url_attributes = (array) $element_attribute; } public function sanitize($data, $type, $base = '') { $data = trim($data); if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) { if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) { if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) { $type |= SIMPLEPIE_CONSTRUCT_HTML; } else { $type |= SIMPLEPIE_CONSTRUCT_TEXT; } } if ($type & SIMPLEPIE_CONSTRUCT_BASE64) { $data = base64_decode($data); } if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) { if (!class_exists('DOMDocument')) { throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer'); } $document = new DOMDocument(); $document->encoding = 'UTF-8'; $data = $this->preprocess($data, $type); set_error_handler(array('SimplePie_Misc', 'silence_errors')); $document->loadHTML($data); restore_error_handler(); $xpath = new DOMXPath($document); // Strip comments if ($this->strip_comments) { $comments = $xpath->query('//comment()'); foreach ($comments as $comment) { $comment->parentNode->removeChild($comment); } } // Strip out HTML tags and attributes that might cause various security problems. // Based on recommendations by Mark Pilgrim at: // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely if ($this->strip_htmltags) { foreach ($this->strip_htmltags as $tag) { $this->strip_tag($tag, $document, $xpath, $type); } } if ($this->strip_attributes) { foreach ($this->strip_attributes as $attrib) { $this->strip_attr($attrib, $xpath); } } if ($this->add_attributes) { foreach ($this->add_attributes as $tag => $valuePairs) { $this->add_attr($tag, $valuePairs, $document); } } // Replace relative URLs $this->base = $base; foreach ($this->replace_url_attributes as $element => $attributes) { $this->replace_urls($document, $element, $attributes); } // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags. if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) { $images = $document->getElementsByTagName('img'); foreach ($images as $img) { if ($img->hasAttribute('src')) { $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src')); $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi')); if ($cache->load()) { $img->setAttribute('src', $this->image_handler . $image_url); } else { $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); $headers = $file->headers; if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) { if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) { $img->setAttribute('src', $this->image_handler . $image_url); } else { trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); } } } } } } // Get content node $div = $document->getElementsByTagName('body')->item(0)->firstChild; // Finally, convert to a HTML string $data = trim($document->saveHTML($div)); if ($this->remove_div) { $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data); $data = preg_replace('/<\/div>$/', '', $data); } else { $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data); } } if ($type & SIMPLEPIE_CONSTRUCT_IRI) { $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base)); if ($absolute !== false) { $data = $absolute; } } if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) { $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8'); } if ($this->output_encoding !== 'UTF-8') { $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding)); } } return $data; } protected function preprocess($html, $type) { $ret = ''; $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html); if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) { // Atom XHTML constructs are wrapped with a div by default // Note: No protection if $html contains a stray </div>! $html = '<div>' . $html . '</div>'; $ret .= '<!DOCTYPE html>'; $content_type = 'text/html'; } else { $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'; $content_type = 'application/xhtml+xml'; } $ret .= '<html><head>'; $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />'; $ret .= '</head><body>' . $html . '</body></html>'; return $ret; } public function replace_urls($document, $tag, $attributes) { if (!is_array($attributes)) { $attributes = array($attributes); } if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) { $elements = $document->getElementsByTagName($tag); foreach ($elements as $element) { foreach ($attributes as $attribute) { if ($element->hasAttribute($attribute)) { $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base)); if ($value !== false) { $element->setAttribute($attribute, $value); } } } } } } public function do_strip_htmltags($match) { if ($this->encode_instead_of_strip) { if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'))) { $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8'); $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8'); return "<$match[1]$match[2]>$match[3]</$match[1]>"; } else { return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8'); } } elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'))) { return $match[4]; } else { return ''; } } protected function strip_tag($tag, $document, $xpath, $type) { $elements = $xpath->query('body//' . $tag); if ($this->encode_instead_of_strip) { foreach ($elements as $element) { $fragment = $document->createDocumentFragment(); // For elements which aren't script or style, include the tag itself if (!in_array($tag, array('script', 'style'))) { $text = '<' . $tag; if ($element->hasAttributes()) { $attrs = array(); foreach ($element->attributes as $name => $attr) { $value = $attr->value; // In XHTML, empty values should never exist, so we repeat the value if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML)) { $value = $name; } // For HTML, empty is fine elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML)) { $attrs[] = $name; continue; } // Standard attribute text $attrs[] = $name . '="' . $attr->value . '"'; } $text .= ' ' . implode(' ', $attrs); } $text .= '>'; $fragment->appendChild(new DOMText($text)); } $number = $element->childNodes->length; for ($i = $number; $i > 0; $i--) { $child = $element->childNodes->item(0); $fragment->appendChild($child); } if (!in_array($tag, array('script', 'style'))) { $fragment->appendChild(new DOMText('</' . $tag . '>')); } $element->parentNode->replaceChild($fragment, $element); } return; } elseif (in_array($tag, array('script', 'style'))) { foreach ($elements as $element) { $element->parentNode->removeChild($element); } return; } else { foreach ($elements as $element) { $fragment = $document->createDocumentFragment(); $number = $element->childNodes->length; for ($i = $number; $i > 0; $i--) { $child = $element->childNodes->item(0);