SimplePie_Sanitize::sanitize

Advertisement

Syntax Syntax

SimplePie_Sanitize::sanitize( $data,  $type,  $base = '' )

Source Source

File: wp-includes/SimplePie/Sanitize.php

		{
			$element_attribute = array(
				'a' => 'href',
				'area' => 'href',
				'blockquote' => 'cite',
				'del' => 'cite',
				'form' => 'action',
				'img' => array(
					'longdesc',
					'src'
				),
				'input' => 'src',
				'ins' => 'cite',
				'q' => 'cite'
			);
		}
		$this->replace_url_attributes = (array) $element_attribute;
	}

	public function sanitize($data, $type, $base = '')
	{
		$data = trim($data);
		if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
		{
			if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
			{
				if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
				{
					$type |= SIMPLEPIE_CONSTRUCT_HTML;
				}
				else
				{
					$type |= SIMPLEPIE_CONSTRUCT_TEXT;
				}
			}

			if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
			{
				$data = base64_decode($data);
			}

			if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
			{

				if (!class_exists('DOMDocument'))
				{
					throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
				}
				$document = new DOMDocument();
				$document->encoding = 'UTF-8';

				$data = $this->preprocess($data, $type);

				set_error_handler(array('SimplePie_Misc', 'silence_errors'));
				$document->loadHTML($data);
				restore_error_handler();

				$xpath = new DOMXPath($document);

				// Strip comments
				if ($this->strip_comments)
				{
					$comments = $xpath->query('//comment()');

					foreach ($comments as $comment)
					{
						$comment->parentNode->removeChild($comment);
					}
				}

				// Strip out HTML tags and attributes that might cause various security problems.
				// Based on recommendations by Mark Pilgrim at:
				// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
				if ($this->strip_htmltags)
				{
					foreach ($this->strip_htmltags as $tag)
					{
						$this->strip_tag($tag, $document, $xpath, $type);
					}
				}

				if ($this->strip_attributes)
				{
					foreach ($this->strip_attributes as $attrib)
					{
						$this->strip_attr($attrib, $xpath);
					}
				}

				if ($this->add_attributes)
				{
					foreach ($this->add_attributes as $tag => $valuePairs)
					{
						$this->add_attr($tag, $valuePairs, $document);
					}
				}

				// Replace relative URLs
				$this->base = $base;
				foreach ($this->replace_url_attributes as $element => $attributes)
				{
					$this->replace_urls($document, $element, $attributes);
				}

				// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
				if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
				{
					$images = $document->getElementsByTagName('img');
					foreach ($images as $img)
					{
						if ($img->hasAttribute('src'))
						{
							$image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
							$cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

							if ($cache->load())
							{
								$img->setAttribute('src', $this->image_handler . $image_url);
							}
							else
							{
								$file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
								$headers = $file->headers;

								if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
								{
									if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
									{
										$img->setAttribute('src', $this->image_handler . $image_url);
									}
									else
									{
										trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
									}
								}
							}
						}
					}
				}

				// Get content node
				$div = $document->getElementsByTagName('body')->item(0)->firstChild;
				// Finally, convert to a HTML string
				$data = trim($document->saveHTML($div));

				if ($this->remove_div)
				{
					$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
					$data = preg_replace('/<\/div>$/', '', $data);
				}
				else
				{
					$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
				}
			}

			if ($type & SIMPLEPIE_CONSTRUCT_IRI)
			{

Advertisement

Advertisement

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.