Summary
Replaces invalid characters with their percent-encoded form.
Syntax
Parameters
- $string
-
(Required) Input string
- $extra_chars
-
(Required) Valid characters not in iunreserved or iprivate (this is ASCII-only)
- $iprivate
-
(Optional) Allow iprivate
Default value: false
Return
(string)
Source
File: wp-includes/Requests/IRI.php
/**
 * Replace invalid characters with their percent-encoded form.
 *
 * Processing happens in three stages:
 *  1. Every run of pct-encoded triplets is handed to
 *     remove_iunreserved_percent_encoded() (defined elsewhere in this class)
 *     for normalization.
 *  2. Any '%' that is not followed by two hex digits is rewritten as '%25'.
 *  3. The string is scanned byte-wise. Bytes listed in $extra_chars, ASCII
 *     letters, digits, '-', '.', '_', '~' and '%' are skipped. Anything else is
 *     decoded as a UTF-8 sequence and percent-encoded byte by byte when it
 *     is malformed, non-shortest-form, a noncharacter, or outside the
 *     ucschar ranges (and outside iprivate, when $iprivate is true).
 *
 * @param string $string      Input string
 * @param string $extra_chars Valid characters not in iunreserved or iprivate (this is ASCII-only)
 * @param bool   $iprivate    Allow iprivate
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false) {
	// Normalize as many pct-encoded sections as possible
	$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

	// Replace invalid percent characters
	$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

	// Add unreserved and % to $extra_chars (the latter is safe because all
	// pct-encoded sections are now valid).
	$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

	// Now replace any bytes that aren't allowed with their pct-encoded versions
	$position = 0;
	$strlen   = strlen($string);

	// strspn() skips the run of allowed bytes; the loop body only ever sees
	// a byte that is NOT in $extra_chars.
	while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
		$value = ord($string[$position]);

		// Start position of the sequence being examined
		$start = $position;

		// By default we are valid
		$valid = true;

		// Classify the lead byte. A plain ASCII byte that reaches this point
		// is not in $extra_chars, so it falls into the "invalid byte" branch
		// and gets encoded.
		if (($value & 0xE0) === 0xC0) {
			// Two byte sequence
			$character = ($value & 0x1F) << 6;
			$length    = 2;
			$remaining = 1;
		}
		elseif (($value & 0xF0) === 0xE0) {
			// Three byte sequence
			$character = ($value & 0x0F) << 12;
			$length    = 3;
			$remaining = 2;
		}
		elseif (($value & 0xF8) === 0xF0) {
			// Four byte sequence
			$character = ($value & 0x07) << 18;
			$length    = 4;
			$remaining = 3;
		}
		else {
			// Invalid byte ($character is intentionally left unset; the
			// !$valid test below short-circuits before it is read).
			$valid     = false;
			$length    = 1;
			$remaining = 0;
		}

		if ($remaining) {
			for ($position++; $remaining; $position++) {
				// Sequence is truncated by the end of the string. Only the
				// bytes actually consumed so far belong to the bad sequence,
				// so step back onto the last one of them.
				if ($position >= $strlen) {
					$valid = false;
					$position--;
					break;
				}

				$value = ord($string[$position]);

				// Check that the byte is valid, then add it to the character:
				if (($value & 0xC0) === 0x80) {
					$remaining--;
					$character |= ($value & 0x3F) << ($remaining * 6);
				}
				// If it is invalid, count the sequence as invalid and
				// reprocess the current byte on the next outer iteration:
				else {
					$valid = false;
					$position--;
					break;
				}
			}
		}

		// Percent encode anything invalid or not in ucschar
		if (
			// Invalid sequences
			!$valid
			// Non-shortest form sequences are invalid
			|| ($length > 1 && $character <= 0x7F)
			|| ($length > 2 && $character <= 0x7FF)
			|| ($length > 3 && $character <= 0xFFFF)
			// Outside of range of ucschar codepoints
			// Noncharacters
			|| ($character & 0xFFFE) === 0xFFFE
			|| ($character >= 0xFDD0 && $character <= 0xFDEF)
			|| (
				// Everything else not in ucschar
				(
					($character > 0xD7FF && $character < 0xF900)
					|| $character < 0xA0
					|| $character > 0xEFFFD
				)
				&& (
					// Everything not in iprivate, if it applies
					!$iprivate
					|| $character < 0xE000
					|| $character > 0x10FFFD
				)
			)
		) {
			// If we were a character, pretend we weren't, but rather an error.
			// (After a fully-read sequence $position is one past its last
			// byte; move it back onto the last byte.)
			if ($valid) {
				$position--;
			}

			// Encode bytes $start..$position in one splice.
			$span    = $position - $start + 1;
			$encoded = '';
			for ($j = $start; $j <= $position; $j++) {
				$encoded .= sprintf('%%%02X', ord($string[$j]));
			}
			$string = substr_replace($string, $encoded, $start, $span);

			// Each byte grew from 1 to 3 characters. $position now rests on
			// the final hex digit of the encoded span, which strspn() will
			// step over on the next iteration.
			$position += 2 * $span;
			$strlen   += 2 * $span;
		}
	}

	return $string;
}