user input data filter html cleaner better than html purifier customize this class to allow or disallow tags attributes and scripting from user data input. light weight and gives you total control over user data input regulated by your settings. Strips all scripting and closes all open tags that would break site layout.
,
//and html attribute write like :
class InputFilter {
protected $tagsArray; // default = empty array
protected $attrArray; // default = empty array
protected $tagsMethod; // default = 0
protected $attrMethod; // default = 0
protected $xssAuto; // default = 1
protected $tagBlacklist = array('applet', 'body', 'bgsound', 'base', 'basefont', 'embed', 'frame', 'frameset', 'head', 'html', 'id', 'iframe', 'ilayer', 'layer', 'link', 'meta', 'name', 'object', 'script', 'style', 'title', 'xml');
protected $attrBlacklist = array('action', 'background', 'codebase', 'dynsrc', 'lowsrc'); // also will strip ALL event handlers
/**
* Constructor for inputFilter class. Only first parameter is required.
* @access constructor
* @param Array $tagsArray - list of user-defined tags
* @param Array $attrArray - list of user-defined attributes
* @param int $tagsMethod - 0= allow just user-defined, 1= allow all but user-defined
* @param int $attrMethod - 0= allow just user-defined, 1= allow all but user-defined
* @param int $xssAuto - 0= only auto clean essentials, 1= allow clean blacklisted tags/attr
*/
//define tags to allow. remove from blacklist if listed there
//set tagmethod to 0 user allowed tags
//$tagsArray = array('article','script','aside')
//set for $tags Array allow those tags and $attrArray attributes only
public function __construct($tagsArray = array('article','img','script','aside'), $attrArray = array('src','title','alt'), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1) {
// make sure user defined arrays are in lowercase
for ($i = 0; $i < count($tagsArray); $i++) $tagsArray[$i] = strtolower($tagsArray[$i]);
for ($i = 0; $i < count($attrArray); $i++) $attrArray[$i] = strtolower($attrArray[$i]);
// assign to member vars
$this->tagsArray = (array) $tagsArray;
$this->attrArray = (array) $attrArray;
$this->tagsMethod = $tagsMethod;
$this->attrMethod = $attrMethod;
$this->xssAuto = $xssAuto;
}
/**
* Method to be called by another php script. Processes for XSS and specified bad code.
* @access public
* @param Mixed $source - input string/array-of-string to be 'cleaned'
* @return String $source - 'cleaned' version of input parameter
*/
public function process($source) {
// clean all elements in this array
// if (is_array($source)) {
// faster to check if array
if ((array) $source === $source) {
foreach($source as $key => $value)
// filter element for XSS and other 'bad' code etc.
if (is_string($value)) $source[$key] = $this->remove($this->decode($value));
return $source;
// clean this string
} else if (is_string($source)) {
// filter source for XSS and other 'bad' code etc.
return $this->remove($this->decode($source));
// return parameter as given
} else return $source;
}
/**
* Internal method to iteratively remove all unwanted tags and attributes
* @access protected
* @param String $source - input string to be 'cleaned'
* @return String $source - 'cleaned' version of input parameter
*/
protected function remove($source) {
$loopCounter=0;
// provides nested-tag protection
while($source != $this->filterTags($source)) {
$source = $this->filterTags($source);
$loopCounter++;
}
return $source;
}
/**
* Internal method to strip a string of certain tags
* @access protected
* @param String $source - input string to be 'cleaned'
* @return String $source - 'cleaned' version of input parameter
*/
protected function filterTags($source) {
// filter pass setup
$preTag = NULL;
$postTag = $source;
// find initial tag's position
$tagOpen_start = strpos($source, '<');
// interate through string until no tags left
while($tagOpen_start !== FALSE) {
// process tag interatively
$preTag .= substr($postTag, 0, $tagOpen_start);
$postTag = substr($postTag, $tagOpen_start);
$fromTagOpen = substr($postTag, 1);
// end of tag
$tagOpen_end = strpos($fromTagOpen, '>');
if ($tagOpen_end === false) break;
// next start of tag (for nested tag assessment)
$tagOpen_nested = strpos($fromTagOpen, '<');
if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
$preTag .= substr($postTag, 0, ($tagOpen_nested+1));
$postTag = substr($postTag, ($tagOpen_nested+1));
$tagOpen_start = strpos($postTag, '<');
continue;
}
$tagOpen_nested = (strpos($fromTagOpen, '<') + $tagOpen_start + 1);
$currentTag = substr($fromTagOpen, 0, $tagOpen_end);
$tagLength = strlen($currentTag);
if (!$tagOpen_end) {
$preTag .= $postTag;
$tagOpen_start = strpos($postTag, '<');
}
// iterate through tag finding attribute pairs - setup
$tagLeft = $currentTag;
$attrSet = array();
$currentSpace = strpos($tagLeft, ' ');
// is end tag
if (substr($currentTag, 0, 1) == "/") {
$isCloseTag = TRUE;
list($tagName) = explode(' ', $currentTag);
$tagName = substr($tagName, 1);
// is start tag
} else {
$isCloseTag = FALSE;
list($tagName) = explode(' ', $currentTag);
}
// excludes all "non-regular" tagnames OR no tagname OR remove if xssauto is on and tag is blacklisted
if ((!preg_match("/^[a-z][a-z0-9]*$/i",$tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto))) {
$postTag = substr($postTag, ($tagLength + 2));
$tagOpen_start = strpos($postTag, '<');
// don't append this tag
continue;
}
// this while is needed to support attribute values with spaces in!
while ($currentSpace !== FALSE) {
$fromSpace = substr($tagLeft, ($currentSpace+1));
$nextSpace = strpos($fromSpace, ' ');
$openQuotes = strpos($fromSpace, '"');
$closeQuotes = strpos(substr($fromSpace, ($openQuotes+1)), '"') + $openQuotes + 1;
// another equals exists
if (strpos($fromSpace, '=') !== FALSE) {
// opening and closing quotes exists
if (($openQuotes !== FALSE) && (strpos(substr($fromSpace, ($openQuotes+1)), '"') !== FALSE))
$attr = substr($fromSpace, 0, ($closeQuotes+1));
// one or neither exist
else $attr = substr($fromSpace, 0, $nextSpace);
// no more equals exist
} else $attr = substr($fromSpace, 0, $nextSpace);
// last attr pair
if (!$attr) $attr = $fromSpace;
// add to attribute pairs array
$attrSet[] = $attr;
// next inc
$tagLeft = substr($fromSpace, strlen($attr));
$currentSpace = strpos($tagLeft, ' ');
}
// appears in array specified by user
$tagFound = in_array(strtolower($tagName), $this->tagsArray);
// remove this tag on condition
if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
// reconstruct tag with allowed attributes
if (!$isCloseTag) {
$attrSet = $this->filterAttr($attrSet);
$preTag .= '<' . $tagName;
for ($i = 0; $i < count($attrSet); $i++)
$preTag .= ' ' . $attrSet[$i];
// reformat single tags to XHTML
//added eleif to not add close slash to img tag html
if (strpos($fromTagOpen, "" . $tagName)) {
$preTag .= '>';
// i added below because it was closing imag tags with xhtml closure i use html5
// }
// elseif ($tagName == "img") {
// $preTag .= '>';
}
else {$preTag .= ' />';
}
// just the tagname
} else $preTag .= '' . $tagName . '>';
}
// find next tag's start
$postTag = substr($postTag, ($tagLength + 2));
$tagOpen_start = strpos($postTag, '<');
}
// append any code after end of tags
$preTag .= $postTag;
return $preTag;
}
/**
* Internal method to strip a tag of certain attributes
* @access protected
* @param Array $attrSet
* @return Array $newSet
*/
protected function filterAttr($attrSet) {
$newSet = array();
// process attributes
for ($i = 0; $i xssAuto) && ((in_array(strtolower($attrSubSet[0]), $this->attrBlacklist)) || (substr($attrSubSet[0], 0, 2) == 'on'))))
continue;
// xss attr value filtering
if ($attrSubSet[1]) {
// strips unicode, hex, etc
$attrSubSet[1] = str_replace('', '', $attrSubSet[1]);
// strip normal newline within attr value
$attrSubSet[1] = preg_replace('/\s+/', '', $attrSubSet[1]);
// strip double quotes
$attrSubSet[1] = str_replace('"', '', $attrSubSet[1]);
// [requested feature] convert single quotes from either side to doubles (Single quotes shouldn't be used to pad attr value)
if ((substr($attrSubSet[1], 0, 1) == "'") && (substr($attrSubSet[1], (strlen($attrSubSet[1]) - 1), 1) == "'"))
$attrSubSet[1] = substr($attrSubSet[1], 1, (strlen($attrSubSet[1]) - 2));
// strip slashes
$attrSubSet[1] = stripslashes($attrSubSet[1]);
}
// auto strip attr's with "javascript:
if ( ((strpos(strtolower($attrSubSet[1]), 'expression') !== false) && (strtolower($attrSubSet[0]) == 'style')) ||
(strpos(strtolower($attrSubSet[1]), 'javascript:') !== false) ||
(strpos(strtolower($attrSubSet[1]), 'behaviour:') !== false) ||
(strpos(strtolower($attrSubSet[1]), 'vbscript:') !== false) ||
(strpos(strtolower($attrSubSet[1]), 'mocha:') !== false) ||
(strpos(strtolower($attrSubSet[1]), 'livescript:') !== false)
) continue;
// if matches user defined array
$attrFound = in_array(strtolower($attrSubSet[0]), $this->attrArray);
// keep this attr on condition
if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
// attr has value
if ($attrSubSet[1]) $newSet[] = $attrSubSet[0] . '="' . $attrSubSet[1] . '"';
// attr has decimal zero as value
else if ($attrSubSet[1] == "0") $newSet[] = $attrSubSet[0] . '="0"';
// reformat single attributes to XHTML
else $newSet[] = $attrSubSet[0] . '="' . $attrSubSet[0] . '"';
}
}
return $newSet;
}
/**
* Try to convert to plaintext
* @access protected
* @param String $source
* @return String $source
*/
protected function decode($source) {
// url decode
$source = html_entity_decode($source, ENT_QUOTES, "ISO-8859-1");
// convert decimal
$source = preg_replace('/(\d+);/me',"chr(\\1)", $source); // decimal notation
// convert hex
$source = preg_replace('/([a-f0-9]+);/mei',"chr(0x\\1)", $source); // hex notation
return $source;
}
/**
* Method to be called by another php script. Processes for SQL injection
* @access public
* @param Mixed $source - input string/array-of-string to be 'cleaned'
* @param Buffer $connection - An open MySQL connection
* @return String $source - 'cleaned' version of input parameter
*/
public function safeSQL($source, &$connection) {
// clean all elements in this array
if (is_array($source)) {
foreach($source as $key => $value)
// filter element for SQL injection
if (is_string($value)) $source[$key] = $this->quoteSmart($this->decode($value), $connection);
return $source;
// clean this string
} else if (is_string($source)) {
// filter source for SQL injection
if (is_string($source)) return $this->quoteSmart($this->decode($source), $connection);
// return parameter as given
} else return $source;
}
/**
* @author Chris Tobin
* @author Daniel Morris
* @access protected
* @param String $source
* @param Resource $connection - An open MySQL connection
* @return String $source
*/
protected function quoteSmart($source, &$connection) {
// strip slashes
if (get_magic_quotes_gpc()) $source = stripslashes($source);
// quote both numeric and text
$source = $this->escapeString($source, $connection);
return $source;
}
/**
* @author Chris Tobin
* @author Daniel Morris
* @access protected
* @param String $source
* @param Resource $connection - An open MySQL connection
* @return String $source
*/
protected function escapeString($string, &$connection) {
// depreciated function
if (version_compare(phpversion(),"4.3.0", "<")) mysql_escape_string($string);
// current function
else mysql_real_escape_string($string);
return $string;
}
}
$clean = new InputFilter;
$data="dvvdvdsvsd