diff -u -w -r1.378 -r1.379
--- functions/mime.php~	2 Dec 2006 15:11:09 -0000	1.378
+++ functions/mime.php	4 Dec 2006 01:08:51 -0000	1.379
@@ -1125,6 +1127,11 @@
         return;
     }
     $m = false;
+    // Dangerous unicode characters must already be translated to safe values before the
+    // sq_deent() calls below run; otherwise the regular expressions do not match.
+
+
+
     do {
         $m = false;
         $m = $m || sq_deent($attvalue, '/\&#0*(\d+);*/s');
@@ -1151,6 +1158,75 @@
 }
 
 /**
+ * Neutralise IE parsing quirks: strip NULs and comments, translate look-alike Unicode or
+ * Shift_JIS character entities to plain ASCII letters, and drop backslashes before letters.
+ *
+ * @param  string $attvalue  Value to clean; passed by reference and modified in place.
+ * @return void
+ * @author Marc Groot Koerkamp.
+ */
+function sq_fixIE_idiocy(&$attvalue) {
+    // remove NUL
+    $attvalue = str_replace("\0", "", $attvalue);
+    // remove comments; the "s" modifier lets a comment that spans several lines match too
+    $attvalue = preg_replace("/(\/\*.*?\*\/)/s","",$attvalue);
+
+    // IE has the evil habit of accepting every possible value for the attribute expression.
+    // The table below contains characters which IE parses when used in the "expression" attribute
+    // value. Both arrays must keep the same length and order: search entry N maps to letter N.
+    $aDangerousCharsReplacementTable = array(
+                        array('&#x029F;', '&#0671;' ,/* L UNICODE IPA Extension */
+                              '&#x0280;', '&#0640;' ,/* R UNICODE IPA Extension */
+                              '&#x0274;', '&#0628;' ,/* N UNICODE IPA Extension */
+                              '&#xFF25;', '&#65317;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
+                              '&#xFF45;', '&#65349;',/* Unicode FULLWIDTH LATIN SMALL LETTER E */
+                              '&#xFF38;', '&#65336;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
+                              '&#xFF58;', '&#65368;',/* Unicode FULLWIDTH LATIN SMALL LETTER X */
+                              '&#xFF30;', '&#65328;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
+                              '&#xFF50;', '&#65360;',/* Unicode FULLWIDTH LATIN SMALL LETTER P */
+                              '&#xFF32;', '&#65330;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
+                              '&#xFF52;', '&#65362;',/* Unicode FULLWIDTH LATIN SMALL LETTER R */
+                              '&#xFF33;', '&#65331;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
+                              '&#xFF53;', '&#65363;',/* Unicode FULLWIDTH LATIN SMALL LETTER S */
+                              '&#xFF29;', '&#65321;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
+                              '&#xFF49;', '&#65353;',/* Unicode FULLWIDTH LATIN SMALL LETTER I */
+                              '&#xFF2F;', '&#65327;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
+                              '&#xFF4F;', '&#65359;',/* Unicode FULLWIDTH LATIN SMALL LETTER O */
+                              '&#xFF2E;', '&#65326;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
+                              '&#xFF4E;', '&#65358;',/* Unicode FULLWIDTH LATIN SMALL LETTER N */
+                              '&#xFF2C;', '&#65324;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
+                              '&#xFF4C;', '&#65356;',/* Unicode FULLWIDTH LATIN SMALL LETTER L */
+                              '&#xFF35;', '&#65333;',/* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
+                              '&#xFF55;', '&#65365;',/* Unicode FULLWIDTH LATIN SMALL LETTER U */
+                              '&#x207F;', '&#8319;' ,/* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
+                              '&#x8264;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */   // in unicode this is some chinese char range
+                              '&#x8285;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */
+                              '&#x8277;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */
+                              '&#x8298;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */
+                              '&#x826F;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */
+                              '&#x8290;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */
+                              '&#x8271;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */
+                              '&#x8292;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */
+                              '&#x8272;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */
+                              '&#x8293;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */
+                              '&#x8268;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */
+                              '&#x8289;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */
+                              '&#x826E;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */
+                              '&#x828F;', /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */
+                              '&#x826D;', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */
+                              '&#x828E;'), /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */
+                       array('l', 'l', 'r','r','n','n', /* IPA L, R, N (hex and decimal forms) */
+                             'E','E','e','e','X','X','x','x','P','P','p','p','R','R','r','r','S','S','s','s','I','I',
+                             'i','i','O','O','o','o','N','N','n','n','L','L','l','l','U','U','u','u','n','n',
+                             'E','e','X','x','P','p','R','r','S','s','I','i','O','o','N','n')); /* Shift JIS */
+    $attvalue = str_replace($aDangerousCharsReplacementTable[0],$aDangerousCharsReplacementTable[1],$attvalue);
+
+    // Escapes are useful for special characters like "{}[]()'&. In other cases they are
+    // used for XSS, so a backslash directly in front of a letter is dropped (the letter stays).
+    $attvalue = preg_replace("/(\\\\)([a-zA-Z]{1})/",'$2',$attvalue);
+}
+
+/**
  * This function returns the final tag out of the tag name, an array
  * of attributes, and the type of the tag. This function is called by
  * sq_sanitize internally.
@@ -1563,6 +1639,8 @@
 function sq_deent(&$attvalue, $regex, $hex=false){
     $me = 'sq_deent';
     $ret_match = false;
+    // remove comments
+    //$attvalue = preg_replace("/(\/\*.*\*\/)/","",$attvalue);
     preg_match_all($regex, $attvalue, $matches);
     if (is_array($matches) && sizeof($matches[0]) > 0){
         $repl = Array();
@@ -1617,9 +1695,22 @@
             }
         }
         /**
+         * Workaround for IE quirks
+         */
+        sq_fixIE_idiocy($attvalue);
+
+        /**
          * Remove any backslashes, entities, and extraneous whitespace.
          */
+
+        $oldattvalue = $attvalue;
         sq_defang($attvalue);
+        if ($attname == 'style' && $attvalue !== $oldattvalue) {
+            // entities are used in the attribute value. In 99% of the cases it's there as XSS
+            // i.e.<div style="{ left:exp&#x0280;essio&#x0274;( alert('XSS') ) }">
+            $attvalue = "idiocy";
+            $attary{$attname} = $attvalue;
+        }
         sq_unspace($attvalue);
 
         /**
@@ -1699,7 +1790,6 @@
 function sq_fixstyle($body, $pos, $message, $id, $mailbox){
     global $view_unsafe_images;
     $me = 'sq_fixstyle';
-
     // workaround for </style> in between comments
     $iCurrentPos = $pos;
     $content = '';
@@ -1740,6 +1830,9 @@
                     // possible comment
                     if (isset($body{$i+2}) && substr($body,$i,3) == '!--') {
                         $i = strpos($body,'-->',$i+3);
+                        if ($i === false) { // no end comment
+                            $i = strlen($body);
+                        }
                         $sToken = '';
                     }
                 } else {
@@ -1772,9 +1867,15 @@
     */
     //   $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
     //                           "url(\\1$secremoveimg\\2)", $content);
-    // remove NUL
-    $content = str_replace("\0", "", $content);
+
+    // Work around IE parsing quirks; sq_fixIE_idiocy() also removes the NUL characters.
+    sq_fixIE_idiocy($content);
+
+    // remove @import line
+    $content = preg_replace("/^\s*(@import.*)$/mi","\n<!-- @import rules forbidden -->\n",$content);
+
     // translate ur\l and variations (IE parses that)
+    // TODO check if the sq_fixIE_idiocy function already handles this.
     $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
     // NB I insert NUL characters to keep to avoid an infinite loop. They are removed after the loop.
     while (preg_match("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si", $content, $matches)) {
@@ -1833,8 +1934,10 @@
                     '/expression/i',
                     '/behaviou*r/i',
                     '/binding/i',
-                    '/include-source/i');
-    $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy');
+                    '/include-source/i',
+                    '/javascript/i',
+                    '/script/i');
+    $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy');
     $contentNew = preg_replace($match, $replace, $contentTemp);
     if ($contentNew !== $contentTemp) {
         // insecure css declarations are used. From now on we don't care

