Wikipedia:AutoEd/unicodehex.js

From Simple English Wikipedia, the free encyclopedia

//

// Purpose: Changes hex character codes in wikilinks to actual Unicode characters
//
// Examples: [[Stra%C3%9Fe|street]] -> [[Straße|street]]
//           [[AutoEd#History_.281990.29|History]] ->
//               [[AutoEd#History_(1990)|History]]
//           (underscores are left as they are; only hex codes are decoded)
//
// Note: Contributed by CharlotteWeb
//
// Comments (CharlotteWeb):
// To keep things simple we'll ignore all image links, because some people prefer
// underscores in the file name and the caption can contain god-knows-what.
// One easy way is to flag them with a character which should never be used,
// but if it is already present we have a problem, so let's just quit.
//
/**
 * Decodes hex character codes inside wikilink targets.
 *
 * For every [[...]] link that is not an Image/File/Media link, the part
 * before the first "|" is decoded:
 *   - in the page title, %XX escapes are URI-decoded;
 *   - in the section (everything after the first "#"), ".XX" is first turned
 *     into "%XX" and the result is URI-decoded.
 * A "%" that does not start a valid %XX escape is kept as a literal percent
 * sign. Link text after the first "|" is never modified.
 *
 * If decoding a link fails (for example, the bytes are not valid UTF-8), that
 * link is left exactly as it was.
 *
 * Known limitation: in a section, any "." followed by two hex digits is
 * treated as an escape, so ordinary text such as "3.14" is decoded too.
 *
 * @param {string} txt - Wikitext to process.
 * @returns {string} The wikitext with link targets decoded. The input is
 *   returned unchanged if it already contains U+E000, because that character
 *   is needed as a temporary marker.
 */
function autoEdUnicodeHex(txt) { //MAIN FUNCTION describes list of fixes
 // Private Use Area character used to flag image links; see [[Private Use Area]]
 var FLAG = "\uE000";

 // Turns every "%" that is not followed by two hex digits into "%25",
 // so that literal percent signs survive decodeURIComponent.
 function escapeStrayPercents(s) {
  return s.replace(/%(?![0-9A-F]{2})/gi, "%25");
 }

 // Decodes one link target ("title" or "title#section"); returns the
 // target untouched when it cannot be decoded.
 function decodeTarget(target) {
  // Split on the FIRST "#" only, so later "#" characters stay in the section.
  var hashAt = target.indexOf("#");
  var title = hashAt === -1 ? target : target.slice(0, hashAt);
  try {
   var decoded = decodeURIComponent(escapeStrayPercents(title));
   if (hashAt !== -1) {
    var section = target.slice(hashAt + 1)
     // change "." to "%" when followed by valid hex
     .replace(/\.([0-9A-F]{2})/gi, "%$1");
    decoded += "#" + decodeURIComponent(escapeStrayPercents(section));
   }
   return decoded;
  } catch (e) {
   // Malformed escape sequence: just do no decoding for this link.
   if (!(e instanceof URIError)) throw e;
   return target;
  }
 }

 // The marker must not already occur in the text, otherwise we could not
 // tell our own flags apart from the page content.
 if (txt.indexOf(FLAG) !== -1) return txt;

 // Flag image links so that the link pattern below skips them.
 var flagged = txt.replace(/(\[\[[\:\s*]*(?:Image|File|Media)\s*\:)/gi, "$1" + FLAG);

 // A replacer function rewrites each match in place and keeps "$" in the
 // decoded text from being read as a replacement pattern.
 var fixed = flagged.replace(/\[\[[^\[\]\n\uE000]+\]\]/g, function (wikilink) {
  var pipeAt = wikilink.indexOf("|");
  if (pipeAt === -1) return decodeTarget(wikilink);
  return decodeTarget(wikilink.slice(0, pipeAt)) + wikilink.slice(pipeAt);
 });

 return fixed.replace(/\uE000/g, "");
}

//