MailHog/assets/js/strutil.js
2014-11-01 22:38:39 +00:00

980 lines
31 KiB
JavaScript

// -*- coding: utf-8 -*-
// GO-MAILHOG: This file borrowed from http://0xcc.net/jsescape/strutil.js
// Utility functions for strings.
//
// Copyright (C) 2007 Satoru Takabayashi <satoru 0xcc.net>
// All rights reserved. This is free software with ABSOLUTELY NO WARRANTY.
// You can redistribute it and/or modify it under the terms of
// the GNU General Public License version 2.
// NOTES:
//
// Surrogate pairs:
//
// 1st 0xD800 - 0xDBFF (high surrogate)
// 2nd 0xDC00 - 0xDFFF (low surrogate)
//
// UTF-8 sequences:
//
// 0xxxxxxx
// 110xxxxx 10xxxxxx
// 1110xxxx 10xxxxxx 10xxxxxx
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
var EQUAL_SIGN = 0x3D;
var QUESTION_MARK = 0x3F;
// "あい" => [ 0x3042, 0x3044 ]
function convertStringToUnicodeCodePoints(str) {
var surrogate_1st = 0;
var unicode_codes = [];
for (var i = 0; i < str.length; ++i) {
var utf16_code = str.charCodeAt(i);
if (surrogate_1st != 0) {
if (utf16_code >= 0xDC00 && utf16_code <= 0xDFFF) {
var surrogate_2nd = utf16_code;
var unicode_code = (surrogate_1st - 0xD800) * (1 << 10) + (1 << 16) +
(surrogate_2nd - 0xDC00);
unicode_codes.push(unicode_code);
} else {
// Malformed surrogate pair ignored.
}
surrogate_1st = 0;
} else if (utf16_code >= 0xD800 && utf16_code <= 0xDBFF) {
surrogate_1st = utf16_code;
} else {
unicode_codes.push(utf16_code);
}
}
return unicode_codes;
}
// [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => [ 0x3042, 0x3044 ]
function convertUtf8BytesToUnicodeCodePoints(utf8_bytes) {
var unicode_codes = [];
var unicode_code = 0;
var num_followed = 0;
for (var i = 0; i < utf8_bytes.length; ++i) {
var utf8_byte = utf8_bytes[i];
if (utf8_byte >= 0x100) {
// Malformed utf8 byte ignored.
} else if ((utf8_byte & 0xC0) == 0x80) {
if (num_followed > 0) {
unicode_code = (unicode_code << 6) | (utf8_byte & 0x3f);
num_followed -= 1;
} else {
// Malformed UTF-8 sequence ignored.
}
} else {
if (num_followed == 0) {
unicode_codes.push(unicode_code);
} else {
// Malformed UTF-8 sequence ignored.
}
if (utf8_byte < 0x80){ // 1-byte
unicode_code = utf8_byte;
num_followed = 0;
} else if ((utf8_byte & 0xE0) == 0xC0) { // 2-byte
unicode_code = utf8_byte & 0x1f;
num_followed = 1;
} else if ((utf8_byte & 0xF0) == 0xE0) { // 3-byte
unicode_code = utf8_byte & 0x0f;
num_followed = 2;
} else if ((utf8_byte & 0xF8) == 0xF0) { // 4-byte
unicode_code = utf8_byte & 0x07;
num_followed = 3;
} else {
// Malformed UTF-8 sequence ignored.
}
}
}
if (num_followed == 0) {
unicode_codes.push(unicode_code);
} else {
// Malformed UTF-8 sequence ignored.
}
unicode_codes.shift(); // Trim the first element.
return unicode_codes;
}
// Helper function.
function convertEscapedCodesToCodes(str, prefix, base, num_bits) {
var parts = str.split(prefix);
parts.shift(); // Trim the first element.
var codes = [];
var max = Math.pow(2, num_bits);
for (var i = 0; i < parts.length; ++i) {
var code = parseInt(parts[i], base);
if (code >= 0 && code < max) {
codes.push(code);
} else {
// Malformed code ignored.
}
}
return codes;
}
// r'\u3042\u3044' => [ 0x3042, 0x3044 ]
// Note that the r '...' notation is borrowed from Python.
function convertEscapedUtf16CodesToUtf16Codes(str) {
return convertEscapedCodesToCodes(str, "\\u", 16, 16);
}
// r'\U00003042\U00003044' => [ 0x3042, 0x3044 ]
function convertEscapedUtf32CodesToUnicodeCodePoints(str) {
return convertEscapedCodesToCodes(str, "\\U", 16, 32);
}
// r'\xE3\x81\x82\xE3\x81\x84' => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
// r'\343\201\202\343\201\204' => [ 0343, 0201, 0202, 0343, 0201, 0204 ]
function convertEscapedBytesToBytes(str, base) {
var prefix = (base == 16 ? "\\x" : "\\");
return convertEscapedCodesToCodes(str, prefix, base, 8);
}
// "&amp;#12354;&amp;#12356;" => [ 0x3042, 0x3044 ]
// "&amp;#x3042;&amp;#x3044;" => [ 0x3042, 0x3044 ]
function convertNumRefToUnicodeCodePoints(str, base) {
var num_refs = str.split(";");
num_refs.pop(); // Trim the last element.
var unicode_codes = [];
for (var i = 0; i < num_refs.length; ++i) {
var decimal_str = num_refs[i].replace(/^&#x?/, "");
var unicode_code = parseInt(decimal_str, base);
unicode_codes.push(unicode_code);
}
return unicode_codes;
}
// [ 0x3042, 0x3044 ] => [ 0x3042, 0x3044 ]
// [ 0xD840, 0xDC0B ] => [ 0x2000B ] // A surrogate pair.
function convertUnicodeCodePointsToUtf16Codes(unicode_codes) {
var utf16_codes = [];
for (var i = 0; i < unicode_codes.length; ++i) {
var unicode_code = unicode_codes[i];
if (unicode_code < (1 << 16)) {
utf16_codes.push(unicode_code);
} else {
var first = ((unicode_code - (1 << 16)) / (1 << 10)) + 0xD800;
var second = (unicode_code % (1 << 10)) + 0xDC00;
utf16_codes.push(first)
utf16_codes.push(second)
}
}
return utf16_codes;
}
// 0x3042 => [ 0xE3, 0x81, 0x82 ]
function convertUnicodeCodePointToUtf8Bytes(unicode_code, base) {
var utf8_bytes = [];
if (unicode_code < 0x80) { // 1-byte
utf8_bytes.push(unicode_code);
} else if (unicode_code < (1 << 11)) { // 2-byte
utf8_bytes.push((unicode_code >>> 6) | 0xC0);
utf8_bytes.push((unicode_code & 0x3F) | 0x80);
} else if (unicode_code < (1 << 16)) { // 3-byte
utf8_bytes.push((unicode_code >>> 12) | 0xE0);
utf8_bytes.push(((unicode_code >> 6) & 0x3f) | 0x80);
utf8_bytes.push((unicode_code & 0x3F) | 0x80);
} else if (unicode_code < (1 << 21)) { // 4-byte
utf8_bytes.push((unicode_code >>> 18) | 0xF0);
utf8_bytes.push(((unicode_code >> 12) & 0x3F) | 0x80);
utf8_bytes.push(((unicode_code >> 6) & 0x3F) | 0x80);
utf8_bytes.push((unicode_code & 0x3F) | 0x80);
}
return utf8_bytes;
}
// [ 0x3042, 0x3044 ] => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
function convertUnicodeCodePointsToUtf8Bytes(unicode_codes) {
var utf8_bytes = [];
for (var i = 0; i < unicode_codes.length; ++i) {
var bytes = convertUnicodeCodePointToUtf8Bytes(unicode_codes[i]);
utf8_bytes = utf8_bytes.concat(bytes);
}
return utf8_bytes;
}
// 0xff => "ff"
// 0xff => "377"
function formatNumber(number, base, num_digits) {
var str = number.toString(base).toUpperCase();
for (var i = str.length; i < num_digits; ++i) {
str = "0" + str;
}
return str;
}
var BASE64 =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
function encodeBase64Helper(data) {
var encoded = [];
if (data.length == 1) {
encoded.push(BASE64.charAt(data[0] >> 2));
encoded.push(BASE64.charAt(((data[0] & 3) << 4)));
encoded.push('=');
encoded.push('=');
} else if (data.length == 2) {
encoded.push(BASE64.charAt(data[0] >> 2));
encoded.push(BASE64.charAt(((data[0] & 3) << 4) |
(data[1] >> 4)));
encoded.push(BASE64.charAt(((data[1] & 0xF) << 2)));
encoded.push('=');
} else if (data.length == 3) {
encoded.push(BASE64.charAt(data[0] >> 2));
encoded.push(BASE64.charAt(((data[0] & 3) << 4) |
(data[1] >> 4)));
encoded.push(BASE64.charAt(((data[1] & 0xF) << 2) |
(data[2] >> 6)));
encoded.push(BASE64.charAt(data[2] & 0x3f));
}
return encoded.join('');
}
// "44GC44GE" => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
function decodeBase64(encoded) {
var decoded_bytes = [];
var data_bytes = [];
for (var i = 0; i < encoded.length; i += 4) {
data_bytes.length = 0;
for (var j = i; j < i + 4; ++j) {
var letter = encoded.charAt(j);
if (letter == "=" || letter == "") {
break;
}
var data_byte = BASE64.indexOf(letter);
if (data_byte >= 64) { // Malformed base64 data.
break;
}
data_bytes.push(data_byte);
}
if (data_bytes.length == 1) {
// Malformed base64 data.
} else if (data_bytes.length == 2) { // 12-bit.
decoded_bytes.push((data_bytes[0] << 2) | (data_bytes[1] >> 4));
} else if (data_bytes.length == 3) { // 18-bit.
decoded_bytes.push((data_bytes[0] << 2) | (data_bytes[1] >> 4));
decoded_bytes.push(((data_bytes[1] & 0xF) << 4) | (data_bytes[2] >> 2));
} else if (data_bytes.length == 4) { // 24-bit.
decoded_bytes.push((data_bytes[0] << 2) | (data_bytes[1] >> 4));
decoded_bytes.push(((data_bytes[1] & 0xF) << 4) | (data_bytes[2] >> 2));
decoded_bytes.push(((data_bytes[2] & 0x3) << 6) | (data_bytes[3]));
}
}
return decoded_bytes;
}
// [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => "44GC44GE"
function encodeBase64(data_bytes) {
var encoded = '';
for (var i = 0; i < data_bytes.length; i += 3) {
var at_most_three_bytes = data_bytes.slice(i, i + 3);
encoded += encodeBase64Helper(at_most_three_bytes);
}
return encoded;
}
function decodeQuotedPrintableHelper(str, prefix) {
var decoded_bytes = [];
for (var i = 0; i < str.length;) {
if (str.charAt(i) == prefix) {
decoded_bytes.push(parseInt(str.substr(i + 1, 2), 16));
i += 3;
} else {
decoded_bytes.push(str.charCodeAt(i));
++i;
}
}
return decoded_bytes;
}
// "=E3=81=82=E3=81=84" => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
function decodeQuotedPrintable(str) {
str = str.replace(/_/g, " ") // RFC 2047.
return decodeQuotedPrintableHelper(str, "=");
}
// "%E3%81%82%E3%81%84" => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
function decodeUrl(str) {
return decodeQuotedPrintableHelper(str, "%");
}
function encodeQuotedPrintableHelper(data_bytes, prefix, should_escape) {
var encoded = '';
var prefix_code = prefix.charCodeAt(0);
for (var i = 0; i < data_bytes.length; ++i) {
var data_byte = data_bytes[i];
if (should_escape(data_byte)) {
encoded += prefix + formatNumber(data_bytes[i], 16, 2);
} else {
encoded += String.fromCharCode(data_byte);
}
}
return encoded;
}
// [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => "=E3=81=82=E3=81=84"
function encodeQuotedPrintable(data_bytes) {
var should_escape = function(b) {
return b < 32 || b > 126 || b == EQUAL_SIGN || b == QUESTION_MARK;
};
return encodeQuotedPrintableHelper(data_bytes, '=', should_escape);
}
var URL_SAFE =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-";
// [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => "%E3%81%82%E3%81%84"
function encodeUrl(data_bytes) {
var should_escape = function(b) {
return URL_SAFE.indexOf(String.fromCharCode(b)) == -1;
};
return encodeQuotedPrintableHelper(data_bytes, '%', should_escape);
}
// [ 0x3042, 0x3044 ] => "あい"
function convertUtf16CodesToString(utf16_codes) {
var unescaped = '';
for (var i = 0; i < utf16_codes.length; ++i) {
unescaped += String.fromCharCode(utf16_codes[i]);
}
return unescaped;
}
// [ 0x3042, 0x3044 ] => "あい"
function convertUnicodeCodePointsToString(unicode_codes) {
var utf16_codes = convertUnicodeCodePointsToUtf16Codes(unicode_codes);
return convertUtf16CodesToString(utf16_codes);
}
function maybeInitMaps(encoded_maps, to_unicode_map, from_unicode_map) {
if (to_unicode_map.is_initialized) {
return;
}
var data_types = [ 'ROUNDTRIP', 'INPUT_ONLY', 'OUTPUT_ONLY' ];
for (var i = 0; i < data_types.length; ++i) {
var data_type = data_types[i];
var encoded_data = encoded_maps[data_type];
var data_bytes = decodeBase64(encoded_data);
for (var j = 0; j < data_bytes.length; j += 4) {
var local_code = (data_bytes[j] << 8) | data_bytes[j + 1];
var unicode_code = (data_bytes[j + 2] << 8) | data_bytes[j + 3];
if (i == 0 || i == 1) { // ROUNDTRIP or INPUT_ONLY
to_unicode_map[local_code] = unicode_code;
}
if (i == 0 || i == 2) { // ROUNDTRIP or OUTPUT_ONLY
from_unicode_map[unicode_code] = local_code;
}
}
}
to_unicode_map.is_initialized = true;
}
var SJIS_TO_UNICODE = {}
var UNICODE_TO_SJIS = {}
// Requires: sjis_map.js should be loaded.
function maybeInitSjisMaps() {
maybeInitMaps(SJIS_MAP_ENCODED, SJIS_TO_UNICODE, UNICODE_TO_SJIS);
}
var ISO88591_TO_UNICODE = {}
var UNICODE_TO_ISO88591 = {}
// Requires: iso88591_map.js should be loaded.
function maybeInitIso88591Maps() {
maybeInitMaps(ISO88591_MAP_ENCODED, ISO88591_TO_UNICODE,
UNICODE_TO_ISO88591);
}
function lookupMapWithDefault(map, key, default_value) {
var value = map[key];
if (!value) {
value = default_value;
}
return value;
}
// [ 0x3042, 0x3044 ] => [ 0x82, 0xA0, 0x82, 0xA2 ]
function convertUnicodeCodePointsToSjisBytes(unicode_codes) {
maybeInitSjisMaps();
var sjis_bytes = [];
for (var i = 0; i < unicode_codes.length; ++i) {
var unicode_code = unicode_codes[i];
var sjis_code = lookupMapWithDefault(UNICODE_TO_SJIS,
unicode_code, QUESTION_MARK);
if (sjis_code <= 0xFF) { // 1-byte character.
sjis_bytes.push(sjis_code);
} else {
sjis_bytes.push(sjis_code >> 8);
sjis_bytes.push(sjis_code & 0xFF);
}
}
return sjis_bytes;
}
// [ 0x3042, 0x3044 ] => [ 0xA4, 0xA2, 0xA4, 0xA4 ]
function convertUnicodeCodePointsToEucJpBytes(unicode_codes) {
maybeInitSjisMaps();
var eucjp_bytes = [];
for (var i = 0; i < unicode_codes.length; ++i) {
var unicode_code = unicode_codes[i];
var sjis_code = lookupMapWithDefault(UNICODE_TO_SJIS, unicode_code,
QUESTION_MARK);
if (sjis_code > 0xFF) { // Double byte character.
var jis_code = convertSjisCodeToJisX208Code(sjis_code);
var eucjp_code = jis_code | 0x8080;
eucjp_bytes.push(eucjp_code >> 8);
eucjp_bytes.push(eucjp_code & 0xFF);
} else if (sjis_code >= 0x80) { // 8-bit character.
eucjp_bytes.push(0x8E);
eucjp_bytes.push(sjis_code);
} else { // 7-bit character.
eucjp_bytes.push(sjis_code);
}
}
return eucjp_bytes;
}
function convertUnicodeCodePointsToIso88591Bytes(unicode_codes) {
maybeInitIso88591Maps();
var latin_bytes = [];
for (var i = 0; i < unicode_codes.length; ++i) {
var unicode_code = unicode_codes[i];
var latin_code = lookupMapWithDefault(UNICODE_TO_ISO88591,
unicode_code, QUESTION_MARK);
latin_bytes.push(latin_code);
}
return latin_bytes;
}
// [ 0x82, 0xA0, 0x82, 0xA2 ] => [ 0x3042, 0x3044 ]
function convertSjisBytesToUnicodeCodePoints(sjis_bytes) {
maybeInitSjisMaps();
var unicode_codes = [];
for (var i = 0; i < sjis_bytes.length;) {
var sjis_code = -1;
var sjis_byte = sjis_bytes[i];
if ((sjis_byte >= 0x81 && sjis_byte <= 0x9F) ||
(sjis_byte >= 0xE0 && sjis_byte <= 0xFC)) {
++i;
var sjis_byte2 = sjis_bytes[i];
if ((sjis_byte2 >= 0x40 && sjis_byte2 <= 0x7E) ||
(sjis_byte2 >= 0x80 && sjis_byte2 <= 0xFC)) {
sjis_code = (sjis_byte << 8) | sjis_byte2;
++i;
}
} else {
sjis_code = sjis_byte;
++i;
}
var unicode_code = lookupMapWithDefault(SJIS_TO_UNICODE,
sjis_code, QUESTION_MARK);
unicode_codes.push(unicode_code);
}
return unicode_codes;
}
function convertIso88591BytesToUnicodeCodePoints(latin_bytes) {
maybeInitIso88591Maps();
var unicode_codes = [];
for (var i = 0; i < latin_bytes.length; ++i) {
var latin_code = latin_bytes[i];
var unicode_code = lookupMapWithDefault(ISO88591_TO_UNICODE,
latin_code, QUESTION_MARK);
unicode_codes.push(unicode_code);
}
return unicode_codes;
}
// 0x2422 => 0x82a0
function convertJisX208CodeToSjisCode(jis_code) {
var j1 = jis_code >> 8;
var j2 = jis_code & 0xFF;
// http://people.debian.org/~kubota/unicode-symbols-map2.html.ja
var s1 = ((j1 - 1) >> 1) + ((j1 <= 0x5E) ? 0x71 : 0xB1);
var s2 = j2 + ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E);
return (s1 << 8) | s2;
}
// 0x82a0 => 0x2422
function convertSjisCodeToJisX208Code(sjis_code) {
var s1 = sjis_code >> 8;
var s2 = sjis_code & 0xFF;
// http://people.debian.org/~kubota/unicode-symbols-map2.html.ja
var j1 = (s1 << 1) - (s1 <= 0x9f ? 0xe0 : 0x160) - (s2 < 0x9f ? 1 : 0);
var j2 = s2 - 0x1f - (s2 >= 0x7f ? 1 : 0) - (s2 >= 0x9f ? 0x5e : 0);
return (j1 << 8) | j2;
}
// [ 0x24, 0x22, 0x24, 0x24 ] => [ 0x82, 0xA0, 0x82, 0xA2 ]
function convertJisX208BytesToSjisBytes(jis_bytes) {
var sjis_bytes = [];
for (var i = 0; i < jis_bytes.length; i += 2) {
var jis_code = (jis_bytes[i] << 8) | jis_bytes[i + 1];
var sjis_code = convertJisX208CodeToSjisCode(jis_code);
sjis_bytes.push(sjis_code >> 8);
sjis_bytes.push(sjis_code & 0xFF);
}
return sjis_bytes;
}
// [ 0x82, 0xA0, 0x82, 0xA2 ] => [ 0x24, 0x22, 0x24, 0x24 ]
function convertSjisBytesToJisX208Bytes(sjis_bytes) {
var jis_bytes = [];
for (var i = 0; i < sjis_bytes.length; i += 2) {
var sjis_code = (sjis_bytes[i] << 8) | sjis_bytes[i + 1];
var jis_code = convertSjisCodeToJisX208Code(sjis_code);
jis_bytes.push(jis_code >> 8);
jis_bytes.push(jis_code & 0xFF);
}
return jis_bytes;
}
// Constants used in convertJisBytesToUnicodeCodePoints().
var ASCII = 0;
var JISX201 = 1;
var JISX208 = 2;
// Map used in convertIso2022JpBytesToUnicodeCodePoints().
var ESCAPE_SEQUENCE_TO_MODE = {
"(B": ASCII,
"(J": JISX201,
"$B": JISX208,
"$@": JISX208
};
// Map used in convertUnicodeCodePointsToIso2022JpBytes().
var MODE_TO_ESCAPE_SEQUENCE = {}
MODE_TO_ESCAPE_SEQUENCE[ASCII] = "(B";
MODE_TO_ESCAPE_SEQUENCE[JISX201] = "(J";
MODE_TO_ESCAPE_SEQUENCE[JISX208] = "$B";
// [ 0x1B, 0x24, 0x42, 0x24, 0x22, 0x1B, 0x28, 0x42, ] => [ 0x3042 ]
function convertIso2022JpBytesToUnicodeCodePoints(iso2022jp_bytes) {
maybeInitSjisMaps();
var flush = function(mode, data_bytes, output) {
var unicode_codes = [];
if (mode == ASCII) {
unicode_codes = data_bytes;
} else if (mode == JISX201) { // Might have half-width Katakana?
unicode_codes = convertSjisBytesToUnicodeCodePoints(data_bytes);
} else if (mode == JISX208) {
var sjis_bytes = convertJisX208BytesToSjisBytes(data_bytes);
unicode_codes = convertSjisBytesToUnicodeCodePoints(sjis_bytes);
} else { // Unknown mode
}
for (var i = 0; i < unicode_codes.length; ++i) {
output.push(unicode_codes[i]);
}
data_bytes.length = 0; // Clear.
}
var unicode_codes = [];
var mode = ASCII;
var current_data_bytes = [];
for (var i = 0; i < iso2022jp_bytes.length;) {
if (iso2022jp_bytes[i] == 0x1B) { // Mode is changed.
flush(mode, current_data_bytes, unicode_codes);
++i;
var code = String.fromCharCode(iso2022jp_bytes[i],
iso2022jp_bytes[i + 1]);
mode = ESCAPE_SEQUENCE_TO_MODE[code];
if (!mode) { // Unknown mode.
mode = ASCII;
}
i += 2;
} else {
current_data_bytes.push(iso2022jp_bytes[i]);
++i;
}
}
flush(mode, current_data_bytes, unicode_codes);
return unicode_codes;
}
// [ 0xA4, 0xA2, 0xA4, 0xA4 ] => [ 0x3042, 0x3044 ]
function convertEucJpBytesToUnicodeCodePoints(eucjp_bytes) {
maybeInitSjisMaps();
var unicode_codes = [];
for (var i = 0; i < eucjp_bytes.length;) {
if (eucjp_bytes[i] >= 0x80 && (i + 1) < eucjp_bytes.length &&
eucjp_bytes[i + 1] >= 0x80) {
var eucjp_code = (eucjp_bytes[i] << 8) | eucjp_bytes[i + 1];
var jis_code = eucjp_code & 0x7F7F;
var sjis_code = convertJisX208CodeToSjisCode(jis_code);
var unicode_code = lookupMapWithDefault(SJIS_TO_UNICODE,
sjis_code, QUESTION_MARK);
unicode_codes.push(unicode_code);
i += 2;
} else {
if (eucjp_bytes[i] < 0x80) {
unicode_codes.push(eucjp_bytes[i]);
} else {
// Ignore singleton 8-bit byte.
}
++i;
}
}
return unicode_codes;
}
// [ 0x3042 ] => [ 0x1B, 0x24, 0x42, 0x24, 0x22, 0x1B, 0x28, 0x42, ]
function convertUnicodeCodePointsToIso2022JpBytes(unicode_codes) {
maybeInitSjisMaps();
var mode = ASCII;
var maybeChangeMode = function(new_mode) {
if (mode != new_mode) {
mode = new_mode;
var esc_as_string = MODE_TO_ESCAPE_SEQUENCE[mode];
var esc_as_code_points = convertStringToUnicodeCodePoints(esc_as_string);
iso2022jp_bytes.push(0x1B); // ESC code.
iso2022jp_bytes = iso2022jp_bytes.concat(esc_as_code_points);
}
}
var iso2022jp_bytes = [];
for (var i = 0; i < unicode_codes.length; ++i) {
var unicode_code = unicode_codes[i];
var sjis_code = lookupMapWithDefault(UNICODE_TO_SJIS, unicode_code,
QUESTION_MARK);
if (sjis_code > 0xFF) { // Double byte character.
var jis_code = convertSjisCodeToJisX208Code(sjis_code);
maybeChangeMode(JISX208);
iso2022jp_bytes.push(jis_code >> 8);
iso2022jp_bytes.push(jis_code & 0xFF);
} else if (sjis_code >= 0x80) { // 8-bit character.
maybeChangeMode(JISX201);
iso2022jp_bytes.push(sjis_code);
} else { // 7-bit character.
maybeChangeMode(ASCII);
iso2022jp_bytes.push(sjis_code);
}
}
maybeChangeMode(ASCII);
return iso2022jp_bytes;
}
var MIME_FULL_MATCH = /^=\?([^?]+)\?([BQ])\?([^?]+)\?=$/;
var MIME_PARTIAL_MATCH = /^=\?([^?]+)\?([BQ])\?([^?]+)\?=/;
// "=?UTF-8?B?44GC?=" => true
// "foobar" => false
function isMimeEncodedString(str) {
return str.match(MIME_FULL_MATCH) != null;
}
// "=?UTF-8?B?44GC?=" => ["UTF-8", [0xE3, 0x81, 0x82]]
// "=?UTF-8?Q?=E3=81=82?=" => ["UTF-8", [0xE3, 0x81, 0x82]]
// "INVALID" => []
function decodeMime(str) {
var m = str.match(MIME_FULL_MATCH);
if (m) {
var char_encoding = m[1];
// We don't need the language information preceded by '*'.
char_encoding = char_encoding.replace(/\*.*$/, "")
var mime_encoding = m[2];
var mime_str = m[3];
var decoded_bytes;
if (mime_encoding == "B") {
decoded_bytes = decodeBase64(mime_str);
} else if (mime_encoding == "Q") {
decoded_bytes = decodeQuotedPrintable(mime_str);
}
if (char_encoding != "" && decoded_bytes) {
return [char_encoding, decoded_bytes]
}
}
return [];
}
var OUTPUT_CONVERTERS = {
'ISO2022JP': convertUnicodeCodePointsToIso2022JpBytes,
'ISO88591': convertUnicodeCodePointsToIso88591Bytes,
'SHIFTJIS': convertUnicodeCodePointsToSjisBytes,
'EUCJP': convertUnicodeCodePointsToEucJpBytes,
'UTF8': convertUnicodeCodePointsToUtf8Bytes
}
var INPUT_CONVERTERS = {
'ISO2022JP': convertIso2022JpBytesToUnicodeCodePoints,
'ISO88591': convertIso88591BytesToUnicodeCodePoints,
'SHIFTJIS': convertSjisBytesToUnicodeCodePoints,
'EUCJP': convertEucJpBytesToUnicodeCodePoints,
'UTF8': convertUtf8BytesToUnicodeCodePoints
}
function convertUnicodeCodePointsToBytes(unicode_codes, encoding) {
var normalized_encoding = normalizeEncodingName(encoding);
var convert_function = OUTPUT_CONVERTERS[normalized_encoding];
if (convert_function) {
return convert_function(unicode_codes);
}
return [];
}
function convertBytesToUnicodeCodePoints(data_bytes, encoding) {
var normalized_encoding = normalizeEncodingName(encoding);
var convert_function = INPUT_CONVERTERS[normalized_encoding];
if (convert_function) {
return convert_function(data_bytes);
}
return [];
}
// 'あい' => r'\u3042\u3044'
function escapeToUtf16(str) {
var escaped = ''
for (var i = 0; i < str.length; ++i) {
var hex = str.charCodeAt(i).toString(16).toUpperCase();
escaped += "\\u" + "0000".substr(hex.length) + hex;
}
return escaped;
}
// 'あい' => r'\U00003042\U00003044'
function escapeToUtf32(str) {
var escaped = ''
var unicode_codes = convertStringToUnicodeCodePoints(str);
for (var i = 0; i <unicode_codes.length; ++i) {
var hex = unicode_codes[i].toString(16).toUpperCase();
escaped += "\\U" + "00000000".substr(hex.length) + hex;
}
return escaped;
}
// "あい" => "&#12354;&#12356;"
// "あい" => "&#x3042;&#x3044;"
function escapeToNumRef(str, base) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var escaped = ''
var prefix = base == 10 ? '' : 'x';
for (var i = 0; i < unicode_codes.length; ++i) {
var code = unicode_codes[i].toString(base).toUpperCase();
var num_ref = "&#" + prefix + code + ";"
escaped += num_ref;
}
return escaped;
}
// "あい" => "l8je"
function escapeToPunyCode(str) {
var unicode_codes = convertStringToPunyCodes(str);
return convertUnicodeCodePointsToString(unicode_codes);
}
// [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => '\xE3\x81\x82\xE3\x81\x84'
// [ 0343, 0201, 0202, 0343, 0201, 0204 ] => '\343\201\202\343\201\204'
function convertBytesToEscapedString(data_bytes, base) {
var escaped = '';
for (var i = 0; i < data_bytes.length; ++i) {
var prefix = (base == 16 ? "\\x" : "\\");
var num_digits = base == 16 ? 2 : 3;
var escaped_byte = prefix + formatNumber(data_bytes[i], base, num_digits)
escaped += escaped_byte;
}
return escaped;
}
// "あい" => [0x6C, 0x38, 0x6A, 0x65] // "l8je"
// Requires: punycode.js should be loaded.
function convertStringToPunyCodes(str) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var puny_codes = [];
var result = "";
if (PunyCode.encode(unicode_codes, puny_codes)) {
return puny_codes;
}
return unicode_codes;
}
// [ 0x6C, 0x38, 0x6A, 0x65 ] => "あい"
// Requires: punycode.js should be loaded.
function convertPunyCodesToString(puny_codes) {
var unicode_codes = [];
if (PunyCode.decode(puny_codes, unicode_codes)) {
return convertUnicodeCodePointsToString(unicode_codes);
}
return convertUnicodeCodePointsToString(puny_codes);
}
// "あい" => r'\xE3\x81\x82\xE3\x81\x84' // UTF-8
// "あい" => r'\343\201\202\343\201\204' // UTF-8
function escapeToEscapedBytes(str, base, encoding) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var data_bytes = convertUnicodeCodePointsToBytes(unicode_codes, encoding);
return convertBytesToEscapedString(data_bytes, base);
}
// "あい" => "44GC44GE" // UTF-8
function escapeToBase64(str, encoding) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var data_bytes = convertUnicodeCodePointsToBytes(unicode_codes, encoding);
return encodeBase64(data_bytes);
}
// "あい" => "=E3=81=82=E3=81=84" // UTF-8
function escapeToQuotedPrintable(str, encoding) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var data_bytes = convertUnicodeCodePointsToBytes(unicode_codes, encoding);
return encodeQuotedPrintable(data_bytes);
}
// "あい" => "%E3%81%82%E3%81%84"
function escapeToUrl(str, encoding) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var data_bytes = convertUnicodeCodePointsToBytes(unicode_codes, encoding);
return encodeUrl(data_bytes);
}
// "あい" => "=?UTF-8?B?44GC44GE?="
// "あい" => "=?UTF-8?Q?=E3=81=82=E3=81=84?="
function escapeToMime(str, mime_encoding, char_encoding) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
var data_bytes = convertUnicodeCodePointsToBytes(unicode_codes,
char_encoding);
if (str == "") {
return "";
}
var escaped = "=?" + char_encoding + "?";
if (mime_encoding == 'base64') {
escaped += "B?";
escaped += encodeBase64(data_bytes);
} else {
escaped += "Q?";
escaped += encodeQuotedPrintable(data_bytes);
}
escaped += '?=';
return escaped;
}
// r'\u3042\u3044 => "あい"
function unescapeFromUtf16(str) {
var utf16_codes = convertEscapedUtf16CodesToUtf16Codes(str);
return convertUtf16CodesToString(utf16_codes);
}
// r'\U00003042\U00003044 => "あい"
function unescapeFromUtf32(str) {
var unicode_codes = convertEscapedUtf32CodesToUnicodeCodePoints(str);
var utf16_codes = convertUnicodeCodePointsToUtf16Codes(unicode_codes);
return convertUtf16CodesToString(utf16_codes);
}
// r'\xE3\x81\x82\xE3\x81\x84' => "あい"
// r'\343\201\202\343\201\204' => "あい"
function unescapeFromEscapedBytes(str, base, encoding) {
var data_bytes = convertEscapedBytesToBytes(str, base);
var unicode_codes = convertBytesToUnicodeCodePoints(data_bytes, encoding);
return convertUnicodeCodePointsToString(unicode_codes);
}
// "&#12354;&#12356;" => "あい"
// "&#x3042;&#x3044;" => "あい"
function unescapeFromNumRef(str, base) {
var unicode_codes = convertNumRefToUnicodeCodePoints(str, base);
return convertUnicodeCodePointsToString(unicode_codes);
}
// "l8je" => "あい"
function unescapeFromPunyCode(str) {
var unicode_codes = convertStringToUnicodeCodePoints(str);
return convertPunyCodesToString(unicode_codes);
}
// "44GC44GE" => "あい"
function unescapeFromBase64(str, encoding) {
var decoded_bytes = decodeBase64(str);
var unicode_codes = convertBytesToUnicodeCodePoints(decoded_bytes, encoding);
return convertUnicodeCodePointsToString(unicode_codes);
}
// "=E3=81=82=E3=81=84" => "あい"
function unescapeFromQuotedPrintable(str, encoding) {
var decoded_bytes = decodeQuotedPrintable(str);
var unicode_bytes = convertBytesToUnicodeCodePoints(decoded_bytes, encoding);
return convertUnicodeCodePointsToString(unicode_bytes);
}
// "%E3%81%82%E3%81%84" => "あい"
function unescapeFromUrl(str, encoding) {
var decoded_bytes = decodeUrl(str);
var unicode_bytes = convertBytesToUnicodeCodePoints(decoded_bytes, encoding);
return convertUnicodeCodePointsToString(unicode_bytes);
}
// " " => true
// " \n" => true
function isEmptyOrSequenceOfWhiteSpaces(str) {
for (var i = 0; i < str.length; ++i) {
var code = str.charCodeAt(i);
if (!(code == 0x09 || // TAB
code == 0x0A || // LF
code == 0x0D || // CR
code == 0x20)) { // SPACE
return false;
}
}
return true;
}
// "=?UTF-8?B?*?= =?UTF-8?B?*?=" => ["=?UTF-8?B?*?=", "=?UTF-8?B?*?="]
// "=?UTF-8?B?*?=FOO" => ["=?UTF-8?B?*?=", "FOO"]
function splitMimeString(str) {
var parts = [];
var current = "";
while (str != "") {
var m = str.match(MIME_PARTIAL_MATCH)
if (m) {
if (!isEmptyOrSequenceOfWhiteSpaces(current)) {
parts.push(current);
}
current = "";
parts.push(m[0]);
str = str.substr(m[0].length);
} else {
current += str.charAt(0);
str = str.substr(1);
}
}
if (!isEmptyOrSequenceOfWhiteSpaces(current)) {
parts.push(current);
}
return parts;
}
// "UTF-8" => "UTF8"
// "Shift_JIS" => "SHIFTJIS"
function normalizeEncodingName(encoding) {
return encoding.toUpperCase().replace(/[_-]/g, "");
}
// "=?UTF-8?B?44GC44GE?=" => "あい"
// "=?Shift_JIS?B?gqCCog==?=" => "あい"
// "=?ISO-2022-JP?B?GyRCJCIkJBsoQg==?=" => "あい"
// "=?UTF-8?Q?=E3=81=82=E3=81=84?=" => "あい"
// "=?Shift_JIS?Q?=82=A0=82=A2?=" => "あい"
// "=?ISO-2022-JP?Q?=1B$B$"$$=1B(B?=" => "あい"
function unescapeFromMime(str) {
var parts = splitMimeString(str);
var unescaped = "";
for (var i = 0; i < parts.length; ++i) {
if (isMimeEncodedString(parts[i])) {
var pair = decodeMime(parts[i]);
if (pair.length == 0) { // Malformed MIME string. Skip it.
continue;
}
var encoding = normalizeEncodingName(pair[0]);
var data_bytes = pair[1];
var unicode_codes = convertBytesToUnicodeCodePoints(data_bytes,
encoding);
unescaped += convertUnicodeCodePointsToString(unicode_codes);
} else {
unescaped += parts[i];
}
}
return unescaped;
}