char2uni.c

Go to the documentation of this file.
00001 /*
00002  * All the code below represents a subset of
00003  * Bruno Haible's libiconv library, homepage
00004  * http://clisp.cons.org/~haible/packages-libiconv.html
00005  *
00006  * Based on libiconv ver. 1.17
00007  * See below for copyright notice.
00008  */
00009 
00010 /*
00011  * Copyright (C) 1999-2001 Free Software Foundation, Inc.
00012  * This file is part of the GNU LIBICONV Library.
00013  *
00014  * The GNU LIBICONV Library is free software; you can redistribute it
00015  * and/or modify it under the terms of the GNU Library General Public
00016  * License as published by the Free Software Foundation; either version 2
00017  * of the License, or (at your option) any later version.
00018  *
00019  * The GNU LIBICONV Library is distributed in the hope that it will be
00020  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
00021  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00022  * Library General Public License for more details.
00023  *
00024  * You should have received a copy of the GNU Library General Public
00025  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
00026  * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
00027  * Suite 330, Boston, MA 02111-1307, USA.
00028  */
00029 
00030 #ifdef _WIN32
00031 #include "win32/config.h"
00032 #else
00033 #include "config.h"
00034 #endif
00035 
00036 #define LOCAL_DEBUG
00037 /*#define DO_CLOCKING*/
00038 
00039 #include <string.h>
00040 #ifdef _WIN32
00041 # include "win32/afterbase.h"
00042 #else
00043 # include "afterbase.h"
00044 #endif
00045 #include "char2uni.h"
00046 
00047 
00048 /*
00049  * ISO-8859-1
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * Here every entry simply equals its own byte value (0x0080..0x00ff).
00050  */
00051 static const unsigned short _as_iso8859_1_2uni[128] = {
00052 /* does not really require translation, but we'll stick it in
00053    there for uniformity : */
00054   /* 0x80 */
00055   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00056   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00057   /* 0x90 */
00058   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00059   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00060   /* 0xa0 */
00061   0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
00062   0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
00063   /* 0xb0 */
00064   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
00065   0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
00066   /* 0xc0 */
00067   0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
00068   0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
00069   /* 0xd0 */
00070   0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
00071   0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
00072   /* 0xe0 */
00073   0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
00074   0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
00075   /* 0xf0 */
00076   0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
00077   0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
00078 };
00079 
00080 /*
00081  * ISO-8859-2
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00082  */
00083 
00084 static const unsigned short _as_iso8859_2_2uni[128] = {
00085   /* 0x80 */
00086   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00087   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00088   /* 0x90 */
00089   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00090   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00091   /* 0xa0 */
00092   0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
00093   0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
00094   /* 0xb0 */
00095   0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
00096   0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
00097   /* 0xc0 */
00098   0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
00099   0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
00100   /* 0xd0 */
00101   0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
00102   0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
00103   /* 0xe0 */
00104   0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
00105   0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
00106   /* 0xf0 */
00107   0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
00108   0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
00109 };
00110 
00111 /*
00112  * ISO-8859-3
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00113  */
00114 
00115 static const unsigned short _as_iso8859_3_2uni[128] = {
00116   /* 0x80 */
00117   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00118   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00119   /* 0x90 */
00120   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00121   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00122   /* 0xa0 */
00123   0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0xfffd, 0x0124, 0x00a7,
00124   0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0xfffd, 0x017b,
00125   /* 0xb0 */
00126   0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
00127   0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0xfffd, 0x017c,
00128   /* 0xc0 */
00129   0x00c0, 0x00c1, 0x00c2, 0xfffd, 0x00c4, 0x010a, 0x0108, 0x00c7,
00130   0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
00131   /* 0xd0 */
00132   0xfffd, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
00133   0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
00134   /* 0xe0 */
00135   0x00e0, 0x00e1, 0x00e2, 0xfffd, 0x00e4, 0x010b, 0x0109, 0x00e7,
00136   0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
00137   /* 0xf0 */
00138   0xfffd, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
00139   0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
00140 };
00141 
00142 
00143 /*
00144  * ISO-8859-4
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00145  */
00146 
00147 static const unsigned short _as_iso8859_4_2uni[128] = {
00148   /* 0x80 */
00149   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00150   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00151   /* 0x90 */
00152   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00153   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00154   /* 0xa0 */
00155   0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
00156   0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
00157   /* 0xb0 */
00158   0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
00159   0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
00160   /* 0xc0 */
00161   0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
00162   0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
00163   /* 0xd0 */
00164   0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
00165   0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
00166   /* 0xe0 */
00167   0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
00168   0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
00169   /* 0xf0 */
00170   0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
00171   0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
00172 };
00173 
00174 /*
00175  * ISO-8859-5
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00176  */
00177 
00178 static const unsigned short _as_iso8859_5_2uni[128] = {
00179   /* 0x80 */
00180   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00181   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00182   /* 0x90 */
00183   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00184   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00185   /* 0xa0 */
00186   0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
00187   0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
00188   /* 0xb0 */
00189   0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
00190   0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
00191   /* 0xc0 */
00192   0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
00193   0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
00194   /* 0xd0 */
00195   0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
00196   0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
00197   /* 0xe0 */
00198   0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
00199   0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
00200   /* 0xf0 */
00201   0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
00202   0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
00203 };
00204 
00205 /*
00206  * ISO-8859-6
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00207  */
00208 
00209 static const unsigned short _as_iso8859_6_2uni[128] = {
00210   /* 0x80 */
00211   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00212   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00213   /* 0x90 */
00214   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00215   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00216   /* 0xa0 */
00217   0x00a0, 0xfffd, 0xfffd, 0xfffd, 0x00a4, 0xfffd, 0xfffd, 0xfffd,
00218   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x060c, 0x00ad, 0xfffd, 0xfffd,
00219   /* 0xb0 */
00220   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00221   0xfffd, 0xfffd, 0xfffd, 0x061b, 0xfffd, 0xfffd, 0xfffd, 0x061f,
00222   /* 0xc0 */
00223   0xfffd, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
00224   0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
00225   /* 0xd0 */
00226   0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
00227   0x0638, 0x0639, 0x063a, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00228   /* 0xe0 */
00229   0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
00230   0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
00231   /* 0xf0 */
00232   0x0650, 0x0651, 0x0652, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00233   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00234 };
00235 
00236 /*
00237  * ISO-8859-7
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00238  */
00239 
00240 static const unsigned short _as_iso8859_7_2uni[128] = {
00241   /* 0x80 */
00242   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00243   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00244   /* 0x90 */
00245   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00246   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00247   /* 0xa0 */
00248   0x00a0, 0x2018, 0x2019, 0x00a3, 0xfffd, 0xfffd, 0x00a6, 0x00a7,
00249   0x00a8, 0x00a9, 0xfffd, 0x00ab, 0x00ac, 0x00ad, 0xfffd, 0x2015,
00250   /* 0xb0 */
00251   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
00252   0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
00253   /* 0xc0 */
00254   0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
00255   0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
00256   /* 0xd0 */
00257   0x03a0, 0x03a1, 0xfffd, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
00258   0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
00259   /* 0xe0 */
00260   0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
00261   0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
00262   /* 0xf0 */
00263   0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
00264   0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0xfffd,
00265 };
00266 
00267 /*
00268  * ISO-8859-8
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00269  */
00270 
00271 static const unsigned short _as_iso8859_8_2uni[128] = {
00272   /* 0x80 */
00273   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00274   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00275   /* 0x90 */
00276   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00277   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00278   /* 0xa0 */
00279   0x00a0, 0xfffd, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
00280   0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
00281   /* 0xb0 */
00282   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
00283   0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0xfffd,
00284   /* 0xc0 */
00285   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00286   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00287   /* 0xd0 */
00288   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
00289   0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2017,
00290   /* 0xe0 */
00291   0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
00292   0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
00293   /* 0xf0 */
00294   0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
00295   0x05e8, 0x05e9, 0x05ea, 0xfffd, 0xfffd, 0x200e, 0x200f, 0xfffd,
00296 };
00297 
00298 /*
00299  * ISO-8859-9
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00300  */
00301 
00302 static const unsigned short _as_iso8859_9_2uni[128] = {
00303   /* 0x80 */
00304   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00305   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00306   /* 0x90 */
00307   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00308   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00309   /* 0xa0 */
00310   0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
00311   0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
00312   /* 0xb0 */
00313   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
00314   0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
00315   /* 0xc0 */
00316   0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
00317   0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
00318   /* 0xd0 */
00319   0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
00320   0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
00321   /* 0xe0 */
00322   0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
00323   0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
00324   /* 0xf0 */
00325   0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
00326   0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
00327 };
00328 
00329 /*
00330  * ISO-8859-10
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00331  */
00332 
00333 static const unsigned short _as_iso8859_10_2uni[128] = {
00334   /* 0x80 */
00335   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00336   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00337   /* 0x90 */
00338   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00339   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00340   /* 0xa0 */
00341   0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
00342   0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
00343   /* 0xb0 */
00344   0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
00345   0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
00346   /* 0xc0 */
00347   0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
00348   0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
00349   /* 0xd0 */
00350   0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
00351   0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
00352   /* 0xe0 */
00353   0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
00354   0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
00355   /* 0xf0 */
00356   0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
00357   0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
00358 };
00359 
00360 /*
00361  * ISO-8859-13
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00362  */
00363 
00364 static const unsigned short _as_iso8859_13_2uni[128] = {
00365   /* 0x80 */
00366   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00367   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00368   /* 0x90 */
00369   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00370   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00371   /* 0xa0 */
00372   0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
00373   0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
00374   /* 0xb0 */
00375   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
00376   0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
00377   /* 0xc0 */
00378   0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
00379   0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
00380   /* 0xd0 */
00381   0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
00382   0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
00383   /* 0xe0 */
00384   0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
00385   0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
00386   /* 0xf0 */
00387   0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
00388   0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
00389 };
00390 
00391 /*
00392  * ISO-8859-14
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00393  */
00394 
00395 static const unsigned short _as_iso8859_14_2uni[128] = {
00396   /* 0x80 */
00397   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00398   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00399   /* 0x90 */
00400   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00401   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00402   /* 0xa0 */
00403   0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
00404   0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
00405   /* 0xb0 */
00406   0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
00407   0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
00408   /* 0xc0 */
00409   0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
00410   0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
00411   /* 0xd0 */
00412   0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
00413   0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
00414   /* 0xe0 */
00415   0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
00416   0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
00417   /* 0xf0 */
00418   0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
00419   0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
00420 };
00421 
00422 /*
00423  * ISO-8859-15
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00424  */
00425 
00426 static const unsigned short _as_iso8859_15_2uni[128] = {
00427   /* 0x80 */
00428   0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
00429   0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
00430   /* 0x90 */
00431   0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
00432   0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
00433   /* 0xa0 */
00434   0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
00435   0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
00436   /* 0xb0 */
00437   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
00438   0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
00439   /* 0xc0 */
00440   0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
00441   0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
00442   /* 0xd0 */
00443   0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
00444   0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
00445   /* 0xe0 */
00446   0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
00447   0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
00448   /* 0xf0 */
00449   0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
00450   0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
00451 };
00452 
/*
 * ISO-8859-16
 *
 * 128-entry table for the upper half of the charset: entry [i] holds the
 * Unicode value for byte 0x80 + i; every group of 16 entries is labelled
 * with the byte value it starts at.
 *
 * Fix: bytes 0xa5 and 0xab were swapped.  In ISO/IEC 8859-16:2001 byte 0xa5
 * is U+201E (double low-9 quotation mark) and byte 0xab is U+00AB (left
 * guillemet); the previous table had them the other way round.
 */

static const unsigned short _as_iso8859_16_2uni[128] = {
  /* 0x80 */
  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
  /* 0x90 */
  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
  /* 0xa0 */
  0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
  0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
  0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
  /* 0xc0 */
  0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
  /* 0xd0 */
  0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
  0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
  /* 0xe0 */
  0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
  /* 0xf0 */
  0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
  0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
00483 
00484 /*
00485  * KOI8-R
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00486  */
00487 
00488 /* Specification: RFC 1489 */
00489 
00490 static const unsigned short _as_koi8_r_2uni[128] = {
00491   /* 0x80 */
00492   0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
00493   0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
00494   /* 0x90 */
00495   0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
00496   0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
00497   /* 0xa0 */
00498   0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
00499   0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
00500   /* 0xb0 */
00501   0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
00502   0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
00503   /* 0xc0 */
00504   0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
00505   0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
00506   /* 0xd0 */
00507   0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
00508   0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
00509   /* 0xe0 */
00510   0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
00511   0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
00512   /* 0xf0 */
00513   0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
00514   0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a,
00515 };
00516 
00517 /*
00518  * KOI8-RU
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00519  */
00520 
00521 static const unsigned short _as_koi8_ru_2uni[128] = {
00522   /* 0x80 */
00523   0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
00524   0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
00525   /* 0x90 */
00526   0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
00527   0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
00528   /* 0xa0 */
00529   0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
00530   0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x0491, 0x045e, 0x255e,
00531   /* 0xb0 */
00532   0x255f, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
00533   0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x0490, 0x040e, 0x00a9,
00534   /* 0xc0 */
00535   0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
00536   0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
00537   /* 0xd0 */
00538   0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
00539   0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
00540   /* 0xe0 */
00541   0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
00542   0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
00543   /* 0xf0 */
00544   0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
00545   0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a,
00546 };
00547 
00548 /*
00549  * KOI8-U
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
00550  */
00551 
00552 /* Specification: RFC 2319 */
00553 
00554 static const unsigned short _as_koi8_u_2uni[128] = {
00555   /* 0x80 */
00556   0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
00557   0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
00558   /* 0x90 */
00559   0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
00560   0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
00561   /* 0xa0 */
00562   0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
00563   0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x0491, 0x255d, 0x255e,
00564   /* 0xb0 */
00565   0x255f, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
00566   0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x0490, 0x256c, 0x00a9,
00567   /* 0xc0 */
00568   0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
00569   0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
00570   /* 0xd0 */
00571   0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
00572   0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
00573   /* 0xe0 */
00574   0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
00575   0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
00576   /* 0xf0 */
00577   0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
00578   0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a,
00579 };
00580 
00581 /*
00582  * CP1250 Central European
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00583  */
00584 
00585 static const unsigned short _as_cp1250_2uni[128] = {
00586   /* 0x80 */
00587   0x20ac, 0xfffd, 0x201a, 0xfffd, 0x201e, 0x2026, 0x2020, 0x2021,
00588   0xfffd, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179,
00589   /* 0x90 */
00590   0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
00591   0xfffd, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a,
00592   /* 0xa0 */
00593   0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7,
00594   0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b,
00595   /* 0xb0 */
00596   0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
00597   0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c,
00598   /* 0xc0 */
00599   0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
00600   0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
00601   /* 0xd0 */
00602   0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
00603   0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
00604   /* 0xe0 */
00605   0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
00606   0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
00607   /* 0xf0 */
00608   0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
00609   0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
00610 };
00611 
00612 /*
00613  * CP1251 Cyrillic
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00614  */
00615 
00616 static const unsigned short _as_cp1251_2uni[128] = {
00617   /* 0x80 */
00618   0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021,
00619   0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f,
00620   /* 0x90 */
00621   0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
00622   0xfffd, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
00623   /* 0xa0 */
00624   0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7,
00625   0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407,
00626   /* 0xb0 */
00627   0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7,
00628   0x0451, 0x2116, 0x0454, 0x00bb, 0x0458, 0x0405, 0x0455, 0x0457,
00629   /* 0xc0 */
00630   0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
00631   0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
00632   /* 0xd0 */
00633   0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
00634   0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
00635   /* 0xe0 */
00636   0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
00637   0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
00638   /* 0xf0 */
00639   0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
00640   0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
00641 };
00642 
00643 /*
00644  * CP1252 - Western European
       * 128-entry table for the upper half of the charset: entry [i] holds
       * the Unicode value (per the `_2uni` suffix) for byte 0x80 + i; every
       * group of 16 entries is labelled with the byte value it starts at.
       * NOTE(review): 0xfffd entries presumably mark bytes that have no
       * character assigned in this charset -- confirm with the lookup code.
00645  */
00646 
00647 static const unsigned short _as_cp1252_2uni[128] = {
00648   /* 0x80 */
00649   0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
00650   0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffd, 0x017d, 0xfffd,
00651   /* 0x90 */
00652   0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
00653   0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffd, 0x017e, 0x0178,
00654   /* 0xa0 */
00655   0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
00656   0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
00657   /* 0xb0 */
00658   0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
00659   0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
00660   /* 0xc0 */
00661   0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
00662   0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
00663   /* 0xd0 */
00664   0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
00665   0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
00666   /* 0xe0 */
00667   0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
00668   0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
00669   /* 0xf0 */
00670   0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
00671   0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
00672 };
00673 
00674 static const unsigned short *_as_supported_charsets[SUPPORTED_CHARSETS_NUM] = {
00675  &_as_iso8859_1_2uni[0],
00676  &_as_iso8859_2_2uni[0],
00677  &_as_iso8859_3_2uni[0],
00678  &_as_iso8859_4_2uni[0],
00679  &_as_iso8859_5_2uni[0],
00680  &_as_iso8859_6_2uni[0],
00681  &_as_iso8859_7_2uni[0],
00682  &_as_iso8859_8_2uni[0],
00683  &_as_iso8859_9_2uni[0],
00684  &_as_iso8859_10_2uni[0],
00685  &_as_iso8859_13_2uni[0],
00686  &_as_iso8859_14_2uni[0],
00687  &_as_iso8859_15_2uni[0],
00688  &_as_iso8859_16_2uni[0],
00689  &_as_koi8_r_2uni[0],
00690  &_as_koi8_ru_2uni[0],
00691  &_as_koi8_u_2uni[0],
00692  &_as_cp1250_2uni[0],
00693  &_as_cp1251_2uni[0],
00694  &_as_cp1252_2uni[0],
00695  /* UTF-8 requires special processing : */
00696  &_as_iso8859_1_2uni[0],
00697 };
00698 
00699 #if 0
/*
 * Disabled (compiled out) table of alias names for each supported charset;
 * every row ends with an empty string "".
 * NOTE(review) before re-enabling:
 *  - the element type is `unsigned short *` although the initializers are
 *    string literals, and the trailing `[]` dimension has no size;
 *  - rows are listed KOI8-R, KOI8-U, KOI8-RU, whereas _as_supported_charsets
 *    lists koi8_r, koi8_ru, koi8_u;
 *  - there are 20 rows here but 21 entries in _as_supported_charsets (no
 *    row for the UTF-8 slot).
 */
00700 static unsigned short *_as_charset_names[SUPPORTED_CHARSETS_NUM][] = {
00701 /* Standard 8-bit encodings */
00702 {"ISO-8859-1", "ISO_8859-1", "ISO_8859-1:1987", "ISO-IR-100", "LATIN1", "L1", "csISOLatin1",    "ISO8859-1", "ISO8859_1", "CP819", "IBM819", "" },
00703 {"ISO-8859-2", "ISO_8859-2", "ISO_8859-2:1987", "ISO-IR-101", "LATIN2", "L2", "csISOLatin2",    "ISO8859-2", "ISO8859_2", "" },
00704 {"ISO-8859-3", "ISO_8859-3", "ISO_8859-3:1988", "ISO-IR-109", "LATIN3", "L3", "csISOLatin3",    "ISO8859-3", "ISO8859_3", "" },
00705 {"ISO-8859-4", "ISO_8859-4", "ISO_8859-4:1988", "ISO-IR-110", "LATIN4", "L4", "csISOLatin4",    "ISO8859-4", "ISO8859_4", "" },
00706 {"ISO-8859-5", "ISO_8859-5", "ISO_8859-5:1988", "ISO-IR-144", "CYRILLIC", "csISOLatinCyrillic","ISO8859-5", "ISO8859_5", "" },
00707 {"ISO-8859-6", "ISO_8859-6", "ISO_8859-6:1987", "ISO-IR-127", "ARABIC",   "csISOLatinArabic",   "ISO8859-6", "ISO8859_6", "ECMA-114", "ASMO-708", "" },
00708 {"ISO-8859-7", "ISO_8859-7", "ISO_8859-7:1987", "ISO-IR-126", "GREEK",    "csISOLatinGreek",    "ISO8859-7", "ISO8859_7", "ECMA-118", "ELOT_928", "GREEK8", "" },
00709 {"ISO-8859-8", "ISO_8859-8", "ISO_8859-8:1988", "ISO-IR-138", "HEBREW",   "csISOLatinHebrew",   "ISO8859-8", "ISO8859_8", "" },
00710 {"ISO-8859-9", "ISO_8859-9", "ISO_8859-9:1989", "ISO-IR-148", "LATIN5", "L5", "csISOLatin5",    "ISO8859-9", "ISO8859_9", "" },
00711 {"ISO-8859-10","ISO_8859-10","ISO_8859-10:1992","ISO-IR-157", "LATIN6", "L6", "csISOLatin6",    "ISO8859-10", "" },
00712 {"ISO-8859-13","ISO_8859-13",                   "ISO-IR-179", "LATIN7", "L7",                   "ISO8859-13", "" },
00713 {"ISO-8859-14","ISO_8859-14","ISO_8859-14:1998","ISO-IR-199", "LATIN8", "L8",   "ISO-CELTIC",   "ISO8859-14", "" },
00714 {"ISO-8859-15","ISO_8859-15","ISO_8859-15:1998","ISO-IR-203",                                                                   "ISO8859-15", "" },
00715 {"ISO-8859-16","ISO_8859-16","ISO_8859-16:2000","ISO-IR-226",                                                                   "ISO8859-16", "" },
00716 /* Cyrillic 8-bit KOI encodings */
00717 {"KOI8-R",  "csKOI8R", "" },
00718 {"KOI8-U",  "" },
00719 {"KOI8-RU", "" },
00720 /* Windows 8-bit encodings */
00721 {"CP1250", "WINDOWS-1250", "MS-EE",  "" },
00722 {"CP1251", "WINDOWS-1251", "MS-CYRL","" },
00723 {"CP1252", "WINDOWS-1252", "MS-ANSI","" }
00724 };
00725 
00726 #endif
00727 
00728 static ASSupportedCharsets
00729 parse_short_charset_name( const char *name )
00730 {
00731         /* fallbacks in case only language is specified : */
00732         if( name[0] == 'l' || name[0] == 'L' )
00733         {
00734                 switch( name[1] )
00735                 {
00736                         case '1' : return CHARSET_ISO8859_1;
00737                         case '2' : return CHARSET_ISO8859_2;
00738                         case '3' : return CHARSET_ISO8859_3;
00739                         case '4' : return CHARSET_ISO8859_4;
00740                         case '5' : return CHARSET_ISO8859_9;
00741                         case '6' : return CHARSET_ISO8859_10;
00742                         case '7' : return CHARSET_ISO8859_13;
00743                         case '8' : return CHARSET_ISO8859_14;
00744                 }
00745         }
00746         if( mystrncasecmp( &name[0], "en", 2 ) == 0 )
00747                 return CHARSET_ISO8859_1 ;     /* us */
00748         if( mystrncasecmp( &name[0], "el_GR", 5 ) == 0 )
00749                 return CHARSET_ISO8859_7 ;     /* greece */
00750         else if( mystrncasecmp( &name[0], "he", 2 ) == 0 )
00751                 return CHARSET_ISO8859_8 ;     /* Hebrew */
00752         else if( mystrncasecmp( &name[0], "hu", 2 ) == 0 )
00753                 return CHARSET_ISO8859_2 ;     /* Hungary */
00754         else if( mystrncasecmp( &name[0], "lt", 2 ) == 0 )
00755                 return CHARSET_ISO8859_4 ;     /* Lithuanian locale for Lithuania */
00756         else if( mystrncasecmp( &name[0], "pl", 2 ) == 0 )
00757                 return CHARSET_ISO8859_2 ;     /* 2Polish locale for Poland */
00758         else if( mystrncasecmp( &name[0], "ru", 2 ) == 0 )
00759                 return CHARSET_ISO8859_5 ;     /* Russian locale for Russia */
00760         else if( mystrncasecmp( &name[0], "sk", 2 ) == 0 )
00761                 return CHARSET_ISO8859_2 ;     /* Slovak locale for Slovakia */
00762         else if( mystrncasecmp( &name[0], "sl", 2 ) == 0 )
00763                 return CHARSET_ISO8859_2 ;     /* Slovene locale for Slovenia */
00764         else if( mystrncasecmp( &name[0], "tr", 2 ) == 0 )
00765                 return CHARSET_ISO8859_9 ;     /* Turkish */
00766         else if( mystrncasecmp( &name[0], "cs", 2 ) == 0 )
00767                 return CHARSET_ISO8859_2 ;     /* Czech */
00768         else
00769                 return CHARSET_ISO8859_1 ;
00770 }
00771 
00772 
00773 ASSupportedCharsets
00774 parse_charset_name( const char *name )
00775 {
00776         ASSupportedCharsets set = 0;
00777         int i = 0;
00778         if( name == NULL || name[0] == '\0' || name[1] == '\0' ) /* that includes locale "C" */
00779                 return CHARSET_ISO8859_1 ;
00780         /* if locale name came from LANG env var it may have formatof :
00781          *              language.charset@modifier
00782          * we only need charset part of it here: */
00783         while(name[i] != '\0' && name[i] != '.' ) ++i ;
00784 
00785         /* maybe LANG was set to some short thing such as just "RU" */
00786         if( name[i] == '\0' && (i == 2 || i == 5))
00787                 return parse_short_charset_name( name );
00788 
00789         if( name[i] == '.' )
00790         {
00791                 if( name[i+1] == '\0' )
00792                         return parse_short_charset_name( name );
00793                 name = &name[i+1] ;
00794         }
00795         if( name[0] == 'L' || name[0] == 'l' ) /* L. or Latin... */
00796         {
00797                 char latin_n = name[1] ;
00798                 if( mystrncasecmp( &name[1], "ATIN", 4 ) == 0 )
00799                         latin_n = name[5] ;
00800                 switch( latin_n )
00801                 {  /* L# latins : */
00802                         case '1' : return CHARSET_ISO8859_1;
00803                         case '2' : return CHARSET_ISO8859_2;
00804                         case '3' : return CHARSET_ISO8859_3;
00805                         case '4' : return CHARSET_ISO8859_4;
00806                         case '5' : return CHARSET_ISO8859_9;
00807                         case '6' : return CHARSET_ISO8859_10;
00808                         case '7' : return CHARSET_ISO8859_13;
00809                         case '8' : return CHARSET_ISO8859_14;
00810                 }
00811                 return CHARSET_ISO8859_1;
00812         }else if( name[0] == 'I' || name[0] == 'i' ) /* ISO... or IBM819*/
00813         {
00814                 if( name[1] == 'S' && name[1] == 's' )
00815                         if( name[2] == 'O' && name[2] == 'o' )
00816                         {
00817                                 int pos = ( name[3] == '-' || name[3] == '_' )?4:3 ;
00818                                 if( name[pos] == '8' )
00819                                 {
00820                                         if( name[++pos] == '8' )
00821                                                 if( name[++pos] == '5' )
00822                                                         if( name[++pos] == '9' )
00823                                                         {
00824                                                                 pos += 2 ;
00825                                                                 switch( name[pos] )
00826                                                                 {
00827                                                                         case '1' :
00828                                                                                 {       switch(name[pos+1] )
00829                                                                                         {       case '0' : return CHARSET_ISO8859_10;
00830                                                                                                 case '1' :
00831                                                                                                 case '2' : break;
00832                                                                                                 case '3' : return CHARSET_ISO8859_13;
00833                                                                                                 case '4' : return CHARSET_ISO8859_14;
00834                                                                                                 case '5' : return CHARSET_ISO8859_15;
00835                                                                                                 case '6' : return CHARSET_ISO8859_16;
00836                                                                                         }
00837                                                                                 }
00838                                                                                 return CHARSET_ISO8859_1;
00839                                                                         case '2' : return CHARSET_ISO8859_2;
00840                                                                         case '3' : return CHARSET_ISO8859_3;
00841                                                                         case '4' : return CHARSET_ISO8859_4;
00842                                                                         case '5' : return CHARSET_ISO8859_5;
00843                                                                         case '6' : return CHARSET_ISO8859_6;
00844                                                                         case '7' : return CHARSET_ISO8859_7;
00845                                                                         case '8' : return CHARSET_ISO8859_8;
00846                                                                         case '9' : return CHARSET_ISO8859_9;
00847                                                                 }
00848                                                         }
00849                                 }else if( mystrncasecmp( &name[pos], "IR-", 3 ) == 0 )
00850                                 {
00851                                         pos += 3 ;
00852                                         switch( name[pos+2] )
00853                                         {
00854                                                 case '0' : if( name[pos+1] == '0' ) break;
00855                                                         return CHARSET_ISO8859_4;
00856                                                 case '1' : return CHARSET_ISO8859_2;
00857                                                 case '2' : break;
00858                                                 case '3' : return CHARSET_ISO8859_15;
00859                                                 case '4' : return CHARSET_ISO8859_5;
00860                                                 case '5' : break;
00861                                                 case '6' : return (name[pos]  =='2')?CHARSET_ISO8859_16:CHARSET_ISO8859_7;
00862                                                 case '7' : return (name[pos+1]=='2')?CHARSET_ISO8859_6:CHARSET_ISO8859_10;
00863                                                 case '8' : return (name[pos+1]=='3')?CHARSET_ISO8859_8:CHARSET_ISO8859_9;
00864                                                 case '9' : return (name[pos+1]=='0')?CHARSET_ISO8859_3:
00865                                                                   ((name[pos+1]=='7')?CHARSET_ISO8859_13:CHARSET_ISO8859_14);
00866                                         }
00867                                 }
00868                         }
00869                 return CHARSET_ISO8859_1;
00870         }else if( name[0] == 'C' || name[0] == 'c' ) /* cs or CP ... or CYRILLIC*/
00871         {
00872                 if( name[1] == 'S' || name[1] == 's' )
00873                 {/* cs* */
00874                         if( mystrncasecmp( &name[2], "KOI8", 4 ) == 0 )
00875                                 return CHARSET_KOI8_R ;
00876                         if( mystrncasecmp( &name[2], "ISOLatin", 8 ) == 0 )
00877                         {
00878                                 switch( name[10] )
00879                                 {
00880                                         case '1' : return CHARSET_ISO8859_1;
00881                                         case '2' : return CHARSET_ISO8859_2;
00882                                         case '3' : return CHARSET_ISO8859_3;
00883                                         case '4' : return CHARSET_ISO8859_4;
00884                                         case '5' : return CHARSET_ISO8859_9;
00885                                         case '6' : return CHARSET_ISO8859_10;
00886                                         case '7' : return CHARSET_ISO8859_13;
00887                                         case '8' : return CHARSET_ISO8859_14;
00888                                 }
00889                                 if( name[10] == 'A' || name[10] == 'a' )
00890                                         return CHARSET_ISO8859_6;
00891                                 if( name[10] == 'C' || name[10] == 'c' )
00892                                         return CHARSET_ISO8859_5;
00893                                 if( name[10] == 'H' || name[10] == 'h' )
00894                                         return CHARSET_ISO8859_8;
00895                                 if( name[10] == 'G' || name[10] == 'g' )
00896                                         return CHARSET_ISO8859_7;
00897                         }
00898                         return CHARSET_ISO8859_1;
00899                 }else if( name[1] == 'P' || name[1] == 'p' )
00900                 {/* CP- */
00901                         if( strncmp( &name[2], "125", 3 ) == 0 )
00902                         {
00903                                 if( name[5] == '1')
00904                                         return CHARSET_CP1251;
00905                                 if( name[5] == '2')
00906                                         return CHARSET_CP1252;
00907                                 return CHARSET_CP1250;
00908                         }
00909                         return CHARSET_ISO8859_1;
00910                 }
00911                 return CHARSET_ISO8859_5 ; /* CYRILLIC */
00912         }else if( name[0] == 'K' || name[0] == 'k' ) /* KOI... */
00913         {
00914                 if( mystrncasecmp( &name[1], "OI8-", 4) == 0 )
00915                 {
00916                         if( name[5] == 'U' || name[5] == 'u' )
00917                                 return CHARSET_KOI8_U;
00918                         if( name[5] == 'R' || name[5] == 'r' )
00919                             if( name[6] == 'U' || name[6] == 'u' )
00920                                         return CHARSET_KOI8_RU;
00921                 }
00922                 return CHARSET_KOI8_R ;
00923         }else if( name[0] == 'E' || name[0] == 'e' ) /* ECMA... */
00924         {
00925                 if( mystrncasecmp( &name[1], "CMA-11", 6 ) == 0 )
00926                 {
00927                         if( name[7] == '4' )
00928                                 return CHARSET_ISO8859_6 ;
00929                 }
00930                 /* ELOT_928 or ECMA-118 */
00931                 return CHARSET_ISO8859_7 ;
00932         }else if( name[0] == 'M' || name[0] == 'm' ) /* MS-... */
00933         {
00934                 if( name[1] == 'S' || name[1] == 's' ) /* MS-... */
00935                         if( name[2] == '-' )
00936                         {
00937                                 if( name[3] == 'C' || name[3] == 'c' )
00938                                         return CHARSET_CP1251 ;
00939                                 if( name[3] == 'A' || name[3] == 'a' )
00940                                         return CHARSET_CP1252 ;
00941                         }
00942                 set = CHARSET_CP1250 ;
00943         }else if( name[0] == 'A' || name[0] == 'a' ) /* ARABIC or ASMO-708 */
00944         {
00945                 return CHARSET_ISO8859_6 ;
00946         }else if( name[0] == 'G' || name[0] == 'g' ) /* GREEK or GREEK8 */
00947         {
00948                 /* if( strncasecmp( &name[1], "REEK", 4 ) == 0 ) */
00949                 return CHARSET_ISO8859_7 ;
00950         }else if( name[0] == 'H' || name[0] == 'h' ) /* HEBREW */
00951         {
00952                 /* if( strncasecmp( &name[1], "EBREW", 5 ) == 0 ) */
00953                 return CHARSET_ISO8859_8 ;
00954         }else if( name[0] == 'U' || name[0] == 'u' ) /* UTF8 ? */
00955         {
00956                 return CHARSET_UTF8 ;
00957         }
00958 
00959 #if 0
00960         while( set < SUPPORTED_CHARSETS_NUM )
00961         {
00962                 char **aliases =&(_as_charset_names[set][0]) ;
00963                 register int i = 0 ;
00964                 char c;
00965                 while( (c = aliases[i][0]) != '\0' )
00966                 {
00967                         if( c == name[1] || tolower(c) == name[0] )
00968                                 if( strcasecmp( aliases[i], name ) == 0 )
00969                                         return set;
00970                         ++i ;
00971                 }
00972                 ++set;
00973         }
00974 #endif
00975         return CHARSET_ISO8859_1 ;
00976 }
00977 
00978 
00979 const unsigned short *as_current_charset = &_as_iso8859_1_2uni[0];
00980 ASSupportedCharsets as_current_charset_id = CHARSET_ISO8859_1;
00981 
00982 ASSupportedCharsets
00983 as_set_charset( ASSupportedCharsets new_charset )
00984 {
00985 
00986         if( new_charset < 0 || new_charset >= SUPPORTED_CHARSETS_NUM )
00987                 new_charset = CHARSET_ISO8859_1 ;
00988 
00989         as_current_charset = _as_supported_charsets[new_charset] ;
00990         as_current_charset_id = new_charset ;
00991         return new_charset ;
00992 }

Generated on Tue Jul 5 14:13:27 2011 for ROOT_528-00b_version by  doxygen 1.5.1