00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef __G_UNICODE_H__
00023 #define __G_UNICODE_H__
00024
00025 #include <stddef.h>
00026 #include <g_types.h>
00027
00028 G_BEGIN_DECLS
00029
/* Character unit types used throughout this header.
 * gunichar is an alias for guint32 and gunichar2 an alias for guint16;
 * both underlying types are declared outside this file (presumably by
 * g_types.h, which is included above).
 * In the prototypes of this header gunichar is the element type of the
 * *_ucs4 buffers and gunichar2 the element type of the *_utf16 buffers. */
00030 typedef guint32 gunichar;
00031 typedef guint16 gunichar2;
00032
00033
00034
00035
/* Character classification codes; this is the return type of
 * g_unichar_type().
 *
 * No enumerator has an explicit value, so they are numbered 0..29 in the
 * order written. Inserting, removing or reordering entries changes those
 * numbers for every user of the type.
 *
 * NOTE(review): the names look like the Unicode general categories;
 * the mapping itself lives in the implementation - confirm there. */
00036 typedef enum
00037 {
00038 G_UNICODE_CONTROL,
00039 G_UNICODE_FORMAT,
00040 G_UNICODE_UNASSIGNED,
00041 G_UNICODE_PRIVATE_USE,
00042 G_UNICODE_SURROGATE,
00043 G_UNICODE_LOWERCASE_LETTER,
00044 G_UNICODE_MODIFIER_LETTER,
00045 G_UNICODE_OTHER_LETTER,
00046 G_UNICODE_TITLECASE_LETTER,
00047 G_UNICODE_UPPERCASE_LETTER,
00048 G_UNICODE_COMBINING_MARK,
00049 G_UNICODE_ENCLOSING_MARK,
00050 G_UNICODE_NON_SPACING_MARK,
00051 G_UNICODE_DECIMAL_NUMBER,
00052 G_UNICODE_LETTER_NUMBER,
00053 G_UNICODE_OTHER_NUMBER,
00054 G_UNICODE_CONNECT_PUNCTUATION,
00055 G_UNICODE_DASH_PUNCTUATION,
00056 G_UNICODE_CLOSE_PUNCTUATION,
00057 G_UNICODE_FINAL_PUNCTUATION,
00058 G_UNICODE_INITIAL_PUNCTUATION,
00059 G_UNICODE_OTHER_PUNCTUATION,
00060 G_UNICODE_OPEN_PUNCTUATION,
00061 G_UNICODE_CURRENCY_SYMBOL,
00062 G_UNICODE_MODIFIER_SYMBOL,
00063 G_UNICODE_MATH_SYMBOL,
00064 G_UNICODE_OTHER_SYMBOL,
00065 G_UNICODE_LINE_SEPARATOR,
00066 G_UNICODE_PARAGRAPH_SEPARATOR,
00067 G_UNICODE_SPACE_SEPARATOR
00068 } GUnicodeType;
00069
00070
00071
00072
/* Break classification codes; this is the return type of
 * g_unichar_break_type().
 *
 * No enumerator has an explicit value, so they are numbered 0..28 in the
 * order written. Inserting, removing or reordering entries changes those
 * numbers for every user of the type.
 *
 * NOTE(review): the names look like Unicode line-breaking classes;
 * the mapping itself lives in the implementation - confirm there. */
00073 typedef enum
00074 {
00075 G_UNICODE_BREAK_MANDATORY,
00076 G_UNICODE_BREAK_CARRIAGE_RETURN,
00077 G_UNICODE_BREAK_LINE_FEED,
00078 G_UNICODE_BREAK_COMBINING_MARK,
00079 G_UNICODE_BREAK_SURROGATE,
00080 G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
00081 G_UNICODE_BREAK_INSEPARABLE,
00082 G_UNICODE_BREAK_NON_BREAKING_GLUE,
00083 G_UNICODE_BREAK_CONTINGENT,
00084 G_UNICODE_BREAK_SPACE,
00085 G_UNICODE_BREAK_AFTER,
00086 G_UNICODE_BREAK_BEFORE,
00087 G_UNICODE_BREAK_BEFORE_AND_AFTER,
00088 G_UNICODE_BREAK_HYPHEN,
00089 G_UNICODE_BREAK_NON_STARTER,
00090 G_UNICODE_BREAK_OPEN_PUNCTUATION,
00091 G_UNICODE_BREAK_CLOSE_PUNCTUATION,
00092 G_UNICODE_BREAK_QUOTATION,
00093 G_UNICODE_BREAK_EXCLAMATION,
00094 G_UNICODE_BREAK_IDEOGRAPHIC,
00095 G_UNICODE_BREAK_NUMERIC,
00096 G_UNICODE_BREAK_INFIX_SEPARATOR,
00097 G_UNICODE_BREAK_SYMBOL,
00098 G_UNICODE_BREAK_ALPHABETIC,
00099 G_UNICODE_BREAK_PREFIX,
00100 G_UNICODE_BREAK_POSTFIX,
00101 G_UNICODE_BREAK_COMPLEX_CONTEXT,
00102 G_UNICODE_BREAK_AMBIGUOUS,
00103 G_UNICODE_BREAK_UNKNOWN
00104 } GUnicodeBreakType;
00105
00106
00107
00108
00109
00110
00111
/* Charset query.
 * charset: address of a char pointer; by its type this is an output
 *          parameter through which a string pointer is handed back.
 * Returns a gboolean.
 * NOTE(review): presumably reports the current locale's character set
 * and returns whether it is UTF-8 - neither the meaning of the result
 * nor the ownership of the stored string is visible here; confirm
 * against the implementation. */
00112 gboolean g_get_charset (char **charset);
00113
00114
00115
/* Character classification predicates.
 * Each takes a single gunichar and returns a gboolean. Most names mirror
 * the <ctype.h> isxxx() family; istitle, isdefined and iswide have no
 * <ctype.h> counterpart.
 * Every prototype carries G_GNUC_CONST, a macro defined outside this file.
 * NOTE(review): presumably it expands to GCC's `const` function attribute
 * (result depends only on the argument, no side effects) - confirm. */
00116 gboolean g_unichar_isalnum (gunichar c) G_GNUC_CONST;
00117 gboolean g_unichar_isalpha (gunichar c) G_GNUC_CONST;
00118 gboolean g_unichar_iscntrl (gunichar c) G_GNUC_CONST;
00119 gboolean g_unichar_isdigit (gunichar c) G_GNUC_CONST;
00120 gboolean g_unichar_isgraph (gunichar c) G_GNUC_CONST;
00121 gboolean g_unichar_islower (gunichar c) G_GNUC_CONST;
00122 gboolean g_unichar_isprint (gunichar c) G_GNUC_CONST;
00123 gboolean g_unichar_ispunct (gunichar c) G_GNUC_CONST;
00124 gboolean g_unichar_isspace (gunichar c) G_GNUC_CONST;
00125 gboolean g_unichar_isupper (gunichar c) G_GNUC_CONST;
00126 gboolean g_unichar_isxdigit (gunichar c) G_GNUC_CONST;
00127 gboolean g_unichar_istitle (gunichar c) G_GNUC_CONST;
00128 gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST;
00129 gboolean g_unichar_iswide (gunichar c) G_GNUC_CONST;
00130
00131
00132
/* Case mapping: each function takes one gunichar and returns one gunichar
 * (upper, lower and title case respectively, going by the names).
 * NOTE(review): the result for characters without a case mapping is not
 * visible from this header - presumably the input is returned unchanged;
 * confirm against the implementation. */
00133 gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST;
00134 gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST;
00135 gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST;
00136
00137
00138
/* Numeric value lookups: each takes one gunichar and returns a gint.
 * NOTE(review): the value returned for a character that is not a
 * (hexadecimal) digit is not visible here - presumably a negative
 * sentinel; confirm before relying on it. */
00139 gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
00140
00141 gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
00142
00143
/* Property lookups for a single character.
 * g_unichar_type() returns a GUnicodeType and g_unichar_break_type()
 * returns a GUnicodeBreakType; both take one gunichar. */
00144 GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;
00145
00146
00147 GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
00148
00149
00150
00151
00152
/* Canonical ordering of a character buffer.
 * string: non-const gunichar buffer; since the function returns void and
 *         the pointer is not const, the buffer is presumably reordered in
 *         place - confirm against the implementation.
 * len:    size_t length accompanying the buffer (presumably a count of
 *         gunichar elements, not bytes - confirm). */
00153 void g_unicode_canonical_ordering (gunichar *string,
00154 size_t len);
00155
00156
00157
00158
/* Canonical decomposition of a single character.
 * ch:         the character to decompose.
 * result_len: address of a size_t; by its type an output parameter,
 *             presumably receiving the number of elements returned.
 * Returns a pointer to gunichar.
 * NOTE(review): whether the returned buffer is newly allocated and who
 * must release it is not stated in this header - confirm. */
00159 gunichar *g_unicode_canonical_decomposition (gunichar ch,
00160 size_t *result_len);
00161
00162
00163
00164
00165
/* Linkage prefix for variables exported by the library.
 * Defined only if the includer has not already defined GLIB_VAR:
 *   - G_OS_WIN32 and GLIB_COMPILATION defined: __declspec(dllexport)
 *   - G_OS_WIN32 without GLIB_COMPILATION:      extern __declspec(dllimport)
 *   - G_OS_WIN32 not defined:                   extern
 * NOTE(review): the dllexport branch carries no `extern`, so a file-scope
 * declaration written with GLIB_VAR is a tentative definition in that
 * configuration rather than a pure declaration - confirm this is intended. */
00166 #ifndef GLIB_VAR
00167 # ifdef G_OS_WIN32
00168 # ifdef GLIB_COMPILATION
00169 # define GLIB_VAR __declspec(dllexport)
00170 # else
00171 # define GLIB_VAR extern __declspec(dllimport)
00172 # endif
00173 # else
00174 # define GLIB_VAR extern
00175 # endif
00176 #endif
00177
/* 256-entry table of char, declared with GLIB_VAR linkage.
 * g_utf8_next_char() indexes it with the byte found at a string position
 * and adds the entry to the pointer.
 * NOTE(review): presumably entry i is the length in bytes of a UTF-8
 * sequence whose first byte is i; the contents are defined elsewhere -
 * confirm. */
00178 GLIB_VAR char g_utf8_skip[256];
00179
/* Step a pointer forward by the g_utf8_skip entry for the byte it points at.
 * The byte at p is read as an unsigned value (0..255), used as an index
 * into g_utf8_skip, and the table entry is added to p; the result has type
 * char *.
 * NOTE(review): presumably this moves p to the start of the next UTF-8
 * character; that depends on the table contents, defined elsewhere.
 *
 * The whole expansion is parenthesized so that a postfix operator applied
 * to the macro, e.g. g_utf8_next_char (p)[0], acts on the resulting pointer
 * instead of binding to the inner sum ahead of the cast. The byte is read
 * through a const-qualified pointer so a const input does not have its
 * qualifier cast away.
 *
 * p is evaluated twice: do not pass an expression with side effects. */
00180 #define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))
00181
/* UTF-8 string navigation.
 * All string arguments are const gchar *; the pointer-returning functions
 * nevertheless return non-const gchar *.
 *
 * g_utf8_get_char:          takes a string position, returns a gunichar.
 * g_utf8_offset_to_pointer: takes a string and a gint offset, returns a
 *                           pointer.
 * g_utf8_pointer_to_offset: takes a string and a position, returns a gint.
 * g_utf8_prev_char:         takes a position, returns a pointer.
 * g_utf8_find_next_char:    takes a position and an end pointer, returns a
 *                           pointer.
 * g_utf8_find_prev_char:    takes the string start and a position, returns
 *                           a pointer.
 *
 * NOTE(review): offsets are presumably counted in characters rather than
 * bytes, and the find_* variants presumably return NULL when no character
 * is found within the given bound - neither is visible here; confirm. */
00182 gunichar g_utf8_get_char (const gchar *p);
00183 gchar * g_utf8_offset_to_pointer (const gchar *str,
00184 gint offset);
00185 gint g_utf8_pointer_to_offset (const gchar *str,
00186 const gchar *pos);
00187 gchar * g_utf8_prev_char (const gchar *p);
00188 gchar * g_utf8_find_next_char (const gchar *p,
00189 const gchar *end);
00190 gchar * g_utf8_find_prev_char (const gchar *str,
00191 const gchar *p);
00192
/* Length of a UTF-8 string.
 * p:   the string.
 * max: a gint bound.
 * Returns a gint.
 * NOTE(review): presumably the result counts characters and max limits the
 * number of bytes examined (with a negative value meaning "until NUL") -
 * not visible here; confirm. */
00193 gint g_utf8_strlen (const gchar *p,
00194 gint max);
00195
00196
/* Bounded copy between UTF-8 strings.
 * dest: writable destination buffer.
 * src:  source string (not modified).
 * n:    size_t limit.
 * Returns a gchar pointer.
 * NOTE(review): presumably n counts characters rather than bytes and the
 * return value is dest, by analogy with strncpy() - the header does not
 * say how large dest must be or whether it is always terminated; confirm. */
00197 gchar *g_utf8_strncpy (gchar *dest,
00198 const gchar *src,
00199 size_t n);
00200
00201
00202
00203
/* Character search in a UTF-8 string.
 * p: the string to search (const).
 * c: the character to look for, as a gunichar.
 * Both return a non-const gchar pointer although the input is const.
 * NOTE(review): by analogy with strchr()/strrchr() these presumably return
 * the first / last occurrence, or NULL when c is absent - confirm. */
00204 gchar *g_utf8_strchr (const gchar *p,
00205 gunichar c);
00206 gchar *g_utf8_strrchr (const gchar *p,
00207 gunichar c);
00208
/* Conversions between UTF-8 (gchar), UTF-16 (gunichar2) and UCS-4
 * (gunichar) buffers. Each takes a const source buffer plus a gint len and
 * returns a non-const pointer to the destination element type.
 *
 * NOTE(review): g_ucs4_to_utf16() is declared to return gunichar *, while
 * the other UTF-16 producer, g_utf8_to_utf16(), returns gunichar2 *. That
 * looks inconsistent for a UTF-16 result - check the definition before
 * relying on the declared type.
 * NOTE(review): the unit of len, the meaning of a negative len, and the
 * ownership of the returned buffer (presumably newly allocated and to be
 * released by the caller) are not stated in this header - confirm. */
00209 gunichar2 *g_utf8_to_utf16 (const gchar *str,
00210 gint len);
00211 gunichar * g_utf8_to_ucs4 (const gchar *str,
00212 gint len);
00213 gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
00214 gint len);
00215 gchar * g_utf16_to_utf8 (const gunichar2 *str,
00216 gint len);
00217 gunichar * g_ucs4_to_utf16 (const gunichar *str,
00218 gint len);
00219 gchar * g_ucs4_to_utf8 (const gunichar *str,
00220 gint len);
00221
00222
00223
00224
00225
/* Encode one character as UTF-8.
 * c:      the character to encode.
 * outbuf: writable char buffer that receives the output.
 * Returns a gint.
 * NOTE(review): presumably the return value is the number of bytes
 * written; the required size of outbuf and whether a terminating NUL is
 * stored are not visible here - confirm. */
00226 gint g_unichar_to_utf8 (gunichar c,
00227 char *outbuf);
00228
00229
00230
00231
00232
/* Validate a UTF-8 string.
 * str:     the string to check (const).
 * max_len: a gint bound on the input.
 * end:     address of a const gchar pointer; by its type an output
 *          parameter.
 * Returns a gboolean.
 * NOTE(review): presumably returns TRUE when the text is valid and stores
 * in *end the position where validation stopped; whether end may be NULL
 * and what a negative max_len means are not visible here - confirm. */
00233 gboolean g_utf8_validate (const gchar *str,
00234 gint max_len,
00235 const gchar **end);
00236
00237 G_END_DECLS
00238
00239 #endif