/* sombok 2.2.1 */
/*
 * sombok.h - common definitions for Sombok library
 *
 * Copyright (C) 2009-2012 by Hatuka*nezumi - IKEDA Soji.
 *
 * This file is part of the Sombok Package. This program is free
 * software; you can redistribute it and/or modify it under the terms of
 * either the GNU General Public License or the Artistic License, as
 * specified in the README file.
 *
 */

#ifndef _SOMBOK_H_
/*
 * The guard macro is defined before anything else is pulled in so that a
 * recursive inclusion of this header is a no-op.  The (reserved-looking)
 * name is kept as is because external code may test for it.
 */
#define _SOMBOK_H_

#ifdef HAVE_CONFIG_H
#    include "config.h"
#endif
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#    include <strings.h>
#endif /* HAVE_STRINGS_H */

/** Library version string. */
#define SOMBOK_VERSION "2.2.1"

/*
 * Feature macros describing the underlying type of unichar_t.
 * NOTE(review): these look like build-time generated settings; confirm
 * before editing them by hand.
 */
#define SOMBOK_UNICHAR_T_IS_WCHAR_T
#define SOMBOK_UNICHAR_T_IS_UNSIGNED_INT
#undef SOMBOK_UNICHAR_T_IS_UNSIGNED_LONG

/***
 *** Data structure.
 ***/

/* Primitive types */

/** Character type used by unistr_t and gcstring_t buffers. */
typedef unsigned int unichar_t;

/**
 * Property value type (one byte).  PROP_UNKNOWN is the all-bits-set value
 * of this type.
 */
typedef unsigned char propval_t;

/** Counted string of unichar_t: a buffer pointer plus a length. */
typedef struct {
    /** Character buffer. */
    unichar_t *str;
    /** Length paired with str. */
    size_t len;
} unistr_t;

/**
 * Element type of the gcstring_t::gcstr array.
 * NOTE(review): "gc" presumably stands for grapheme cluster - confirm.
 */
typedef struct {
    /** Index value (presumably an offset into the owning string). */
    size_t idx;
    /** Length value (presumably counted in unichar_t units). */
    size_t len;
    /** Column count. */
    size_t col;
    /** Property value; named like mapent_t::lbc. */
    propval_t lbc;
    /** Second property value ("elbc"). */
    propval_t elbc;
    /** Flag byte; presumably holds LINEBREAK_FLAG_* values - confirm. */
    unsigned char flag;
} gcchar_t;

/**
 * Property map entry: a character range [beg, end] together with four
 * property values.  linebreak_t::map points at an array of these.
 */
typedef struct {
    /** First character of the range. */
    unichar_t beg;
    /** Last character of the range. */
    unichar_t end;
    /** "lbc" property value. */
    propval_t lbc;
    /** "eaw" property value. */
    propval_t eaw;
    /** "gcb" property value. */
    propval_t gcb;
    /** "scr" property value. */
    propval_t scr;
} mapent_t;

struct linebreak_t;

/**
 * String object carrying both a unichar_t buffer and an array of gcchar_t
 * elements, plus a cursor and a reference to a linebreak_t.
 */
typedef struct {
    /** Character buffer. */
    unichar_t *str;
    /** Length paired with str. */
    size_t len;
    /** Array of gcchar_t elements. */
    gcchar_t *gcstr;
    /** Number of elements paired with gcstr. */
    size_t gclen;
    /** Cursor position; compared against gclen by gcstring_eos(). */
    size_t pos;
    /** Associated linebreak object. */
    struct linebreak_t *lbobj;
} gcstring_t;

/**
 * State values passed to format callbacks (see linebreak_format_func_t).
 * Values run consecutively from LINEBREAK_STATE_NONE (0) up to
 * LINEBREAK_STATE_MAX.
 */
typedef enum {
    LINEBREAK_STATE_NONE = 0,
    LINEBREAK_STATE_SOT, LINEBREAK_STATE_SOP, LINEBREAK_STATE_SOL,
    LINEBREAK_STATE_LINE,
    LINEBREAK_STATE_EOL, LINEBREAK_STATE_EOP, LINEBREAK_STATE_EOT,
    LINEBREAK_STATE_MAX
} linebreak_state_t;

/*
 * Callback signatures.  Parameter meanings are not visible in this header;
 * see the implementation for the contract of each callback.
 */

/**
 * Reference-management callback, supplied to linebreak_new().
 * NOTE(review): the int arguments presumably carry a LINEBREAK_REF_* type
 * and a count delta - confirm.
 */
typedef void
(*linebreak_ref_func_t) (void *, int, int);
/** Format callback, installed with linebreak_set_format(). */
typedef gcstring_t *
(*linebreak_format_func_t) (struct linebreak_t *, linebreak_state_t,
                            gcstring_t *);
/** Sizing callback, installed with linebreak_set_sizing(). */
typedef double
(*linebreak_sizing_func_t) (struct linebreak_t *, double,
                            gcstring_t *, gcstring_t *, gcstring_t *);
/** Urgent-break callback, installed with linebreak_set_urgent(). */
typedef gcstring_t *
(*linebreak_urgent_func_t) (struct linebreak_t *, gcstring_t *);
/** Preprocessing callback, added with linebreak_add_prep(). */
typedef gcstring_t *
(*linebreak_prep_func_t) (struct linebreak_t *, void *, unistr_t *,
                          unistr_t *);
/** Older-style preprocessing callback, installed with linebreak_set_user(). */
typedef gcstring_t *
(*linebreak_obs_prep_func_t) (struct linebreak_t *, unistr_t *);

/**
 * Line breaking object: working buffers, configuration, callbacks and
 * their associated data pointers.
 */
typedef struct linebreak_t {
    /** Reference count. */
    unsigned long int refcount;
    /**
     * Current state.  Declared int rather than linebreak_state_t, so it
     * can also hold the negative LINEBREAK_STATE_*_FORMAT values.
     */
    int state;
    /** Working buffer ("bufstr"). */
    unistr_t bufstr;
    /** Working buffer ("bufspc"). */
    unistr_t bufspc;
    /** Column count associated with the working buffers. */
    double bufcols;
    /** Unread input. */
    unistr_t unread;

    /** Character limit; see LINEBREAK_DEFAULT_CHARMAX. */
    size_t charmax;
    /** Upper column limit. */
    double colmax;
    /** Lower column limit. */
    double colmin;
    /** Array of property map entries. */
    mapent_t *map;
    /** Number of entries paired with map. */
    size_t mapsiz;
    /** Newline string; set by linebreak_set_newline(). */
    unistr_t newline;
    /** Option bits; see LINEBREAK_OPTION_*. */
    unsigned int options;
    /** Data pointers paired with format_func / sizing_func / urgent_func. */
    void *format_data;
    void *sizing_data;
    void *urgent_data;
    /** Data pointer paired with user_func. */
    void *user_data;
    /** Opaque pointer; set by linebreak_set_stash(). */
    void *stash;
    /** Format callback. */
    linebreak_format_func_t format_func;
    /** Sizing callback. */
    linebreak_sizing_func_t sizing_func;
    /** Urgent-break callback. */
    linebreak_urgent_func_t urgent_func;
    /** Older-style preprocessing callback. */
    linebreak_obs_prep_func_t user_func;
    /** Reference-management callback. */
    linebreak_ref_func_t ref_func;
    /** Error number. */
    int errnum;
    /** Array of preprocessing callbacks. */
    linebreak_prep_func_t * prep_func;
    /** Array of data pointers; presumably parallel to prep_func - confirm. */
    void **prep_data;
} linebreak_t;

/***
 *** Constants.
 ***/

/** "Unknown" property value: all bits of propval_t set. */
#define PROP_UNKNOWN ((propval_t)~0)

/*
 * LINEBREAK_FLAG_* values.  BREAK_BEFORE is an alias of ALLOW_BEFORE.
 * NOTE(review): presumably stored in gcchar_t::flag - confirm.
 */
#define LINEBREAK_FLAG_PROHIBIT_BEFORE (1)
#define LINEBREAK_FLAG_ALLOW_BEFORE (2)
#define LINEBREAK_FLAG_BREAK_BEFORE LINEBREAK_FLAG_ALLOW_BEFORE

/** Default for the character limit (named after linebreak_t::charmax). */
#define LINEBREAK_DEFAULT_CHARMAX (998)

/* Option bits; each is a distinct power of two (see linebreak_t::options). */
#define LINEBREAK_OPTION_EASTASIAN_CONTEXT (1)
#define LINEBREAK_OPTION_HANGUL_AS_AL (2)
#define LINEBREAK_OPTION_LEGACY_CM (4)
#define LINEBREAK_OPTION_BREAK_INDENT (8)
#define LINEBREAK_OPTION_COMPLEX_BREAKING (16)
#define LINEBREAK_OPTION_NONSTARTER_LOOSE (32)
#define LINEBREAK_OPTION_VIRAMA_AS_JOINER (64)

/* Negated counterparts of the SOT / SOP / SOL state values. */
#define LINEBREAK_STATE_SOT_FORMAT (-LINEBREAK_STATE_SOT)
#define LINEBREAK_STATE_SOP_FORMAT (-LINEBREAK_STATE_SOP)
#define LINEBREAK_STATE_SOL_FORMAT (-LINEBREAK_STATE_SOL)

/*
 * LINEBREAK_REF_* values.
 * NOTE(review): presumably the data-type argument of linebreak_ref_func_t -
 * confirm.
 */
#define LINEBREAK_REF_STASH (0)
#define LINEBREAK_REF_FORMAT (1)
#define LINEBREAK_REF_SIZING (2)
#define LINEBREAK_REF_URGENT (3)
#define LINEBREAK_REF_USER (4)
#define LINEBREAK_REF_PREP (5)

/*
 * Break actions, ordered PROHIBITED < INDIRECT < DIRECT < MANDATORY.
 */
#define LINEBREAK_ACTION_MANDATORY (4)
#define LINEBREAK_ACTION_DIRECT (3)
#define LINEBREAK_ACTION_INDIRECT (2)
#define LINEBREAK_ACTION_PROHIBITED (1)

/*
 * Library-specific error codes (negative).
 * NOTE(review): presumably stored in linebreak_t::errnum - confirm.
 */
#define LINEBREAK_ELONG (-2)
#define LINEBREAK_EEXTN (-3)

/*
 * UTF-8 check levels.
 * NOTE(review): presumably the int argument of the *_utf8 functions -
 * confirm.
 */
#define SOMBOK_UTF8_CHECK_NONE (0)
#define SOMBOK_UTF8_CHECK_MALFORMED (1)
#define SOMBOK_UTF8_CHECK_SURROGATE (2)
#define SOMBOK_UTF8_CHECK_NONUNICODE (3)

/***
 *** Public functions, global variables and macros.
 ***/

extern void linebreak_charprop(linebreak_t *, unichar_t,
                               propval_t *, propval_t *, propval_t *,
                               propval_t *);

/* gcstring_t construction, destruction and manipulation. */
extern gcstring_t *gcstring_new(unistr_t *, linebreak_t *);
extern gcstring_t *gcstring_new_from_utf8(char *, size_t, int,
                                          linebreak_t *);
extern gcstring_t *gcstring_newcopy(unistr_t *, linebreak_t *);
extern gcstring_t *gcstring_copy(gcstring_t *);
extern void gcstring_destroy(gcstring_t *);
extern gcstring_t *gcstring_append(gcstring_t *, gcstring_t *);
extern size_t gcstring_columns(gcstring_t *);
extern int gcstring_cmp(gcstring_t *, gcstring_t *);
extern gcstring_t *gcstring_concat(gcstring_t *, gcstring_t *);
extern gcchar_t *gcstring_next(gcstring_t *);
extern void gcstring_setpos(gcstring_t *, int);
extern void gcstring_shrink(gcstring_t *, int);
extern gcstring_t *gcstring_substr(gcstring_t *, int, int);
extern gcstring_t *gcstring_replace(gcstring_t *, int, int, gcstring_t *);

/**
 * Non-zero when the cursor of gcstr has reached or passed gclen.
 * The argument is evaluated twice; do not pass an expression with side
 * effects.
 */
#define gcstring_eos(gcstr) \
    ((gcstr)->gclen <= (gcstr)->pos)
/** Current cursor position of gcstr. */
#define gcstring_getpos(gcstr) \
    ((gcstr)->pos)

extern propval_t gcstring_lbclass(gcstring_t *, int);
extern propval_t gcstring_lbclass_ext(gcstring_t *, int);

/* linebreak_t life cycle. */
extern linebreak_t *linebreak_new(linebreak_ref_func_t);
extern linebreak_t *linebreak_copy(linebreak_t *);
extern linebreak_t *linebreak_incref(linebreak_t *);
extern void linebreak_destroy(linebreak_t *);

/* linebreak_t configuration. */
extern void linebreak_set_newline(linebreak_t *, unistr_t *);
extern void linebreak_set_stash(linebreak_t *, void *);
extern void linebreak_set_format(linebreak_t *, linebreak_format_func_t,
                                 void *);
extern void linebreak_add_prep(linebreak_t *, linebreak_prep_func_t,
                               void *);
extern void linebreak_set_sizing(linebreak_t *, linebreak_sizing_func_t,
                                 void *);
extern void linebreak_set_urgent(linebreak_t *, linebreak_urgent_func_t,
                                 void *);
extern void linebreak_set_user(linebreak_t *, linebreak_obs_prep_func_t,
                               void *);
extern void linebreak_reset(linebreak_t *);
extern void linebreak_update_lbclass(linebreak_t *, unichar_t, propval_t);
extern void linebreak_clear_lbclass(linebreak_t *);
extern void linebreak_update_eawidth(linebreak_t *, unichar_t, propval_t);
extern void linebreak_clear_eawidth(linebreak_t *);
extern propval_t linebreak_search_lbclass(linebreak_t *, unichar_t);
extern propval_t linebreak_search_eawidth(linebreak_t *, unichar_t);
extern void linebreak_merge_lbclass(linebreak_t *, linebreak_t *);
extern void linebreak_merge_eawidth(linebreak_t *, linebreak_t *);

extern propval_t linebreak_eawidth(linebreak_t *, unichar_t);   /* obs. */
extern propval_t linebreak_get_lbrule(linebreak_t *, propval_t, propval_t);
extern propval_t linebreak_lbclass(linebreak_t *, unichar_t);   /* obs. */

/* Line breaking entry points. */
extern gcstring_t **linebreak_break(linebreak_t *, unistr_t *);
extern gcstring_t **linebreak_break_fast(linebreak_t *, unistr_t *);
extern gcstring_t **linebreak_break_from_utf8(linebreak_t *, char *,
                                              size_t, int);
extern gcstring_t **linebreak_break_partial(linebreak_t *, unistr_t *);
extern void linebreak_free_result(gcstring_t **, int);
extern propval_t linebreak_lbrule(propval_t, propval_t);        /* obs. */

/* Global data. */
extern const char *linebreak_unicode_version;
extern const char *linebreak_propvals_EA[];
extern const char *linebreak_propvals_LB[];
extern const char *linebreak_southeastasian_supported;
extern void linebreak_southeastasian_flagbreak(gcstring_t *);

/* UTF-8 conversion helpers. */
extern unistr_t *sombok_decode_utf8(unistr_t *, size_t, const char *,
                                    size_t, int);
extern char *sombok_encode_utf8(char *, size_t *, size_t, unistr_t *);

/***
 *** Built-in callbacks for linebreak_t.
 ***/
extern gcstring_t *linebreak_format_SIMPLE(linebreak_t *,
                                           linebreak_state_t,
                                           gcstring_t *);
extern gcstring_t *linebreak_format_NEWLINE(linebreak_t *,
                                            linebreak_state_t,
                                            gcstring_t *);
extern gcstring_t *linebreak_format_TRIM(linebreak_t *, linebreak_state_t,
                                         gcstring_t *);
extern gcstring_t *linebreak_prep_URIBREAK(linebreak_t *, void *,
                                           unistr_t *, unistr_t *);
extern double linebreak_sizing_UAX11(linebreak_t *, double, gcstring_t *,
                                     gcstring_t *, gcstring_t *);
extern gcstring_t *linebreak_urgent_ABORT(linebreak_t *, gcstring_t *);
extern gcstring_t *linebreak_urgent_FORCE(linebreak_t *, gcstring_t *);

#endif /* _SOMBOK_H_ */

/* Deliberately outside the include guard, as in the original layout. */
#ifdef MALLOC_DEBUG
#include "src/mymalloc.h"
#endif /* MALLOC_DEBUG */