| /* -*- c -*- ------------------------------------------------------------- * |
| * |
| * Copyright 2005 H. Peter Anvin - All Rights Reserved |
| * |
| * Permission is hereby granted, free of charge, to any person |
| * obtaining a copy of this software and associated documentation |
| * files (the "Software"), to deal in the Software without |
| * restriction, including without limitation the rights to use, |
| * copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following |
| * conditions: |
| * |
| * The above copyright notice and this permission notice shall |
| * be included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * ----------------------------------------------------------------------- */ |
| |
| /* |
| * ucd.h |
| * |
| * Simple interface to the Unicode Character Database |
| */ |
| |
| #ifndef UCD_H |
| #define UCD_H |
| |
| #include <stdint.h> |
| |
| #define LIBUCD_THREAD_SUPPORT /* Defined unconditionally here, so the #ifdef just below is always taken */ |
| |
| #ifdef LIBUCD_THREAD_SUPPORT |
| # include <pthread.h> /* NOTE(review): no pthread name is used in the visible part of this header; presumably needed by code that includes it -- confirm */ |
| #endif |
| |
| ENUMS; /* NOTE(review): looks like a placeholder to be substituted with the enum unicode_* definitions used by struct unicode_character_data -- confirm against the build */ |
| |
| /* |
| * Property flag bits. |
| * |
| * UC_FLAG(bit) expands to a 64-bit constant with only bit number `bit` |
| * set. Every UC_FL_* name below is one such single-bit mask; bit |
| * numbers 0 through 35 are assigned. Presumably these masks are meant |
| * to be tested against the `fl` member of struct unicode_character_data |
| * (TODO confirm). |
| */ |
| #define UC_FLAG(bit) (UINT64_C(1) << (bit)) |
| |
| #define UC_FL_COMPOSITION_EXCLUSION        UC_FLAG(0) |
| #define UC_FL_ALPHABETIC                   UC_FLAG(1) |
| #define UC_FL_DEFAULT_IGNORABLE_CODE_POINT UC_FLAG(2) |
| #define UC_FL_LOWERCASE                    UC_FLAG(3) |
| #define UC_FL_GRAPHEME_BASE                UC_FLAG(4) |
| #define UC_FL_GRAPHEME_EXTEND              UC_FLAG(5) |
| #define UC_FL_ID_START                     UC_FLAG(6) |
| #define UC_FL_ID_CONTINUE                  UC_FLAG(7) |
| #define UC_FL_MATH                         UC_FLAG(8) |
| #define UC_FL_UPPERCASE                    UC_FLAG(9) |
| #define UC_FL_XID_START                    UC_FLAG(10) |
| #define UC_FL_XID_CONTINUE                 UC_FLAG(11) |
| #define UC_FL_HEX_DIGIT                    UC_FLAG(12) |
| #define UC_FL_BIDI_CONTROL                 UC_FLAG(13) |
| #define UC_FL_DASH                         UC_FLAG(14) |
| #define UC_FL_DEPRECATED                   UC_FLAG(15) |
| #define UC_FL_DIACRITIC                    UC_FLAG(16) |
| #define UC_FL_EXTENDER                     UC_FLAG(17) |
| #define UC_FL_GRAPHEME_LINK                UC_FLAG(18) |
| #define UC_FL_IDEOGRAPHIC                  UC_FLAG(19) |
| #define UC_FL_IDS_BINARY_OPERATOR          UC_FLAG(20) |
| #define UC_FL_IDS_TRINARY_OPERATOR         UC_FLAG(21) |
| #define UC_FL_JOIN_CONTROL                 UC_FLAG(22) |
| #define UC_FL_LOGICAL_ORDER_EXCEPTION      UC_FLAG(23) |
| #define UC_FL_NONCHARACTER_CODE_POINT      UC_FLAG(24) |
| #define UC_FL_PATTERN_SYNTAX               UC_FLAG(25) |
| #define UC_FL_PATTERN_WHITE_SPACE          UC_FLAG(26) |
| #define UC_FL_QUOTATION_MARK               UC_FLAG(27) |
| #define UC_FL_RADICAL                      UC_FLAG(28) |
| #define UC_FL_SOFT_DOTTED                  UC_FLAG(29) |
| #define UC_FL_STERM                        UC_FLAG(30) |
| #define UC_FL_TERMINAL_PUNCTUATION         UC_FLAG(31) |
| #define UC_FL_UNIFIED_IDEOGRAPH            UC_FLAG(32) |
| #define UC_FL_VARIATION_SELECTOR           UC_FLAG(33) |
| #define UC_FL_WHITE_SPACE                  UC_FLAG(34) |
| #define UC_FL_BIDI_MIRRORED                UC_FLAG(35) |
| |
| struct unicode_character_data { /* Per-code-point record; the unicode_character_* functions declared in this header hand it out by const pointer */ |
| int32_t ucs; /* Actual codepoint */ |
| uint16_t size; /* Size of this structure (presumably in bytes -- TODO confirm) */ |
| uint16_t alloc_size; /* Allocation size (relation to `size` is not visible here) */ |
| uint64_t fl; /* Flags; presumably an OR of the UC_FL_* bit masks -- TODO confirm */ |
| const char *name; /* Presumably the character name; encoding and ownership are not shown in this header */ |
| const char *bidi_mirroring_glyph; /* String-valued; representation is not shown in this header */ |
| const char *uppercase_mapping; /* Presumably the full (possibly multi-character) uppercase mapping -- TODO confirm */ |
| const char *lowercase_mapping; /* Presumably the full lowercase mapping -- TODO confirm */ |
| const char *titlecase_mapping; /* Presumably the full titlecase mapping -- TODO confirm */ |
| int32_t simple_uppercase; /* Presumably a single code point, same type as `ucs` -- TODO confirm */ |
| int32_t simple_lowercase; /* Presumably a single code point -- TODO confirm */ |
| int32_t simple_titlecase; /* Presumably a single code point -- TODO confirm */ |
| /* Numeric value = num/den * 10^exp. NOTE(review): all three parts are unsigned 8-bit, so negative values or exponents cannot be stored as-is -- confirm intended range */ |
| uint8_t numeric_value_num; /* Numerator */ |
| uint8_t numeric_value_den; /* Denominator */ |
| uint8_t numeric_value_exp; /* Power-of-ten exponent */ |
| uint8_t age_ma, age_mi; /* Presumably major/minor version number of the Age property -- TODO confirm */ |
| enum unicode_general_category general_category; /* The enum unicode_* types used from here on are not defined in the visible header text */ |
| enum unicode_block block; |
| enum unicode_script script; |
| enum unicode_joining_type joining_type; |
| enum unicode_joining_group joining_group; |
| enum unicode_east_asian_width east_asian_width; |
| enum unicode_hangul_syllable_type hangul_syllable_type; |
| enum unicode_numeric_type numeric_type; /* Presumably qualifies the numeric_value_* members above -- TODO confirm */ |
| enum unicode_canonical_combining_class canonical_combining_class; |
| enum unicode_bidi_class bidi_class; |
| enum unicode_grapheme_cluster_break grapheme_cluster_break; |
| enum unicode_sentence_break sentence_break; |
| enum unicode_word_break word_break; |
| enum unicode_line_break line_break; |
| }; |
| |
| /* |
| * Public entry points. Only the declarations are visible in this |
| * header; the behavior notes below are inferred from names and types |
| * and should be confirmed against the implementation. |
| */ |
| |
| /* Presumably returns the record for code point `ucs` (failure value not shown here). */ |
| const struct unicode_character_data *unicode_character_data(int32_t ucs); |
| |
| /* Takes a record and returns a record pointer; presumably acquires an extra reference to `ucd` -- TODO confirm. */ |
| const struct unicode_character_data *unicode_character_get(const struct unicode_character_data *ucd); |
| |
| /* Presumably releases a record obtained from one of the functions above -- TODO confirm. */ |
| void unicode_character_put(const struct unicode_character_data *ucd); |
| |
| /* Presumably finds a record by character name `name`; matching rules are not shown here. */ |
| const struct unicode_character_data *unicode_character_lookup(const char *name); |
| |
| /* Returns the database version as an int; the encoding of the version number is not shown here. */ |
| int unicode_database_version(void); |
| |
| #endif /* UCD_H */ |