/* blob: 3f557b013c15a58237b00675a4a047eecd2f1f14 [file] [log] [blame] */
/* -*- c -*- ------------------------------------------------------------- *
*
* Copyright 2005 H. Peter Anvin - All Rights Reserved
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall
* be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ----------------------------------------------------------------------- */
/*
* ucd.h
*
* Simple interface to the Unicode Character Database
*/
#ifndef UCD_H
#define UCD_H
#include <stdint.h>
#define LIBUCD_THREAD_SUPPORT
#ifdef LIBUCD_THREAD_SUPPORT
# include <pthread.h>
#endif
/*
 * NOTE(review): ENUMS is not defined anywhere in this file.  It looks like
 * a placeholder that a build step replaces with the enum unicode_*
 * declarations used by struct unicode_character_data -- confirm against
 * the build before touching this line.
 */
ENUMS;
/*
 * Flag bits (UC_FL_*).
 *
 * UC_FLAG(bit) expands to a 64-bit unsigned constant with only bit
 * number `bit` set.  Bits 0 through 35 are assigned below, one per
 * flag, with no gaps and no duplicates.
 */
#define UC_FLAG(bit)                        (UINT64_C(1) << (bit))

#define UC_FL_COMPOSITION_EXCLUSION         UC_FLAG(0)
#define UC_FL_ALPHABETIC                    UC_FLAG(1)
#define UC_FL_DEFAULT_IGNORABLE_CODE_POINT  UC_FLAG(2)
#define UC_FL_LOWERCASE                     UC_FLAG(3)
#define UC_FL_GRAPHEME_BASE                 UC_FLAG(4)
#define UC_FL_GRAPHEME_EXTEND               UC_FLAG(5)
#define UC_FL_ID_START                      UC_FLAG(6)
#define UC_FL_ID_CONTINUE                   UC_FLAG(7)
#define UC_FL_MATH                          UC_FLAG(8)
#define UC_FL_UPPERCASE                     UC_FLAG(9)
#define UC_FL_XID_START                     UC_FLAG(10)
#define UC_FL_XID_CONTINUE                  UC_FLAG(11)
#define UC_FL_HEX_DIGIT                     UC_FLAG(12)
#define UC_FL_BIDI_CONTROL                  UC_FLAG(13)
#define UC_FL_DASH                          UC_FLAG(14)
#define UC_FL_DEPRECATED                    UC_FLAG(15)
#define UC_FL_DIACRITIC                     UC_FLAG(16)
#define UC_FL_EXTENDER                      UC_FLAG(17)
#define UC_FL_GRAPHEME_LINK                 UC_FLAG(18)
#define UC_FL_IDEOGRAPHIC                   UC_FLAG(19)
#define UC_FL_IDS_BINARY_OPERATOR           UC_FLAG(20)
#define UC_FL_IDS_TRINARY_OPERATOR          UC_FLAG(21)
#define UC_FL_JOIN_CONTROL                  UC_FLAG(22)
#define UC_FL_LOGICAL_ORDER_EXCEPTION       UC_FLAG(23)
#define UC_FL_NONCHARACTER_CODE_POINT       UC_FLAG(24)
#define UC_FL_PATTERN_SYNTAX                UC_FLAG(25)
#define UC_FL_PATTERN_WHITE_SPACE           UC_FLAG(26)
#define UC_FL_QUOTATION_MARK                UC_FLAG(27)
#define UC_FL_RADICAL                       UC_FLAG(28)
#define UC_FL_SOFT_DOTTED                   UC_FLAG(29)
#define UC_FL_STERM                         UC_FLAG(30)
#define UC_FL_TERMINAL_PUNCTUATION          UC_FLAG(31)
#define UC_FL_UNIFIED_IDEOGRAPH             UC_FLAG(32)
#define UC_FL_VARIATION_SELECTOR            UC_FLAG(33)
#define UC_FL_WHITE_SPACE                   UC_FLAG(34)
#define UC_FL_BIDI_MIRRORED                 UC_FLAG(35)
/*
 * Per-character record handed out (as a pointer to const) by the
 * functions declared in this header.
 *
 * Member order and types define the in-memory layout; do not reorder.
 */
struct unicode_character_data {
  /* Identity and bookkeeping */
  int32_t ucs;                  /* Actual codepoint */
  uint16_t size;                /* Size of this structure */
  uint16_t alloc_size;          /* Allocation size */
  uint64_t fl;                  /* Flags (see the UC_FL_* macros) */

  /* String-valued fields */
  const char *name;
  const char *bidi_mirroring_glyph;
  const char *uppercase_mapping;
  const char *lowercase_mapping;
  const char *titlecase_mapping;

  /* Simple case mappings, stored with the same type as ucs */
  int32_t simple_uppercase;
  int32_t simple_lowercase;
  int32_t simple_titlecase;

  /* Numeric value = num/den * 10^exp */
  uint8_t numeric_value_num;
  uint8_t numeric_value_den;
  uint8_t numeric_value_exp;

  /* NOTE(review): presumably major/minor parts of the Age property -- confirm */
  uint8_t age_ma;
  uint8_t age_mi;

  /* Enumerated fields; the enum types are declared outside this struct */
  enum unicode_general_category general_category;
  enum unicode_block block;
  enum unicode_script script;
  enum unicode_joining_type joining_type;
  enum unicode_joining_group joining_group;
  enum unicode_east_asian_width east_asian_width;
  enum unicode_hangul_syllable_type hangul_syllable_type;
  enum unicode_numeric_type numeric_type;
  enum unicode_canonical_combining_class canonical_combining_class;
  enum unicode_bidi_class bidi_class;
  enum unicode_grapheme_cluster_break grapheme_cluster_break;
  enum unicode_sentence_break sentence_break;
  enum unicode_word_break word_break;
  enum unicode_line_break line_break;
};
/*
 * Public entry points.  The definitions live outside this header, so the
 * behavioural notes below are assumptions to confirm against the
 * implementation; only the signatures are established here.
 */

/* Takes a codepoint and returns a pointer to a read-only record. */
const struct unicode_character_data *
unicode_character_data(int32_t ucs);

/*
 * NOTE(review): the get/put pair suggests reference counting on a
 * record -- confirm who owns a returned record and when it is released.
 */
const struct unicode_character_data *
unicode_character_get(const struct unicode_character_data *ucd);

void
unicode_character_put(const struct unicode_character_data *ucd);

/*
 * Takes a string and returns a pointer to a read-only record.
 * NOTE(review): presumably a lookup by character name -- confirm.
 */
const struct unicode_character_data *
unicode_character_lookup(const char *name);

/* Returns the database version as an int; the encoding is not shown here. */
int
unicode_database_version(void);
#endif /* UCD_H */