libucd.3 - pub/scm/libs/libucd/libucd - Git at Google

 .\" -----------------------------------------------------------------------
 .\"
 .\"   Copyright 2005 H. Peter Anvin - All Rights Reserved
 .\"
 .\"   Permission is hereby granted, free of charge, to any person
 .\"   obtaining a copy of this software and associated documentation
 .\"   files (the "Software"), to deal in the Software without
 .\"   restriction, including without limitation the rights to use,
 .\"   copy, modify, merge, publish, distribute, sublicense, and/or
 .\"   sell copies of the Software, and to permit persons to whom
 .\"   the Software is furnished to do so, subject to the following
 .\"   conditions:
 .\"
 .\"   The above copyright notice and this permission notice shall
 .\"   be included in all copies or substantial portions of the Software.
 .\"
 .\"   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 .\"   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 .\"   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 .\"   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 .\"   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 .\"   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 .\"   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 .\"   OTHER DEALINGS IN THE SOFTWARE.
 .\"
 .\" -----------------------------------------------------------------------
 .TH LIBUCD 3 2005-12-29 libucd ""
 .SH NAME
 unicode_character_data, unicode_character_lookup, unicode_character_get,
 unicode_character_put, unicode_database_version \-
 query the Unicode Character Database
 .SH SYNOPSIS
 .nf
 .B #include <ucd.h>
 .sp
 .B const struct unicode_character_data *unicode_character_data(int32_t);
 .sp
 .B const struct unicode_character_data *unicode_character_lookup(const char *);
 .sp
 .B const struct unicode_character_data *unicode_character_get(const struct unicode_character_data *);
 .sp
 .B void unicode_character_put(const struct unicode_character_data *);
 .sp
 .B uint32_t unicode_database_version();
 .fi
 .SH DESCRIPTION
 These functions query the Unicode Character Database for attribute
 information about a certain character.  Attribute information is
 represented by a pointer to a reference-counted structure.  Any
 internal pointers in the structure should be assumed to point to
 memory protected by the same reference count.
 .PP
 \fBunicode_character_data()\fP returns the attributes for a character
 specified by Unicode index.  If the argument is outside the Unicode
 range (0 to 0x10ffff) it returns NULL with \fIerrno\fP set to EINVAL.
 .PP
 \fBunicode_character_lookup()\fP searches for a character by Unicode
 name (e.g. "LATIN CAPITAL LETTER A".)  If no character by that name is
 known, it returns NULL with \fIerrno\fP set to EINVAL.  Only exact
 matches are returned; if a case-insensitive matching is desired, the
 string should be upper-cased.
 .PP
 \fBunicode_character_get()\fP creates a new reference to an attribute
 structure, and returns the new reference.  This pointer may or may not
 be a pointer to the original structure.
 .PP
 \fBunicode_character_put()\fP unreferences an attribute structure and
 frees it if appropriate.
 .PP
 \fBunicode_database_version()\fP returns the version of the Unicode
 database from which the library was generated, in the format
 (major << 16)+(minor << 8)+(subminor).
 .PP
 The \fIunicode_character_data\fP structure is defined in <ucd.h> and
 contains at least the following fields:
 .sp
 .RS
 .nf
 .ne 4
 .ta 0n 4n 44n
 struct unicode_character_data {
 	int32_t					ucs;
 	uint16_t				size;
 	uint64_t				fl;
 	const char				*name;
 	int32_t					simple_uppercase;
 	int32_t					simple_lowercase;
 	int32_t					simple_titlecase;
 	uint8_t					numeric_value_num;
 	uint8_t					numeric_value_den;
 	uint8_t					numeric_value_exp;
 	uint8_t					age_ma, age_mi;
 	enum unicode_general_category		general_category;
 	enum unicode_block			block;
 	enum unicode_script			script;
 	enum unicode_joining_type		joining_type;
 	enum unicode_joining_group		joining_group;
 	enum unicode_east_asian_width		east_asian_width;
 	enum unicode_hangul_syllable_type	hangul_syllable_type;
 	enum unicode_numeric_type		numeric_type;
 	enum unicode_canonical_combining_class	canonical_combining_class;
 	enum unicode_bidi_class			bidi_class;
 	enum unicode_grapheme_cluster_break	grapheme_cluster_break;
 	enum unicode_sentence_break		sentence_break;
 	enum unicode_word_break			word_break;
 	enum unicode_line_break			line_break;
 };
 .ta
 .fi
 .RE
 .PP
 The members of the \fIunicode_character_data\fP structure are:
 .TP
 .B ucs
 The Unicode index of the character.
 .TP
 .B size
 The size of the structure, in bytes.  This can be used to determine
 the availability of a specific field if one is added in future
 versions.
 .TP
 .B fl
 A bitwise combination of flags (UC_FL_*), defined in <ucd.h>.
 .TP
 .B name
 The Unicode name of the character.
 .TP
 .B bidi_mirroring_glyph
 The Unicode string which corresponds to the mirror image of this
 character.  \fINot yet implemented.\fP
 .TP
 .B simple_uppercase
 The simple (single codepoint) uppercase mapping for this character.
 .TP
 .B simple_lowercase
 The simple (single codepoint) lowercase mapping for this character.
 .TP
 .B simple_titlecase
 The simple (single codepoint) titlecase mapping for this character.
 .TP
 .B numeric_value_num
 .TP
 .B numeric_value_den
 .TP
 .B numeric_value_exp
 For a number, the numeric value is given as num/den * 10^exp.
 .TP
 .B age_ma
 .TP
 .B age_mi
 Major and minor Unicode version when this character was introduced.
 If this is a vacant codepoint, this has the value 0.0.
 .PP
 All enumerations are properties defined in the Unicode standard.  Most
 Unicode properties have both a long and a short form.  The
 corresponding strings can be obtained by calling the function
 .sp
 .B int unicode_property_names_\fIproperty\fP(enum
 unicode_\fIproperty\fP \fIvalue\fP, const char **\fIlongname\fP, const char **\fIshortname\fP);
 .sp
 where the first argument is the enumeration value, and the \fIlongname\fP
 and \fIshortname\fP arguments return pointers to the respective strings.
 .SH "RETURN VALUE"
 \fBunicode_character_data()\fP, \fBunicode_character_lookup()\fP, and
 \fBunicode_character_get()\fP return an attribute structure pointer on
 success, or NULL on failure.  In the case of failure, \fIerrno\fP is
 set to the appropriate error value (in the current implementation,
 either EINVAL or ENOMEM.)
 .PP
 \fBunicode_database_version()\fP returns the version of the underlying
 Unicode database, in the format (major << 16)+(minor << 8)+(subminor).
 .PP
 The \fBunicode_property_names\fP functions return zero on success, or
 nonzero if the enumeration value was out of range.
 .SH "BUGS"
 The fields related to bidirectional mirroring and non-simple case
 mappings are not yet populated.
 .PP
 There is no interface to the Unihan database.  This perhaps should be
 a separate library.
 .SH "SEE ALSO"
 The Unicode Standard,
 .IR http://www.unicode.org/ .
	.\" -----------------------------------------------------------------------
	.\"
	.\" Copyright 2005 H. Peter Anvin - All Rights Reserved
	.\"
	.\" Permission is hereby granted, free of charge, to any person
	.\" obtaining a copy of this software and associated documentation
	.\" files (the "Software"), to deal in the Software without
	.\" restriction, including without limitation the rights to use,
	.\" copy, modify, merge, publish, distribute, sublicense, and/or
	.\" sell copies of the Software, and to permit persons to whom
	.\" the Software is furnished to do so, subject to the following
	.\" conditions:
	.\"
	.\" The above copyright notice and this permission notice shall
	.\" be included in all copies or substantial portions of the Software.
	.\"
	.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
	.\" OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
	.\" NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
	.\" HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
	.\" WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
	.\" OTHER DEALINGS IN THE SOFTWARE.
	.\"
	.\" -----------------------------------------------------------------------
	.TH LIBUCD 3 2005-12-29 libucd ""
	.SH NAME
	unicode_character_data, unicode_character_lookup, unicode_character_get,
	unicode_character_put, unicode_database_version \-
	query the Unicode Character Database
	.SH SYNOPSIS
	.nf
	.B #include <ucd.h>
	.sp
	.B const struct unicode_character_data *unicode_character_data(int32_t);
	.sp
	.B const struct unicode_character_data *unicode_character_lookup(const char *);
	.sp
	.B const struct unicode_character_data *unicode_character_get(const struct unicode_character_data *);
	.sp
	.B void unicode_character_put(const struct unicode_character_data *);
	.sp
	.B uint32_t unicode_database_version();
	.fi
	.SH DESCRIPTION
	These functions query the Unicode Character Database for attribute
	information about a certain character. Attribute information is
	represented by a pointer to a reference-counted structure. Any
	internal pointers in the structure should be assumed to point to
	memory protected by the same reference count.
	.PP
	\fBunicode_character_data()\fP returns the attributes for a character
	specified by Unicode index. If the argument is outside the Unicode
	range (0 to 0x10ffff) it returns NULL with \fIerrno\fP set to EINVAL.
	.PP
	\fBunicode_character_lookup()\fP searches for a character by Unicode
	name (e.g. "LATIN CAPITAL LETTER A".) If no character by that name is
	known, it returns NULL with \fIerrno\fP set to EINVAL. Only exact
	matches are returned; if a case-insensitive matching is desired, the
	string should be upper-cased.
	.PP
	\fBunicode_character_get()\fP creates a new reference to an attribute
	structure, and returns the new reference. This pointer may or may not
	be a pointer to the original structure.
	.PP
	\fBunicode_character_put()\fP unreferences an attribute structure and
	frees it if appropriate.
	.PP
	\fBunicode_database_version()\fP returns the version of the Unicode
	database from which the library was generated, in the format
	(major << 16)+(minor << 8)+(subminor).
	.PP
	The \fIunicode_character_data\fP structure is defined in <ucd.h> and
	contains at least the following fields:
	.sp
	.RS
	.nf
	.ne 4
	.ta 0n 4n 44n
	struct unicode_character_data {
	int32_t ucs;
	uint16_t size;
	uint64_t fl;
	const char *name;
	int32_t simple_uppercase;
	int32_t simple_lowercase;
	int32_t simple_titlecase;
	uint8_t numeric_value_num;
	uint8_t numeric_value_den;
	uint8_t numeric_value_exp;
	uint8_t age_ma, age_mi;
	enum unicode_general_category general_category;
	enum unicode_block block;
	enum unicode_script script;
	enum unicode_joining_type joining_type;
	enum unicode_joining_group joining_group;
	enum unicode_east_asian_width east_asian_width;
	enum unicode_hangul_syllable_type hangul_syllable_type;
	enum unicode_numeric_type numeric_type;
	enum unicode_canonical_combining_class canonical_combining_class;
	enum unicode_bidi_class bidi_class;
	enum unicode_grapheme_cluster_break grapheme_cluster_break;
	enum unicode_sentence_break sentence_break;
	enum unicode_word_break word_break;
	enum unicode_line_break line_break;
	};
	.ta
	.fi
	.RE
	.PP
	The members of the \fIunicode_character_data\fP structure are:
	.TP
	.B ucs
	The Unicode index of the character.
	.TP
	.B size
	The size of the structure, in bytes. This can be used to determine
	the availability of a specific field if one is added in future
	versions.
	.TP
	.B fl
	A bitwise combination of flags (UC_FL_*), defined in <ucd.h>.
	.TP
	.B name
	The Unicode name of the character.
	.TP
	.B bidi_mirroring_glyph
	The Unicode string which corresponds to the mirror image of this
	character. \fINot yet implemented.\fP
	.TP
	.B simple_uppercase
	The simple (single codepoint) uppercase mapping for this character.
	.TP
	.B simple_lowercase
	The simple (single codepoint) lowercase mapping for this character.
	.TP
	.B simple_titlecase
	The simple (single codepoint) titlecase mapping for this character.
	.TP
	.B numeric_value_num
	.TP
	.B numeric_value_den
	.TP
	.B numeric_value_exp
	For a number, the numeric value is given as num/den * 10^exp.
	.TP
	.B age_ma
	.TP
	.B age_mi
	Major and minor Unicode version when this character was introduced.
	If this is a vacant codepoint, this has the value 0.0.
	.PP
	All enumerations are properties defined in the Unicode standard. Most
	Unicode properties have both a long and a short form. The
	corresponding strings can be obtained by calling the function
	.sp
	.B int unicode_property_names_\fIproperty\fP(enum
	unicode_\fIproperty\fP \fIvalue\fP, const char **\fIlongname\fP, const char **\fIshortname\fP);
	.sp
	where the first argument is the enumeration value, and the \fIlongname\fP
	and \fIshortname\fP arguments return pointers to the respective strings.
	.SH "RETURN VALUE"
	\fBunicode_character_data()\fP, \fBunicode_character_lookup()\fP, and
	\fBunicode_character_get()\fP return an attribute structure pointer on
	success, or NULL on failure. In the case of failure, \fIerrno\fP is
	set to the appropriate error value (in the current implementation,
	either EINVAL or ENOMEM.)
	.PP
	\fBunicode_database_version()\fP returns the version of the underlying
	Unicode database, in the format (major << 16)+(minor << 8)+(subminor).
	.PP
	The \fBunicode_property_names\fP functions return zero on success, or
	nonzero if the enumeration value was out of range.
	.SH "BUGS"
	The fields related to bidirectional mirroring and non-simple case
	mappings are not yet populated.
	.PP
	There is no interface to the Unihan database. This perhaps should be
	a separate library.
	.SH "SEE ALSO"
	The Unicode Standard,
	.IR http://www.unicode.org/ .