| .\" Copyright (c) Bruno Haible <haible@clisp.cons.org> |
| .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com> |
| .\" |
| .\" %%%LICENSE_START(GPLv2+_DOC_ONEPARA) |
| .\" This is free documentation; you can redistribute it and/or |
| .\" modify it under the terms of the GNU General Public License as |
| .\" published by the Free Software Foundation; either version 2 of |
| .\" the License, or (at your option) any later version. |
| .\" %%%LICENSE_END |
| .\" |
| .\" References consulted: |
| .\" GNU glibc-2 source code and manual |
| .\" Dinkumware C library reference http://www.dinkumware.com/ |
| .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html |
| .\" ISO/IEC 9899:1999 |
| .\" |
| .TH MBSTOWCS 3 2021-03-22 "GNU" "Linux Programmer's Manual" |
| .SH NAME |
| mbstowcs \- convert a multibyte string to a wide-character string |
| .SH SYNOPSIS |
| .nf |
| .B #include <stdlib.h> |
| .PP |
| .BI "size_t mbstowcs(wchar_t *restrict " dest ", const char *restrict " src , |
| .BI " size_t " n ); |
| .fi |
| .SH DESCRIPTION |
| If |
| .I dest |
| is not NULL, |
| the |
| .BR mbstowcs () |
| function converts the |
| multibyte string |
| .I src |
| to a wide-character string starting at |
| .IR dest . |
| At most |
| .I n |
| wide characters are written to |
| .IR dest . |
| The sequence of characters in the string |
| .I src |
| shall begin in the initial shift state. |
| The conversion can stop for three reasons: |
| .IP 1. 3 |
| An invalid multibyte sequence has been encountered. |
| In this case, |
| .I (size_t)\ \-1 |
| is returned. |
| .IP 2. |
| .I n |
| non-L\(aq\e0\(aq wide characters have been stored at |
| .IR dest . |
| In this case, the number of wide characters written to |
| .I dest |
| is returned, but the |
| shift state at this point is lost. |
| .IP 3. |
| The multibyte string has been completely converted, including the |
| terminating null character (\(aq\e0\(aq). |
| In this case, the number of wide characters written to |
| .IR dest , |
| excluding the terminating null wide character, is returned. |
| .PP |
| The programmer must ensure that there is room for at least |
| .I n |
| wide |
| characters at |
| .IR dest . |
| .PP |
| If |
| .IR dest |
| is NULL, |
| .I n |
| is ignored, and the conversion proceeds as |
| above, except that the converted wide characters are not written out to memory, |
| and that no length limit exists. |
| .PP |
| To avoid case 2 above, the programmer should make sure that |
| .I n |
| is |
| greater than or equal to |
| .IR "mbstowcs(NULL,src,0)+1" . |
| .SH RETURN VALUE |
| The |
| .BR mbstowcs () |
| function returns the number of wide characters that make |
| up the converted part of the wide-character string, not including the |
| terminating null wide character. |
| If an invalid multibyte sequence was |
| encountered, |
| .I (size_t)\ \-1 |
| is returned. |
| .SH ATTRIBUTES |
| For an explanation of the terms used in this section, see |
| .BR attributes (7). |
| .ad l |
| .nh |
| .TS |
| allbox; |
| lbx lb lb |
| l l l. |
| Interface Attribute Value |
| T{ |
| .BR mbstowcs () |
| T} Thread safety MT-Safe |
| .TE |
| .hy |
| .ad |
| .sp 1 |
| .SH CONFORMING TO |
| POSIX.1-2001, POSIX.1-2008, C99. |
| .SH NOTES |
| The behavior of |
| .BR mbstowcs () |
| depends on the |
| .B LC_CTYPE |
| category of the |
| current locale. |
| .PP |
| The function |
| .BR mbsrtowcs (3) |
| provides a better interface to the same |
| functionality. |
| .SH EXAMPLES |
| The program below illustrates the use of |
| .BR mbstowcs (), |
| as well as some of the wide-character classification functions. |
| An example run is the following: |
| .PP |
| .in +4n |
| .EX |
| $ ./t_mbstowcs de_DE.UTF\-8 Grüße! |
| Length of source string (excluding terminator): |
| 8 bytes |
| 6 multibyte characters |
| |
| Wide character string is: Grüße! (6 characters) |
| G alpha upper |
| r alpha lower |
| ü alpha lower |
| ß alpha lower |
| e alpha lower |
| ! !alpha |
| .EE |
| .in |
| .SS Program source |
| \& |
| .EX |
| #include <wctype.h> |
| #include <locale.h> |
| #include <wchar.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| |
| /* Convert the multibyte string argv[2] to a wide-character string |
|    under the locale named by argv[1], then print each wide character |
|    together with its alphabetic/upper/lower classification. |
|    Exits with EXIT_FAILURE on a usage, locale, conversion, or |
|    allocation error. */ |
| |
| int |
| main(int argc, char *argv[]) |
| { |
|     size_t mbslen;      /* Number of multibyte characters in source */ |
|     wchar_t *wcs;       /* Pointer to converted wide character string */ |
| |
|     if (argc < 3) { |
|         fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]); |
|         exit(EXIT_FAILURE); |
|     } |
| |
|     /* Apply the specified locale. setlocale() is not specified to |
|        set errno on failure, so the error is reported explicitly |
|        instead of via perror(). */ |
| |
|     if (setlocale(LC_ALL, argv[1]) == NULL) { |
|         fprintf(stderr, "setlocale: unable to set locale %s\en", argv[1]); |
|         exit(EXIT_FAILURE); |
|     } |
| |
|     /* Calculate the length required to hold argv[2] converted to |
|        a wide character string. With a NULL destination nothing is |
|        written and the length argument is ignored. */ |
| |
|     mbslen = mbstowcs(NULL, argv[2], 0); |
|     if (mbslen == (size_t) \-1) { |
|         perror("mbstowcs"); |
|         exit(EXIT_FAILURE); |
|     } |
| |
|     /* Describe the source string to the user. */ |
| |
|     printf("Length of source string (excluding terminator):\en"); |
|     printf(" %zu bytes\en", strlen(argv[2])); |
|     printf(" %zu multibyte characters\en\en", mbslen); |
| |
|     /* Allocate wide character string of the desired size. Add 1 |
|        to allow for terminating null wide character (L\(aq\e0\(aq). */ |
| |
|     wcs = calloc(mbslen + 1, sizeof(*wcs)); |
|     if (wcs == NULL) { |
|         perror("calloc"); |
|         exit(EXIT_FAILURE); |
|     } |
| |
|     /* Convert the multibyte character string in argv[2] to a |
|        wide character string. Passing mbslen + 1 leaves room for |
|        the terminating null wide character. */ |
| |
|     if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) { |
|         perror("mbstowcs"); |
|         exit(EXIT_FAILURE); |
|     } |
| |
|     printf("Wide character string is: %ls (%zu characters)\en", |
|            wcs, mbslen); |
| |
|     /* Now do some inspection of the classes of the characters in |
|        the wide character string. */ |
| |
|     for (wchar_t *wp = wcs; *wp != 0; wp++) { |
|         printf(" %lc ", (wint_t) *wp); |
| |
|         if (iswalpha(*wp)) { |
|             printf("alpha "); |
| |
|             if (iswupper(*wp)) |
|                 printf("upper "); |
| |
|             if (iswlower(*wp)) |
|                 printf("lower "); |
|         } else { |
|             printf("!alpha "); |
|         } |
| |
|         putchar(\(aq\en\(aq); |
|     } |
| |
|     free(wcs); |
|     exit(EXIT_SUCCESS); |
| } |
| .EE |
| .SH SEE ALSO |
| .BR mblen (3), |
| .BR mbsrtowcs (3), |
| .BR mbtowc (3), |
| .BR wcstombs (3), |
| .BR wctomb (3) |