| /* |
| * |
| * Embedded Linux library |
| * |
| * Copyright (C) 2011-2014 Intel Corporation. All rights reserved. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| * |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #include <assert.h> |
| |
| #include <ell/ell.h> |
| |
/*
 * Classification attached to each UTF-8 validation vector.  The runner
 * expects validation to succeed only for UTF8_VALIDATE_TYPE_VALID; every
 * other value marks input that must be rejected and records why.
 */
enum utf8_validate_type {
	/* Well-formed input that validation must accept */
	UTF8_VALIDATE_TYPE_VALID,
	/* A multi-byte sequence that stops before its final byte */
	UTF8_VALIDATE_TYPE_INCOMPLETE,
	/* Decodes to a value that is not an acceptable Unicode character */
	UTF8_VALIDATE_TYPE_NOTUNICODE,
	/* A value encoded with more bytes than it needs */
	UTF8_VALIDATE_TYPE_OVERLONG,
	/* Bytes that do not form a proper sequence at all */
	UTF8_VALIDATE_TYPE_MALFORMED,
};
| |
| struct utf8_validate_test { |
| const char *utf8; |
| size_t utf8_len; |
| enum utf8_validate_type type; |
| const wchar_t *ucs4; |
| size_t ucs4_len; |
| }; |
| |
| static const char utf8_1[] = { |
| 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, |
| 0xbc, 0xce, 0xb5 }; |
| static const wchar_t ucs4_1[] = { 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 }; |
| |
| static struct utf8_validate_test utf8_validate_test1 = { |
| .utf8 = utf8_1, |
| .utf8_len = 11, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_1, |
| .ucs4_len = 5, |
| }; |
| |
| static const char utf8_2[] = { 0xc2, 0x80 }; |
| static const wchar_t ucs4_2[] = { 0x0080 }; |
| |
| static struct utf8_validate_test utf8_validate_test2 = { |
| .utf8 = utf8_2, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_2, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_3[] = { 0xe0, 0xa0, 0x80 }; |
| static const wchar_t ucs4_3[] = { 0x0800 }; |
| |
| static struct utf8_validate_test utf8_validate_test3 = { |
| .utf8 = utf8_3, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_3, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_4[] = { 0xf0, 0x90, 0x80, 0x80 }; |
| static const wchar_t ucs4_4[] = { 0x00010000 }; |
| |
| static struct utf8_validate_test utf8_validate_test4 = { |
| .utf8 = utf8_4, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_4, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_5[] = { 0xf8, 0x88, 0x80, 0x80, 0x80 }; |
| static const wchar_t ucs4_5[] = { 0x00200000 }; |
| |
| static struct utf8_validate_test utf8_validate_test5 = { |
| .utf8 = utf8_5, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_5, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_6[] = { 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 }; |
| static const wchar_t ucs4_6[] = { 0x04000000 }; |
| |
| static struct utf8_validate_test utf8_validate_test6 = { |
| .utf8 = utf8_6, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_6, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_7[] = { 0x7f }; |
| static const wchar_t ucs4_7[] = { 0x0000007f }; |
| |
| static struct utf8_validate_test utf8_validate_test7 = { |
| .utf8 = utf8_7, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_7, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_8[] = { 0xdf, 0xbf }; |
| static const wchar_t ucs4_8[] = { 0x000007ff }; |
| |
| static struct utf8_validate_test utf8_validate_test8 = { |
| .utf8 = utf8_8, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_8, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_9[] = { 0xef, 0xbf, 0xbf }; |
| static const wchar_t ucs4_9[] = { 0x0000ffff }; |
| |
| static struct utf8_validate_test utf8_validate_test9 = { |
| .utf8 = utf8_9, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_9, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_10[] = { 0xf7, 0xbf, 0xbf, 0xbf }; |
| static const wchar_t ucs4_10[] = { 0x001fffff }; |
| |
| static struct utf8_validate_test utf8_validate_test10 = { |
| .utf8 = utf8_10, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_10, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_11[] = { 0xfb, 0xbf, 0xbf, 0xbf, 0xbf }; |
| static const wchar_t ucs4_11[] = { 0x03ffffff }; |
| |
| static struct utf8_validate_test utf8_validate_test11 = { |
| .utf8 = utf8_11, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_11, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_12[] = { 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf }; |
| static const wchar_t ucs4_12[] = { 0x7fffffff }; |
| |
| static struct utf8_validate_test utf8_validate_test12 = { |
| .utf8 = utf8_12, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_12, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_13[] = { 0xed, 0x9f, 0xbf }; |
| static const wchar_t ucs4_13[] = { 0xd7ff }; |
| |
| static struct utf8_validate_test utf8_validate_test13 = { |
| .utf8 = utf8_13, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_13, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_14[] = { 0xee, 0x80, 0x80 }; |
| static const wchar_t ucs4_14[] = { 0xe000 }; |
| |
| static struct utf8_validate_test utf8_validate_test14 = { |
| .utf8 = utf8_14, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_14, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_15[] = { 0xef, 0xbf, 0xbd }; |
| static const wchar_t ucs4_15[] = { 0xfffd }; |
| |
| static struct utf8_validate_test utf8_validate_test15 = { |
| .utf8 = utf8_15, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_15, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_16[] = { 0xf4, 0x8f, 0xbf, 0xbd }; |
| static const wchar_t ucs4_16[] = { 0x0010fffd }; |
| |
| static struct utf8_validate_test utf8_validate_test16 = { |
| .utf8 = utf8_16, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_16, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_17[] = { 0xf4, 0x8f, 0xbf, 0xbf }; |
| static const wchar_t ucs4_17[] = { 0x0010ffff }; |
| |
| static struct utf8_validate_test utf8_validate_test17 = { |
| .utf8 = utf8_17, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_17, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_18[] = { 0xf4, 0x90, 0x80, 0x80 }; |
| static const wchar_t ucs4_18[] = { 0x00110000 }; |
| |
| static struct utf8_validate_test utf8_validate_test18 = { |
| .utf8 = utf8_18, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_18, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_19[] = { 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test19 = { |
| .utf8 = utf8_19, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_20[] = { 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test20 = { |
| .utf8 = utf8_20, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_21[] = { 0x80, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test21 = { |
| .utf8 = utf8_21, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_22[] = { 0x80, 0xbf, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test22 = { |
| .utf8 = utf8_22, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_23[] = { 0x80, 0xbf, 0x80, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test23 = { |
| .utf8 = utf8_23, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_24[] = { 0x80, 0xbf, 0x80, 0xbf, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test24 = { |
| .utf8 = utf8_24, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_25[] = { 0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test25 = { |
| .utf8 = utf8_25, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_26[] = { 0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test26 = { |
| .utf8 = utf8_26, |
| .utf8_len = 7, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_27[] = { |
| 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, |
| 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, |
| 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, |
| 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, |
| 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test27 = { |
| .utf8 = utf8_27, |
| .utf8_len = 63, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_28[] = { |
| 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20, 0xc3, 0x20, |
| 0xc4, 0x20, 0xc5, 0x20, 0xc6, 0x20, 0xc7, 0x20, |
| 0xc8, 0x20, 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20, |
| 0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20, 0xcf, 0x20, |
| 0xd0, 0x20, 0xd1, 0x20, 0xd2, 0x20, 0xd3, 0x20, |
| 0xd4, 0x20, 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20, |
| 0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20, 0xdb, 0x20, |
| 0xdc, 0x20, 0xdd, 0x20, 0xde, 0x20, 0xdf, 0x20 }; |
| |
| static struct utf8_validate_test utf8_validate_test28 = { |
| .utf8 = utf8_28, |
| .utf8_len = 64, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_29[] = { |
| 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20, 0xe3, 0x20, |
| 0xe4, 0x20, 0xe5, 0x20, 0xe6, 0x20, 0xe7, 0x20, |
| 0xe8, 0x20, 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20, |
| 0xec, 0x20, 0xed, 0x20, 0xee, 0x20, 0xef, 0x20 }; |
| |
| static struct utf8_validate_test utf8_validate_test29 = { |
| .utf8 = utf8_29, |
| .utf8_len = 32, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_30[] = { |
| 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20, 0xf3, 0x20, |
| 0xf4, 0x20, 0xf5, 0x20, 0xf6, 0x20, 0xf7, 0x20 }; |
| |
| static struct utf8_validate_test utf8_validate_test30 = { |
| .utf8 = utf8_30, |
| .utf8_len = 16, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_31[] = { |
| 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20, 0xfb, 0x20 }; |
| |
| static struct utf8_validate_test utf8_validate_test31 = { |
| .utf8 = utf8_31, |
| .utf8_len = 8, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_32[] = { 0xfc, 0x20, 0xfd, 0x20 }; |
| |
| static struct utf8_validate_test utf8_validate_test32 = { |
| .utf8 = utf8_32, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_33[] = { 0xc0 }; |
| |
| static struct utf8_validate_test utf8_validate_test33 = { |
| .utf8 = utf8_33, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_34[] = { 0xe0, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test34 = { |
| .utf8 = utf8_34, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_35[] = { 0xf0, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test35 = { |
| .utf8 = utf8_35, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_36[] = { 0xf8, 0x80, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test36 = { |
| .utf8 = utf8_36, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_37[] = { 0xfc, 0x80, 0x80, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test37 = { |
| .utf8 = utf8_37, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_38[] = { 0xdf }; |
| |
| static struct utf8_validate_test utf8_validate_test38 = { |
| .utf8 = utf8_38, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_39[] = { 0xef, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test39 = { |
| .utf8 = utf8_39, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_40[] = { 0xf7, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test40 = { |
| .utf8 = utf8_40, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_41[] = { 0xfb, 0xbf, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test41 = { |
| .utf8 = utf8_41, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_42[] = { 0xfd, 0xbf, 0xbf, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test42 = { |
| .utf8 = utf8_42, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_INCOMPLETE, |
| }; |
| |
| static const char utf8_43[] = { |
| 0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80, 0xf8, 0x80, |
| 0x80, 0x80, 0xfc, 0x80, 0x80, 0x80, 0x80, 0xdf, |
| 0xef, 0xbf, 0xf7, 0xbf, 0xbf, 0xfb, 0xbf, 0xbf, |
| 0xbf, 0xfd, 0xbf, 0xbf, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test43 = { |
| .utf8 = utf8_43, |
| .utf8_len = 30, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_44[] = { 0xfe }; |
| |
| static struct utf8_validate_test utf8_validate_test44 = { |
| .utf8 = utf8_44, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_45[] = { 0xff }; |
| |
| static struct utf8_validate_test utf8_validate_test45 = { |
| .utf8 = utf8_45, |
| .utf8_len = 1, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_46[] = { 0xfe, 0xfe, 0xff, 0xff }; |
| |
| static struct utf8_validate_test utf8_validate_test46 = { |
| .utf8 = utf8_46, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_MALFORMED, |
| }; |
| |
| static const char utf8_47[] = { 0xc0, 0xaf }; |
| |
| static struct utf8_validate_test utf8_validate_test47 = { |
| .utf8 = utf8_47, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_48[] = { 0xe0, 0x80, 0xaf }; |
| |
| static struct utf8_validate_test utf8_validate_test48 = { |
| .utf8 = utf8_48, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_49[] = { 0xf0, 0x80, 0x80, 0xaf }; |
| |
| static struct utf8_validate_test utf8_validate_test49 = { |
| .utf8 = utf8_49, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_50[] = { 0xf8, 0x80, 0x80, 0x80, 0xaf }; |
| |
| static struct utf8_validate_test utf8_validate_test50 = { |
| .utf8 = utf8_50, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_51[] = { 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf }; |
| |
| static struct utf8_validate_test utf8_validate_test51 = { |
| .utf8 = utf8_51, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_52[] = { 0xc1, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test52 = { |
| .utf8 = utf8_52, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_53[] = { 0xe0, 0x9f, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test53 = { |
| .utf8 = utf8_53, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_54[] = { 0xf0, 0x8f, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test54 = { |
| .utf8 = utf8_54, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_55[] = { 0xf8, 0x87, 0xbf, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test55 = { |
| .utf8 = utf8_55, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_56[] = { 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf }; |
| |
| static struct utf8_validate_test utf8_validate_test56 = { |
| .utf8 = utf8_56, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_57[] = { 0xc0, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test57 = { |
| .utf8 = utf8_57, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_58[] = { 0xe0, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test58 = { |
| .utf8 = utf8_58, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_59[] = { 0xf0, 0x80, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test59 = { |
| .utf8 = utf8_59, |
| .utf8_len = 4, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_60[] = { 0xf8, 0x80, 0x80, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test60 = { |
| .utf8 = utf8_60, |
| .utf8_len = 5, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_61[] = { 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 }; |
| |
| static struct utf8_validate_test utf8_validate_test61 = { |
| .utf8 = utf8_61, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_OVERLONG, |
| }; |
| |
| static const char utf8_62[] = { 0xed, 0xa0, 0x80 }; |
| static const wchar_t ucs4_62[] = { 0xd800 }; |
| |
| static struct utf8_validate_test utf8_validate_test62 = { |
| .utf8 = utf8_62, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_62, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_63[] = { 0xed, 0xad, 0xbf }; |
| static const wchar_t ucs4_63[] = { 0xdb7f }; |
| |
| static struct utf8_validate_test utf8_validate_test63 = { |
| .utf8 = utf8_63, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_63, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_64[] = { 0xed, 0xae, 0x80 }; |
| static const wchar_t ucs4_64[] = { 0xdb80 }; |
| |
| static struct utf8_validate_test utf8_validate_test64 = { |
| .utf8 = utf8_64, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_64, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_65[] = { 0xed, 0xaf, 0xbf }; |
| static const wchar_t ucs4_65[] = { 0xdbff }; |
| |
| static struct utf8_validate_test utf8_validate_test65 = { |
| .utf8 = utf8_65, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_65, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_66[] = { 0xed, 0xb0, 0x80 }; |
| static const wchar_t ucs4_66[] = { 0xdc00 }; |
| |
| static struct utf8_validate_test utf8_validate_test66 = { |
| .utf8 = utf8_66, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_66, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_67[] = { 0xed, 0xbe, 0x80 }; |
| static const wchar_t ucs4_67[] = { 0xdf80 }; |
| |
| static struct utf8_validate_test utf8_validate_test67 = { |
| .utf8 = utf8_67, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_67, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_68[] = { 0xed, 0xbf, 0xbf }; |
| static const wchar_t ucs4_68[] = { 0xdfff }; |
| |
| static struct utf8_validate_test utf8_validate_test68 = { |
| .utf8 = utf8_68, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_68, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_69[] = { 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 }; |
| static const wchar_t ucs4_69[] = { 0xd800, 0xdc00 }; |
| |
| static struct utf8_validate_test utf8_validate_test69 = { |
| .utf8 = utf8_69, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_69, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_70[] = { 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf }; |
| static const wchar_t ucs4_70[] = { 0xd800, 0xdfff }; |
| |
| static struct utf8_validate_test utf8_validate_test70 = { |
| .utf8 = utf8_70, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_70, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_71[] = { 0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80 }; |
| static const wchar_t ucs4_71[] = { 0xdb7f, 0xdc00 }; |
| |
| static struct utf8_validate_test utf8_validate_test71 = { |
| .utf8 = utf8_71, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_71, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_72[] = { 0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf }; |
| static const wchar_t ucs4_72[] = { 0xdb7f, 0xdfff }; |
| |
| static struct utf8_validate_test utf8_validate_test72 = { |
| .utf8 = utf8_72, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_72, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_73[] = { 0xed, 0xae, 0x80, 0xed, 0xb0, 0x80 }; |
| static const wchar_t ucs4_73[] = { 0xdb80, 0xdc00 }; |
| |
| static struct utf8_validate_test utf8_validate_test73 = { |
| .utf8 = utf8_73, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_73, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_74[] = { 0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf }; |
| static const wchar_t ucs4_74[] = { 0xdb80, 0xdfff }; |
| |
| static struct utf8_validate_test utf8_validate_test74 = { |
| .utf8 = utf8_74, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_74, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_75[] = { 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 }; |
| static const wchar_t ucs4_75[] = { 0xdbff, 0xdc00 }; |
| |
| static struct utf8_validate_test utf8_validate_test75 = { |
| .utf8 = utf8_75, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_75, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_76[] = { 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf }; |
| static const wchar_t ucs4_76[] = { 0xdbff, 0xdfff }; |
| |
| static struct utf8_validate_test utf8_validate_test76 = { |
| .utf8 = utf8_76, |
| .utf8_len = 6, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_76, |
| .ucs4_len = 2, |
| }; |
| |
| static const char utf8_77[] = { 0xef, 0xbf, 0xbe }; |
| static const wchar_t ucs4_77[] = { 0xfffe }; |
| |
| static struct utf8_validate_test utf8_validate_test77 = { |
| .utf8 = utf8_77, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_77, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_78[] = { 0xef, 0xbf, 0xbf }; |
| static const wchar_t ucs4_78[] = { 0xffff }; |
| |
| static struct utf8_validate_test utf8_validate_test78 = { |
| .utf8 = utf8_78, |
| .utf8_len = 3, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_78, |
| .ucs4_len = 1, |
| }; |
| |
| static const char utf8_79[] = { |
| 0x41, 0xf0, 0x90, 0x80, 0x80, 0x42, 0xf4, 0x8f, |
| 0xbf, 0xbd, 0x43 }; |
| static const wchar_t ucs4_79[] = { 0x41, 0x00010000, 0x42, 0x10fffd, 0x43 }; |
| |
| static struct utf8_validate_test utf8_validate_test79 = { |
| .utf8 = utf8_79, |
| .utf8_len = 11, |
| .type = UTF8_VALIDATE_TYPE_VALID, |
| .ucs4 = ucs4_79, |
| .ucs4_len = 5, |
| }; |
| |
| static const char utf8_80[] = { 0xdf, 0x65 }; |
| static const wchar_t ucs4_80[] = { 0xffff }; |
| |
| static struct utf8_validate_test utf8_validate_test80 = { |
| .utf8 = utf8_80, |
| .utf8_len = 2, |
| .type = UTF8_VALIDATE_TYPE_NOTUNICODE, |
| .ucs4 = ucs4_80, |
| .ucs4_len = 1, |
| }; |
| |
| static void test_utf8_codepoint(const struct utf8_validate_test *test) |
| { |
| unsigned int i, pos; |
| int ret; |
| wchar_t val; |
| |
| for (i = 0, pos = 0; i < test->ucs4_len; ++i) { |
| ret = l_utf8_get_codepoint(test->utf8 + pos, |
| test->utf8_len - pos, &val); |
| assert(ret > 0 && val == test->ucs4[i]); |
| pos += ret; |
| } |
| } |
| |
| static void test_utf8_validate(const void *test_data) |
| { |
| const struct utf8_validate_test *test = test_data; |
| const char *end; |
| bool res; |
| |
| res = l_utf8_validate(test->utf8, test->utf8_len, &end); |
| |
| if (test->type == UTF8_VALIDATE_TYPE_VALID) |
| assert(res == true); |
| else |
| assert(res == false); |
| |
| if (test->type == UTF8_VALIDATE_TYPE_VALID && test->ucs4_len) { |
| test_utf8_codepoint(test); |
| } |
| } |
| |
/* Input string and the length l_utf8_strlen() is expected to report. */
struct utf8_strlen_test {
	const char *utf8;	/* NUL-terminated input string */
	size_t utf8_len;	/* expected result of l_utf8_strlen() */
};
| |
| static struct utf8_strlen_test utf8_strlen_test1 = { |
| .utf8 = "abc\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", |
| .utf8_len = 8, |
| }; |
| |
| static void test_utf8_strlen(const void *test_data) |
| { |
| const struct utf8_strlen_test *test = test_data; |
| size_t len; |
| |
| len = l_utf8_strlen(test->utf8); |
| assert(len == test->utf8_len); |
| } |
| |
/*
 * Conversion vector shared by the UTF-16 -> UTF-8 and UTF-8 -> UTF-16
 * tests.  A vector without a utf8 string describes UTF-16 input whose
 * conversion is expected to fail.
 */
struct utf8_from_utf16_test {
	uint16_t utf16[64];	/* UTF-16 code units */
	size_t utf16_size;	/* size handed to / expected from the library */
	const char *utf8;	/* matching UTF-8 string, unset on failure */
};
| |
| static struct utf8_from_utf16_test utf8_from_utf16_test1 = { |
| .utf16 = { 0x61, 0x62, 0x63, 0x00 }, |
| .utf16_size = 8, |
| .utf8 = "abc", |
| }; |
| |
| static struct utf8_from_utf16_test utf8_from_utf16_test2 = { |
| .utf16 = { 0x03b1, 0x03b2, 0x03b3, 0x00 }, |
| .utf16_size = 8, |
| .utf8 = "\316\261\316\262\316\263", |
| }; |
| |
| static struct utf8_from_utf16_test utf8_from_utf16_test3 = { |
| .utf16 = { 0x61, 0x62, 0xd801, 0x00 }, |
| .utf16_size = 8, |
| }; |
| |
| static struct utf8_from_utf16_test utf8_from_utf16_test4 = { |
| .utf16 = { 0x61, 0x62, 0xdc01, 0x00 }, |
| .utf16_size = 8, |
| }; |
| |
| static void test_utf8_from_utf16(const void *test_data) |
| { |
| const struct utf8_from_utf16_test *test = test_data; |
| char *utf8; |
| |
| utf8 = l_utf8_from_utf16(test->utf16, test->utf16_size); |
| |
| if (test->utf8) { |
| assert(utf8); |
| assert(!strcmp(utf8, test->utf8)); |
| l_free(utf8); |
| } else |
| assert(!utf8); |
| } |
| |
| static void test_utf8_to_utf16(const void *test_data) |
| { |
| const struct utf8_from_utf16_test *test = test_data; |
| void *utf16; |
| size_t size; |
| |
| utf16 = l_utf8_to_utf16(test->utf8, &size); |
| assert(utf16); |
| assert(size == test->utf16_size); |
| assert(!memcmp(utf16, test->utf16, size)); |
| |
| l_free(utf16); |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| l_test_init(&argc, &argv); |
| |
| l_test_add("Validate UTF 1", test_utf8_validate, |
| &utf8_validate_test1); |
| l_test_add("Validate UTF 2", test_utf8_validate, |
| &utf8_validate_test2); |
| l_test_add("Validate UTF 3", test_utf8_validate, |
| &utf8_validate_test3); |
| l_test_add("Validate UTF 4", test_utf8_validate, |
| &utf8_validate_test4); |
| l_test_add("Validate UTF 5", test_utf8_validate, |
| &utf8_validate_test5); |
| l_test_add("Validate UTF 6", test_utf8_validate, |
| &utf8_validate_test6); |
| l_test_add("Validate UTF 7", test_utf8_validate, |
| &utf8_validate_test7); |
| l_test_add("Validate UTF 8", test_utf8_validate, |
| &utf8_validate_test8); |
| l_test_add("Validate UTF 9", test_utf8_validate, |
| &utf8_validate_test9); |
| l_test_add("Validate UTF 10", test_utf8_validate, |
| &utf8_validate_test10); |
| l_test_add("Validate UTF 11", test_utf8_validate, |
| &utf8_validate_test11); |
| l_test_add("Validate UTF 12", test_utf8_validate, |
| &utf8_validate_test12); |
| l_test_add("Validate UTF 13", test_utf8_validate, |
| &utf8_validate_test13); |
| l_test_add("Validate UTF 14", test_utf8_validate, |
| &utf8_validate_test14); |
| l_test_add("Validate UTF 15", test_utf8_validate, |
| &utf8_validate_test15); |
| l_test_add("Validate UTF 16", test_utf8_validate, |
| &utf8_validate_test16); |
| l_test_add("Validate UTF 17", test_utf8_validate, |
| &utf8_validate_test17); |
| l_test_add("Validate UTF 18", test_utf8_validate, |
| &utf8_validate_test18); |
| l_test_add("Validate UTF 19", test_utf8_validate, |
| &utf8_validate_test19); |
| l_test_add("Validate UTF 20", test_utf8_validate, |
| &utf8_validate_test20); |
| l_test_add("Validate UTF 21", test_utf8_validate, |
| &utf8_validate_test21); |
| l_test_add("Validate UTF 22", test_utf8_validate, |
| &utf8_validate_test22); |
| l_test_add("Validate UTF 23", test_utf8_validate, |
| &utf8_validate_test23); |
| l_test_add("Validate UTF 24", test_utf8_validate, |
| &utf8_validate_test24); |
| l_test_add("Validate UTF 25", test_utf8_validate, |
| &utf8_validate_test25); |
| l_test_add("Validate UTF 26", test_utf8_validate, |
| &utf8_validate_test26); |
| l_test_add("Validate UTF 27", test_utf8_validate, |
| &utf8_validate_test27); |
| l_test_add("Validate UTF 28", test_utf8_validate, |
| &utf8_validate_test28); |
| l_test_add("Validate UTF 29", test_utf8_validate, |
| &utf8_validate_test29); |
| l_test_add("Validate UTF 30", test_utf8_validate, |
| &utf8_validate_test30); |
| l_test_add("Validate UTF 31", test_utf8_validate, |
| &utf8_validate_test31); |
| l_test_add("Validate UTF 32", test_utf8_validate, |
| &utf8_validate_test32); |
| l_test_add("Validate UTF 33", test_utf8_validate, |
| &utf8_validate_test33); |
| l_test_add("Validate UTF 34", test_utf8_validate, |
| &utf8_validate_test34); |
| l_test_add("Validate UTF 35", test_utf8_validate, |
| &utf8_validate_test35); |
| l_test_add("Validate UTF 36", test_utf8_validate, |
| &utf8_validate_test36); |
| l_test_add("Validate UTF 37", test_utf8_validate, |
| &utf8_validate_test37); |
| l_test_add("Validate UTF 38", test_utf8_validate, |
| &utf8_validate_test38); |
| l_test_add("Validate UTF 39", test_utf8_validate, |
| &utf8_validate_test39); |
| l_test_add("Validate UTF 40", test_utf8_validate, |
| &utf8_validate_test40); |
| l_test_add("Validate UTF 41", test_utf8_validate, |
| &utf8_validate_test41); |
| l_test_add("Validate UTF 42", test_utf8_validate, |
| &utf8_validate_test42); |
| l_test_add("Validate UTF 43", test_utf8_validate, |
| &utf8_validate_test43); |
| l_test_add("Validate UTF 44", test_utf8_validate, |
| &utf8_validate_test44); |
| l_test_add("Validate UTF 45", test_utf8_validate, |
| &utf8_validate_test45); |
| l_test_add("Validate UTF 46", test_utf8_validate, |
| &utf8_validate_test46); |
| l_test_add("Validate UTF 47", test_utf8_validate, |
| &utf8_validate_test47); |
| l_test_add("Validate UTF 48", test_utf8_validate, |
| &utf8_validate_test48); |
| l_test_add("Validate UTF 49", test_utf8_validate, |
| &utf8_validate_test49); |
| l_test_add("Validate UTF 50", test_utf8_validate, |
| &utf8_validate_test50); |
| l_test_add("Validate UTF 51", test_utf8_validate, |
| &utf8_validate_test51); |
| l_test_add("Validate UTF 52", test_utf8_validate, |
| &utf8_validate_test52); |
| l_test_add("Validate UTF 53", test_utf8_validate, |
| &utf8_validate_test53); |
| l_test_add("Validate UTF 54", test_utf8_validate, |
| &utf8_validate_test54); |
| l_test_add("Validate UTF 55", test_utf8_validate, |
| &utf8_validate_test55); |
| l_test_add("Validate UTF 56", test_utf8_validate, |
| &utf8_validate_test56); |
| l_test_add("Validate UTF 57", test_utf8_validate, |
| &utf8_validate_test57); |
| l_test_add("Validate UTF 58", test_utf8_validate, |
| &utf8_validate_test58); |
| l_test_add("Validate UTF 59", test_utf8_validate, |
| &utf8_validate_test59); |
| l_test_add("Validate UTF 60", test_utf8_validate, |
| &utf8_validate_test60); |
| l_test_add("Validate UTF 61", test_utf8_validate, |
| &utf8_validate_test61); |
| l_test_add("Validate UTF 62", test_utf8_validate, |
| &utf8_validate_test62); |
| l_test_add("Validate UTF 63", test_utf8_validate, |
| &utf8_validate_test63); |
| l_test_add("Validate UTF 64", test_utf8_validate, |
| &utf8_validate_test64); |
| l_test_add("Validate UTF 65", test_utf8_validate, |
| &utf8_validate_test65); |
| l_test_add("Validate UTF 66", test_utf8_validate, |
| &utf8_validate_test66); |
| l_test_add("Validate UTF 67", test_utf8_validate, |
| &utf8_validate_test67); |
| l_test_add("Validate UTF 68", test_utf8_validate, |
| &utf8_validate_test68); |
| l_test_add("Validate UTF 69", test_utf8_validate, |
| &utf8_validate_test69); |
| l_test_add("Validate UTF 70", test_utf8_validate, |
| &utf8_validate_test70); |
| l_test_add("Validate UTF 71", test_utf8_validate, |
| &utf8_validate_test71); |
| l_test_add("Validate UTF 72", test_utf8_validate, |
| &utf8_validate_test72); |
| l_test_add("Validate UTF 73", test_utf8_validate, |
| &utf8_validate_test73); |
| l_test_add("Validate UTF 74", test_utf8_validate, |
| &utf8_validate_test74); |
| l_test_add("Validate UTF 75", test_utf8_validate, |
| &utf8_validate_test75); |
| l_test_add("Validate UTF 76", test_utf8_validate, |
| &utf8_validate_test76); |
| l_test_add("Validate UTF 77", test_utf8_validate, |
| &utf8_validate_test77); |
| l_test_add("Validate UTF 78", test_utf8_validate, |
| &utf8_validate_test78); |
| l_test_add("Validate UTF 79", test_utf8_validate, |
| &utf8_validate_test79); |
| l_test_add("Validate UTF 80", test_utf8_validate, |
| &utf8_validate_test80); |
| |
| l_test_add("Strlen UTF 1", test_utf8_strlen, |
| &utf8_strlen_test1); |
| |
| l_test_add("utf8_from_utf16 1", test_utf8_from_utf16, |
| &utf8_from_utf16_test1); |
| l_test_add("utf8_from_utf16 2", test_utf8_from_utf16, |
| &utf8_from_utf16_test2); |
| l_test_add("utf8_from_utf16 3", test_utf8_from_utf16, |
| &utf8_from_utf16_test3); |
| l_test_add("utf8_from_utf16 4", test_utf8_from_utf16, |
| &utf8_from_utf16_test4); |
| |
| l_test_add("utf8_to_utf16 1", test_utf8_to_utf16, |
| &utf8_from_utf16_test1); |
| l_test_add("utf8_to_utf16 2", test_utf8_to_utf16, |
| &utf8_from_utf16_test2); |
| |
| return l_test_run(); |
| } |