// Copyright (C) 2020-2023 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version. // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // You should have received a copy of the GNU General Public License along // with this library; see the file COPYING3. If not see // . #include #include #include #include struct test_offsets_ok { size_t in_size, out_size; }; struct test_offsets_partial { size_t in_size, out_size, expected_in_next, expected_out_next; }; template struct test_offsets_error { size_t in_size, out_size, expected_in_next, expected_out_next; CharT replace_char; size_t replace_pos; }; template auto constexpr array_size (const T (&)[N]) -> size_t { return N; } template void utf8_to_utf32_in_ok (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char in[] = "bш\uAAAA\U0010AAAA"; const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; std::copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (in) == 11, ""); static_assert (array_size (exp_literal) == 5, ""); static_assert (array_size (exp) == 5, ""); VERIFY (char_traits::length (in) == 10); VERIFY (char_traits::length (exp_literal) == 4); VERIFY (char_traits::length (exp) == 4); test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}}; for (auto t : offsets) { CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } for (auto t : offsets) { CharT out[array_size (exp)] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } } template void utf8_to_utf32_in_partial (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char in[] = "bш\uAAAA\U0010AAAA"; const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; std::copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (in) == 11, ""); static_assert (array_size (exp_literal) == 5, ""); static_assert (array_size (exp) == 5, ""); VERIFY (char_traits::length (in) == 10); VERIFY (char_traits::length (exp_literal) == 4); VERIFY (char_traits::length (exp) == 4); test_offsets_partial offsets[] = { {1, 0, 0, 0}, // no space for first CP {3, 1, 1, 1}, // no space for second CP {2, 2, 1, 1}, // incomplete second CP {2, 1, 1, 1}, // incomplete second CP, and no space for it {6, 2, 3, 2}, // no space for third CP {4, 3, 3, 2}, // incomplete third CP {5, 3, 3, 2}, // incomplete third CP {4, 2, 3, 2}, // incomplete third CP, and no space for it {5, 2, 3, 2}, // incomplete third CP, and no space for it {10, 3, 6, 3}, // no space for fourth CP {7, 4, 6, 3}, // incomplete fourth CP {8, 4, 6, 3}, // incomplete fourth CP {9, 4, 6, 3}, // incomplete fourth CP {7, 3, 6, 3}, // incomplete fourth CP, and no space for it {8, 3, 6, 3}, // incomplete fourth CP, and no space for it {9, 3, 6, 3}, // incomplete fourth CP, and no space for it }; for (auto t : offsets) { CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.partial); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf8_to_utf32_in_error (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char valid_in[] = "bш\uAAAA\U0010AAAA"; const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; std::copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (valid_in) == 11, ""); static_assert (array_size (exp_literal) == 5, ""); static_assert (array_size (exp) == 5, ""); VERIFY (char_traits::length (valid_in) == 10); VERIFY (char_traits::length (exp_literal) == 4); VERIFY (char_traits::length (exp) == 4); test_offsets_error offsets[] = { // replace leading byte with invalid byte {1, 4, 0, 0, '\xFF', 0}, {3, 4, 1, 1, '\xFF', 1}, {6, 4, 3, 2, '\xFF', 3}, {10, 4, 6, 3, '\xFF', 6}, // replace first trailing byte with ASCII byte {3, 4, 1, 1, 'z', 2}, {6, 4, 3, 2, 'z', 4}, {10, 4, 6, 3, 'z', 7}, // replace first trailing byte with invalid byte {3, 4, 1, 1, '\xFF', 2}, {6, 4, 3, 2, '\xFF', 4}, {10, 4, 6, 3, '\xFF', 7}, // replace second trailing byte with ASCII byte {6, 4, 3, 2, 'z', 5}, {10, 4, 6, 3, 'z', 8}, // replace second trailing byte with invalid byte {6, 4, 3, 2, '\xFF', 5}, {10, 4, 6, 3, '\xFF', 8}, // replace third trailing byte {10, 4, 6, 3, 'z', 9}, {10, 4, 6, 3, '\xFF', 9}, // replace first trailing byte with ASCII byte, also incomplete at end {5, 4, 3, 2, 'z', 4}, {8, 4, 6, 3, 'z', 7}, {9, 4, 6, 3, 'z', 7}, // replace first trailing byte with invalid byte, also incomplete at end {5, 4, 3, 2, '\xFF', 4}, {8, 4, 6, 3, '\xFF', 7}, {9, 4, 6, 3, '\xFF', 7}, // replace second trailing byte with ASCII byte, also incomplete at end {9, 4, 6, 3, 'z', 8}, // replace second trailing byte with invalid byte, also incomplete at end {9, 4, 6, 3, '\xFF', 8}, }; for (auto t : offsets) { char in[array_size (valid_in)] = {}; CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); char_traits::copy (in, valid_in, array_size (valid_in)); in[t.replace_pos] = t.replace_char; auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.error); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf8_to_utf32_in (const std::codecvt &cvt) { utf8_to_utf32_in_ok (cvt); utf8_to_utf32_in_partial (cvt); utf8_to_utf32_in_error (cvt); } template void utf32_to_utf8_out_ok (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char32_t in_literal[] = U"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; CharT in[array_size (in_literal)] = {}; copy (begin (in_literal), end (in_literal), begin (in)); static_assert (array_size (in_literal) == 5, ""); static_assert (array_size (in) == 5, ""); static_assert (array_size (exp) == 11, ""); VERIFY (char_traits::length (in_literal) == 4); VERIFY (char_traits::length (in) == 4); VERIFY (char_traits::length (exp) == 10); const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}}; for (auto t : offsets) { char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } } template void utf32_to_utf8_out_partial (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char32_t in_literal[] = U"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; CharT in[array_size (in_literal)] = {}; copy (begin (in_literal), end (in_literal), begin (in)); static_assert (array_size (in_literal) == 5, ""); static_assert (array_size (in) == 5, ""); static_assert (array_size (exp) == 11, ""); VERIFY (char_traits::length (in_literal) == 4); VERIFY (char_traits::length (in) == 4); VERIFY (char_traits::length (exp) == 10); const test_offsets_partial offsets[] = { {1, 0, 0, 0}, // no space for first CP {2, 1, 1, 1}, // no space for second CP {2, 2, 1, 1}, // no space for second CP {3, 3, 2, 3}, // no space for third CP {3, 4, 2, 3}, // no space for third CP {3, 5, 2, 3}, // no space for third CP {4, 6, 3, 6}, // no space for fourth CP {4, 7, 3, 6}, // no space for fourth CP {4, 8, 3, 6}, // no space for fourth CP {4, 9, 3, 6}, // no space for fourth CP }; for (auto t : offsets) { char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.partial); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf32_to_utf8_out_error (const std::codecvt &cvt) { using namespace std; const char32_t valid_in[] = U"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; static_assert (array_size (valid_in) == 5, ""); static_assert (array_size (exp) == 11, ""); VERIFY (char_traits::length (valid_in) == 4); VERIFY (char_traits::length (exp) == 10); test_offsets_error offsets[] = {{4, 10, 0, 0, 0x00110000, 0}, {4, 10, 1, 1, 0x00110000, 1}, {4, 10, 2, 3, 0x00110000, 2}, {4, 10, 3, 6, 0x00110000, 3}}; for (auto t : offsets) { CharT in[array_size (valid_in)] = {}; char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); copy (begin (valid_in), end (valid_in), begin (in)); in[t.replace_pos] = t.replace_char; auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.error); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf32_to_utf8_out (const std::codecvt &cvt) { utf32_to_utf8_out_ok (cvt); utf32_to_utf8_out_partial (cvt); utf32_to_utf8_out_error (cvt); } template void test_utf8_utf32_codecvts (const std::codecvt &cvt) { utf8_to_utf32_in (cvt); utf32_to_utf8_out (cvt); } template void utf8_to_utf16_in_ok (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char in[] = "bш\uAAAA\U0010AAAA"; const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (in) == 11, ""); static_assert (array_size (exp_literal) == 6, ""); static_assert (array_size (exp) == 6, ""); VERIFY (char_traits::length (in) == 10); VERIFY (char_traits::length (exp_literal) == 5); VERIFY (char_traits::length (exp) == 5); test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}}; for (auto t : offsets) { CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } for (auto t : offsets) { CharT out[array_size (exp)] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } } template void utf8_to_utf16_in_partial (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char in[] = "bш\uAAAA\U0010AAAA"; const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (in) == 11, ""); static_assert (array_size (exp_literal) == 6, ""); static_assert (array_size (exp) == 6, ""); VERIFY (char_traits::length (in) == 10); VERIFY (char_traits::length (exp_literal) == 5); VERIFY (char_traits::length (exp) == 5); test_offsets_partial offsets[] = { {1, 0, 0, 0}, // no space for first CP {3, 1, 1, 1}, // no space for second CP {2, 2, 1, 1}, // incomplete second CP {2, 1, 1, 1}, // incomplete second CP, and no space for it {6, 2, 3, 2}, // no space for third CP {4, 3, 3, 2}, // incomplete third CP {5, 3, 3, 2}, // incomplete third CP {4, 2, 3, 2}, // incomplete third CP, and no space for it {5, 2, 3, 2}, // incomplete third CP, and no space for it {10, 3, 6, 3}, // no space for fourth CP {10, 4, 6, 3}, // no space for fourth CP {7, 5, 6, 3}, // incomplete fourth CP {8, 5, 6, 3}, // incomplete fourth CP {9, 5, 6, 3}, // incomplete fourth CP {7, 3, 6, 3}, // incomplete fourth CP, and no space for it {8, 3, 6, 3}, // incomplete fourth CP, and no space for it {9, 3, 6, 3}, // incomplete fourth CP, and no space for it {7, 4, 6, 3}, // incomplete fourth CP, and no space for it {8, 4, 6, 3}, // incomplete fourth CP, and no space for it {9, 4, 6, 3}, // incomplete fourth CP, and no space for it }; for (auto t : offsets) { CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.partial); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf8_to_utf16_in_error (const std::codecvt &cvt) { using namespace std; const char valid_in[] = "bш\uAAAA\U0010AAAA"; const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (valid_in) == 11, ""); static_assert (array_size (exp_literal) == 6, ""); static_assert (array_size (exp) == 6, ""); VERIFY (char_traits::length (valid_in) == 10); VERIFY (char_traits::length (exp_literal) == 5); VERIFY (char_traits::length (exp) == 5); test_offsets_error offsets[] = { // replace leading byte with invalid byte {1, 5, 0, 0, '\xFF', 0}, {3, 5, 1, 1, '\xFF', 1}, {6, 5, 3, 2, '\xFF', 3}, {10, 5, 6, 3, '\xFF', 6}, // replace first trailing byte with ASCII byte {3, 5, 1, 1, 'z', 2}, {6, 5, 3, 2, 'z', 4}, {10, 5, 6, 3, 'z', 7}, // replace first trailing byte with invalid byte {3, 5, 1, 1, '\xFF', 2}, {6, 5, 3, 2, '\xFF', 4}, {10, 5, 6, 3, '\xFF', 7}, // replace second trailing byte with ASCII byte {6, 5, 3, 2, 'z', 5}, {10, 5, 6, 3, 'z', 8}, // replace second trailing byte with invalid byte {6, 5, 3, 2, '\xFF', 5}, {10, 5, 6, 3, '\xFF', 8}, // replace third trailing byte {10, 5, 6, 3, 'z', 9}, {10, 5, 6, 3, '\xFF', 9}, // replace first trailing byte with ASCII byte, also incomplete at end {5, 5, 3, 2, 'z', 4}, {8, 5, 6, 3, 'z', 7}, {9, 5, 6, 3, 'z', 7}, // replace first trailing byte with invalid byte, also incomplete at end {5, 5, 3, 2, '\xFF', 4}, {8, 5, 6, 3, '\xFF', 7}, {9, 5, 6, 3, '\xFF', 7}, // replace second trailing byte with ASCII byte, also incomplete at end {9, 5, 6, 3, 'z', 8}, // replace second trailing byte with invalid byte, also incomplete at end {9, 5, 6, 3, '\xFF', 8}, }; for (auto t : offsets) { char in[array_size (valid_in)] = {}; CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); char_traits::copy (in, valid_in, array_size (valid_in)); in[t.replace_pos] = t.replace_char; auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.error); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf8_to_utf16_in (const std::codecvt &cvt) { utf8_to_utf16_in_ok (cvt); utf8_to_utf16_in_partial (cvt); utf8_to_utf16_in_error (cvt); } template void utf16_to_utf8_out_ok (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char16_t in_literal[] = u"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; CharT in[array_size (in_literal)]; copy (begin (in_literal), end (in_literal), begin (in)); static_assert (array_size (in_literal) == 6, ""); static_assert (array_size (exp) == 11, ""); static_assert (array_size (in) == 6, ""); VERIFY (char_traits::length (in_literal) == 5); VERIFY (char_traits::length (exp) == 10); VERIFY (char_traits::length (in) == 5); const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}}; for (auto t : offsets) { char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } } template void utf16_to_utf8_out_partial (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const char16_t in_literal[] = u"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; CharT in[array_size (in_literal)]; copy (begin (in_literal), end (in_literal), begin (in)); static_assert (array_size (in_literal) == 6, ""); static_assert (array_size (exp) == 11, ""); static_assert (array_size (in) == 6, ""); VERIFY (char_traits::length (in_literal) == 5); VERIFY (char_traits::length (exp) == 10); VERIFY (char_traits::length (in) == 5); const test_offsets_partial offsets[] = { {1, 0, 0, 0}, // no space for first CP {2, 1, 1, 1}, // no space for second CP {2, 2, 1, 1}, // no space for second CP {3, 3, 2, 3}, // no space for third CP {3, 4, 2, 3}, // no space for third CP {3, 5, 2, 3}, // no space for third CP {5, 6, 3, 6}, // no space for fourth CP {5, 7, 3, 6}, // no space for fourth CP {5, 8, 3, 6}, // no space for fourth CP {5, 9, 3, 6}, // no space for fourth CP {4, 10, 3, 6}, // incomplete fourth CP {4, 6, 3, 6}, // incomplete fourth CP, and no space for it {4, 7, 3, 6}, // incomplete fourth CP, and no space for it {4, 8, 3, 6}, // incomplete fourth CP, and no space for it {4, 9, 3, 6}, // incomplete fourth CP, and no space for it }; for (auto t : offsets) { char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.partial); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf16_to_utf8_out_error (const std::codecvt &cvt) { using namespace std; const char16_t valid_in[] = u"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; static_assert (array_size (valid_in) == 6, ""); static_assert (array_size (exp) == 11, ""); VERIFY (char_traits::length (valid_in) == 5); VERIFY (char_traits::length (exp) == 10); test_offsets_error offsets[] = { {5, 10, 0, 0, 0xD800, 0}, {5, 10, 0, 0, 0xDBFF, 0}, {5, 10, 0, 0, 0xDC00, 0}, {5, 10, 0, 0, 0xDFFF, 0}, {5, 10, 1, 1, 0xD800, 1}, {5, 10, 1, 1, 0xDBFF, 1}, {5, 10, 1, 1, 0xDC00, 1}, {5, 10, 1, 1, 0xDFFF, 1}, {5, 10, 2, 3, 0xD800, 2}, {5, 10, 2, 3, 0xDBFF, 2}, {5, 10, 2, 3, 0xDC00, 2}, {5, 10, 2, 3, 0xDFFF, 2}, // make the leading surrogate a trailing one {5, 10, 3, 6, 0xDC00, 3}, {5, 10, 3, 6, 0xDFFF, 3}, // make the trailing surrogate a leading one {5, 10, 3, 6, 0xD800, 4}, {5, 10, 3, 6, 0xDBFF, 4}, // make the trailing surrogate a BMP char {5, 10, 3, 6, u'z', 4}, }; for (auto t : offsets) { CharT in[array_size (valid_in)] = {}; char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); copy (begin (valid_in), end (valid_in), begin (in)); in[t.replace_pos] = t.replace_char; auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.error); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf16_to_utf8_out (const std::codecvt &cvt) { utf16_to_utf8_out_ok (cvt); utf16_to_utf8_out_partial (cvt); utf16_to_utf8_out_error (cvt); } template void test_utf8_utf16_cvts (const std::codecvt &cvt) { utf8_to_utf16_in (cvt); utf16_to_utf8_out (cvt); } template void utf8_to_ucs2_in_ok (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP const char in[] = "bш\uAAAA"; const char16_t exp_literal[] = u"bш\uAAAA"; CharT exp[array_size (exp_literal)] = {}; copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (in) == 7, ""); static_assert (array_size (exp_literal) == 4, ""); static_assert (array_size (exp) == 4, ""); VERIFY (char_traits::length (in) == 6); VERIFY (char_traits::length (exp_literal) == 3); VERIFY (char_traits::length (exp) == 3); test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}}; for (auto t : offsets) { CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } for (auto t : offsets) { CharT out[array_size (exp)] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } } template void utf8_to_ucs2_in_partial (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP const char in[] = "bш\uAAAA"; const char16_t exp_literal[] = u"bш\uAAAA"; CharT exp[array_size (exp_literal)] = {}; copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (in) == 7, ""); static_assert (array_size (exp_literal) == 4, ""); static_assert (array_size (exp) == 4, ""); VERIFY (char_traits::length (in) == 6); VERIFY (char_traits::length (exp_literal) == 3); VERIFY (char_traits::length (exp) == 3); test_offsets_partial offsets[] = { {1, 0, 0, 0}, // no space for first CP {3, 1, 1, 1}, // no space for second CP {2, 2, 1, 1}, // incomplete second CP {2, 1, 1, 1}, // incomplete second CP, and no space for it {6, 2, 3, 2}, // no space for third CP {4, 3, 3, 2}, // incomplete third CP {5, 3, 3, 2}, // incomplete third CP {4, 2, 3, 2}, // incomplete third CP, and no space for it {5, 2, 3, 2}, // incomplete third CP, and no space for it }; for (auto t : offsets) { CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.partial); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf8_to_ucs2_in_error (const std::codecvt &cvt) { using namespace std; const char valid_in[] = "bш\uAAAA\U0010AAAA"; const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA"; CharT exp[array_size (exp_literal)] = {}; copy (begin (exp_literal), end (exp_literal), begin (exp)); static_assert (array_size (valid_in) == 11, ""); static_assert (array_size (exp_literal) == 6, ""); static_assert (array_size (exp) == 6, ""); VERIFY (char_traits::length (valid_in) == 10); VERIFY (char_traits::length (exp_literal) == 5); VERIFY (char_traits::length (exp) == 5); test_offsets_error offsets[] = { // replace leading byte with invalid byte {1, 5, 0, 0, '\xFF', 0}, {3, 5, 1, 1, '\xFF', 1}, {6, 5, 3, 2, '\xFF', 3}, {10, 5, 6, 3, '\xFF', 6}, // replace first trailing byte with ASCII byte {3, 5, 1, 1, 'z', 2}, {6, 5, 3, 2, 'z', 4}, {10, 5, 6, 3, 'z', 7}, // replace first trailing byte with invalid byte {3, 5, 1, 1, '\xFF', 2}, {6, 5, 3, 2, '\xFF', 4}, {10, 5, 6, 3, '\xFF', 7}, // replace second trailing byte with ASCII byte {6, 5, 3, 2, 'z', 5}, {10, 5, 6, 3, 'z', 8}, // replace second trailing byte with invalid byte {6, 5, 3, 2, '\xFF', 5}, {10, 5, 6, 3, '\xFF', 8}, // replace third trailing byte {10, 5, 6, 3, 'z', 9}, {10, 5, 6, 3, '\xFF', 9}, // When we see a leading byte of 4-byte CP, we should return error, no // matter if it is incomplete at the end or has errors in the trailing // bytes. // Don't replace anything, show full 4-byte CP {10, 4, 6, 3, 'b', 0}, {10, 5, 6, 3, 'b', 0}, // Don't replace anything, show incomplete 4-byte CP at the end {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP // replace first trailing byte with ASCII byte, also incomplete at end {5, 5, 3, 2, 'z', 4}, // replace first trailing byte with invalid byte, also incomplete at end {5, 5, 3, 2, '\xFF', 4}, // replace first trailing byte with ASCII byte, also incomplete at end {8, 5, 6, 3, 'z', 7}, {9, 5, 6, 3, 'z', 7}, // replace first trailing byte with invalid byte, also incomplete at end {8, 5, 6, 3, '\xFF', 7}, {9, 5, 6, 3, '\xFF', 7}, // replace second trailing byte with ASCII byte, also incomplete at end {9, 5, 6, 3, 'z', 8}, // replace second trailing byte with invalid byte, also incomplete at end {9, 5, 6, 3, '\xFF', 8}, }; for (auto t : offsets) { char in[array_size (valid_in)] = {}; CharT out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); char_traits::copy (in, valid_in, array_size (valid_in)); in[t.replace_pos] = t.replace_char; auto state = mbstate_t{}; auto in_next = (const char *) nullptr; auto out_next = (CharT *) nullptr; auto res = codecvt_base::result (); res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.error); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void utf8_to_ucs2_in (const std::codecvt &cvt) { utf8_to_ucs2_in_ok (cvt); utf8_to_ucs2_in_partial (cvt); utf8_to_ucs2_in_error (cvt); } template void ucs2_to_utf8_out_ok (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP const char16_t in_literal[] = u"bш\uAAAA"; const char exp[] = "bш\uAAAA"; CharT in[array_size (in_literal)] = {}; copy (begin (in_literal), end (in_literal), begin (in)); static_assert (array_size (in_literal) == 4, ""); static_assert (array_size (exp) == 7, ""); static_assert (array_size (in) == 4, ""); VERIFY (char_traits::length (in_literal) == 3); VERIFY (char_traits::length (exp) == 6); VERIFY (char_traits::length (in) == 3); const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}}; for (auto t : offsets) { char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.ok); VERIFY (in_next == in + t.in_size); VERIFY (out_next == out + t.out_size); VERIFY (char_traits::compare (out, exp, t.out_size) == 0); if (t.out_size < array_size (out)) VERIFY (out[t.out_size] == 0); } } template void ucs2_to_utf8_out_partial (const std::codecvt &cvt) { using namespace std; // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP const char16_t in_literal[] = u"bш\uAAAA"; const char exp[] = "bш\uAAAA"; CharT in[array_size (in_literal)] = {}; copy (begin (in_literal), end (in_literal), begin (in)); static_assert (array_size (in_literal) == 4, ""); static_assert (array_size (exp) == 7, ""); static_assert (array_size (in) == 4, ""); VERIFY (char_traits::length (in_literal) == 3); VERIFY (char_traits::length (exp) == 6); VERIFY (char_traits::length (in) == 3); const test_offsets_partial offsets[] = { {1, 0, 0, 0}, // no space for first CP {2, 1, 1, 1}, // no space for second CP {2, 2, 1, 1}, // no space for second CP {3, 3, 2, 3}, // no space for third CP {3, 4, 2, 3}, // no space for third CP {3, 5, 2, 3}, // no space for third CP }; for (auto t : offsets) { char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.partial); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void ucs2_to_utf8_out_error (const std::codecvt &cvt) { using namespace std; const char16_t valid_in[] = u"bш\uAAAA\U0010AAAA"; const char exp[] = "bш\uAAAA\U0010AAAA"; static_assert (array_size (valid_in) == 6, ""); static_assert (array_size (exp) == 11, ""); VERIFY (char_traits::length (valid_in) == 5); VERIFY (char_traits::length (exp) == 10); test_offsets_error offsets[] = { {5, 10, 0, 0, 0xD800, 0}, {5, 10, 0, 0, 0xDBFF, 0}, {5, 10, 0, 0, 0xDC00, 0}, {5, 10, 0, 0, 0xDFFF, 0}, {5, 10, 1, 1, 0xD800, 1}, {5, 10, 1, 1, 0xDBFF, 1}, {5, 10, 1, 1, 0xDC00, 1}, {5, 10, 1, 1, 0xDFFF, 1}, {5, 10, 2, 3, 0xD800, 2}, {5, 10, 2, 3, 0xDBFF, 2}, {5, 10, 2, 3, 0xDC00, 2}, {5, 10, 2, 3, 0xDFFF, 2}, // dont replace anything, just show the surrogate pair {5, 10, 3, 6, u'b', 0}, // make the leading surrogate a trailing one {5, 10, 3, 6, 0xDC00, 3}, {5, 10, 3, 6, 0xDFFF, 3}, // make the trailing surrogate a leading one {5, 10, 3, 6, 0xD800, 4}, {5, 10, 3, 6, 0xDBFF, 4}, // make the trailing surrogate a BMP char {5, 10, 3, 6, u'z', 4}, {5, 7, 3, 6, u'b', 0}, // no space for fourth CP {5, 8, 3, 6, u'b', 0}, // no space for fourth CP {5, 9, 3, 6, u'b', 0}, // no space for fourth CP {4, 10, 3, 6, u'b', 0}, // incomplete fourth CP {4, 7, 3, 6, u'b', 0}, // incomplete fourth CP, and no space for it {4, 8, 3, 6, u'b', 0}, // incomplete fourth CP, and no space for it {4, 9, 3, 6, u'b', 0}, // incomplete fourth CP, and no space for it }; for (auto t : offsets) { CharT in[array_size (valid_in)] = {}; char out[array_size (exp) - 1] = {}; VERIFY (t.in_size <= array_size (in)); VERIFY (t.out_size <= array_size (out)); VERIFY (t.expected_in_next <= t.in_size); VERIFY (t.expected_out_next <= t.out_size); copy (begin (valid_in), end (valid_in), begin (in)); in[t.replace_pos] = t.replace_char; auto state = mbstate_t{}; auto in_next = (const CharT *) nullptr; auto out_next = (char *) nullptr; auto res = codecvt_base::result (); res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size, out_next); VERIFY (res == cvt.error); VERIFY (in_next == in + t.expected_in_next); VERIFY (out_next == out + t.expected_out_next); VERIFY (char_traits::compare (out, exp, t.expected_out_next) == 0); if (t.expected_out_next < array_size (out)) VERIFY (out[t.expected_out_next] == 0); } } template void ucs2_to_utf8_out (const std::codecvt &cvt) { ucs2_to_utf8_out_ok (cvt); ucs2_to_utf8_out_partial (cvt); ucs2_to_utf8_out_error (cvt); } template void test_utf8_ucs2_cvts (const std::codecvt &cvt) { utf8_to_ucs2_in (cvt); ucs2_to_utf8_out (cvt); }