mirror of
https://github.com/cmclark00/retro-imager.git
synced 2025-05-19 08:25:21 +01:00
- Update bundled libarchive version used on Windows/Mac - Enable requested zstd support while we are at it. Closes #211
885 lines
25 KiB
C
885 lines
25 KiB
C
/*-
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
#include "test.h"
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <locale.h>
|
|
|
|
#define __LIBARCHIVE_TEST
|
|
#include "archive_string.h"
|
|
|
|
/*
Execute the following to rebuild the data for this program:
   tail -n +36 test_archive_string_conversion.c | /bin/sh
#
# This requires http://unicode.org/Public/6.0.0/ucd/NormalizationTest.txt
#
if="NormalizationTest.txt"
if [ ! -f ${if} ]; then
  echo "Not found: \"${if}\""
  exit 0
fi
of=test_archive_string_conversion.txt.Z
echo "\$FreeBSD\$" > ${of}.uu
awk -F ';' '$0 ~/^[0-9A-F]+/ {printf "%s;%s\n", $2, $3}' ${if} | compress | uuencode ${of} >> ${of}.uu
exit 1
*/
|
|
|
|
/*
 * Encode the code point `uc` as UTF-8 starting at `p`.
 *
 * Writes between one and four bytes (no NUL terminator) and returns the
 * number of bytes written; the caller must supply room for four bytes.
 * No validation is performed: every value above 0xFFFF takes the
 * four-byte form, using only its low 21 bits.
 */
static int
unicode_to_utf8(char *p, uint32_t uc)
{
	char *start = p;

	/* Translate code point to UTF8 */
	if (uc <= 0x7f) {
		*p++ = (char)uc;
	} else if (uc <= 0x7ff) {
		*p++ = (char)(0xc0 | ((uc >> 6) & 0x1f));
		*p++ = (char)(0x80 | (uc & 0x3f));
	} else if (uc <= 0xffff) {
		*p++ = (char)(0xe0 | ((uc >> 12) & 0x0f));
		*p++ = (char)(0x80 | ((uc >> 6) & 0x3f));
		*p++ = (char)(0x80 | (uc & 0x3f));
	} else {
		*p++ = (char)(0xf0 | ((uc >> 18) & 0x07));
		*p++ = (char)(0x80 | ((uc >> 12) & 0x3f));
		*p++ = (char)(0x80 | ((uc >> 6) & 0x3f));
		*p++ = (char)(0x80 | (uc & 0x3f));
	}
	return ((int)(p - start));
}
|
|
|
|
/*
 * Store the 16-bit value `u` at `pp` in big-endian byte order
 * (most significant byte first).  Exactly two bytes are written.
 */
static void
archive_be16enc(void *pp, uint16_t u)
{
	unsigned char *bytes = (unsigned char *)pp;

	bytes[0] = (unsigned char)((u >> 8) & 0xff);
	bytes[1] = (unsigned char)(u & 0xff);
}
|
|
|
|
/*
 * Encode the code point `uc` as UTF-16BE starting at `p`.
 *
 * Returns the number of bytes written: 2 for a value that fits in one
 * 16-bit code unit, 4 when a surrogate pair is needed.  No terminator is
 * written and the caller must supply room for four bytes.
 */
static int
unicode_to_utf16be(char *p, uint32_t uc)
{
	char *utf16 = p;

	if (uc > 0xffff) {
		/* We have a code point that won't fit into a single
		 * 16-bit code unit; convert it to a surrogate pair. */
		uc -= 0x10000;
		archive_be16enc(utf16,
		    (uint16_t)(((uc >> 10) & 0x3ff) + 0xD800));
		archive_be16enc(utf16 + 2,
		    (uint16_t)((uc & 0x3ff) + 0xDC00));
		return (4);
	} else {
		archive_be16enc(utf16, (uint16_t)uc);
		return (2);
	}
}
|
|
|
|
/*
 * Store the 16-bit value `u` at `pp` in little-endian byte order
 * (least significant byte first).  Exactly two bytes are written.
 */
static void
archive_le16enc(void *pp, uint16_t u)
{
	unsigned char *bytes = (unsigned char *)pp;

	bytes[0] = (unsigned char)(u & 0xff);
	bytes[1] = (unsigned char)((u >> 8) & 0xff);
}
|
|
|
|
/*
 * Encode the code point `uc` as UTF-16LE starting at `p`.
 *
 * Returns the number of bytes written: 2 for a value that fits in one
 * 16-bit code unit, 4 when a surrogate pair is needed.  No terminator is
 * written and the caller must supply room for four bytes.
 */
static size_t
unicode_to_utf16le(char *p, uint32_t uc)
{
	char *utf16 = p;

	if (uc > 0xffff) {
		/* We have a code point that won't fit into a single
		 * 16-bit code unit; convert it to a surrogate pair. */
		uc -= 0x10000;
		archive_le16enc(utf16,
		    (uint16_t)(((uc >> 10) & 0x3ff) + 0xD800));
		archive_le16enc(utf16 + 2,
		    (uint16_t)((uc & 0x3ff) + 0xDC00));
		return (4);
	} else {
		archive_le16enc(utf16, (uint16_t)uc);
		return (2);
	}
}
|
|
|
|
/*
 * Return the size of wchar_t in bytes on this platform.
 */
static int
wc_size(void)
{
	return ((int)sizeof(wchar_t));
}
|
|
|
|
/*
 * Store the code point `uc` at `wp` as wide character(s).
 *
 * When wchar_t is four bytes wide the value is stored as-is.  Otherwise
 * a value above 0xFFFF is split into a surrogate pair.  Returns the
 * number of wchar_t elements written (1 or 2); no terminator is added.
 */
static int
unicode_to_wc(wchar_t *wp, uint32_t uc)
{
	if (wc_size() == 4) {
		*wp = (wchar_t)uc;
		return (1);
	}
	if (uc > 0xffff) {
		/* We have a code point that won't fit into a
		 * wchar_t; convert it to a surrogate pair. */
		uc -= 0x10000;
		*wp++ = (wchar_t)(((uc >> 10) & 0x3ff) + 0xD800);
		*wp = (wchar_t)((uc & 0x3ff) + 0xDC00);
		return (2);
	} else {
		*wp = (wchar_t)uc;
		return (1);
	}
}
|
|
|
|
/*
 * Convert a textual Unicode pattern into four encodings at once.
 *
 * `pattern` is a NUL-terminated list of upper-case hexadecimal code
 * points ('0'-'9', 'A'-'F'); any other character ends the current code
 * point, as does the terminating NUL.  Each code point is appended to
 *   out   - UTF-8, terminated with one NUL byte
 *   wout  - wide characters, terminated with L'\0'
 *   u16be - UTF-16BE, terminated with two zero bytes
 *   u16le - UTF-16LE, terminated with two zero bytes
 * A code point is emitted at every separator even when no digit preceded
 * it, so an empty pattern yields a single U+0000.
 *
 * None of the output buffers is bounds-checked; the caller must size
 * them for the longest pattern it passes in.
 *
 * Returns 1 when `mac_nfd` is non-zero and the FIRST code point of the
 * pattern is one of those listed below that Mac OS leaves composed;
 * otherwise returns 0.
 *
 * Note: U+2000 - U+2FFF, U+F900 - U+FAFF and U+2F800 - U+2FAFF are not
 * converted to NFD on Mac OS.
 * see also http://developer.apple.com/library/mac/#qa/qa2001/qa1173.html
 */
static int
scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
    const char *pattern, int mac_nfd)
{
	unsigned uc = 0;
	const char *p = pattern;
	char *op = out;
	wchar_t *owp = wout;
	char *op16be = u16be;
	char *op16le = u16le;
	int ret = 0;

	for (;; p++) {
		/* Accumulate hexadecimal digits into the current code point. */
		if (*p >= '0' && *p <= '9') {
			uc = (uc << 4) + (*p - '0');
			continue;
		}
		if (*p >= 'A' && *p <= 'F') {
			uc = (uc << 4) + (*p - 'A' + 0x0a);
			continue;
		}

		/*
		 * Separator or end of pattern: the code point is complete.
		 * `op == out` holds only while nothing has been emitted yet,
		 * so the Mac OS exception list is consulted for the first
		 * code point only.
		 */
		if (mac_nfd && op == out) {
			/*
			 * These are not converted to NFD on Mac OS.
			 * U+2000 - U+2FFF
			 * U+F900 - U+FAFF
			 * U+2F800 - U+2FAFF
			 */
			switch (uc) {
			case 0x2194: case 0x219A: case 0x219B:
			case 0x21AE: case 0x21CD: case 0x21CE:
			case 0x21CF: case 0x2204: case 0x2209:
			case 0x220C: case 0x2224: case 0x2226:
			case 0x2241: case 0x2244: case 0x2247:
			case 0x2249: case 0x2260: case 0x2262:
			case 0x226D: case 0x226E: case 0x226F:
			case 0x2270: case 0x2271: case 0x2274:
			case 0x2275: case 0x2276: case 0x2278:
			case 0x2279: case 0x227A: case 0x227B:
			case 0x2280: case 0x2281: case 0x2284:
			case 0x2285: case 0x2288: case 0x2289:
			case 0x22AC: case 0x22AD: case 0x22AE:
			case 0x22AF: case 0x22E0: case 0x22E1:
			case 0x22E2: case 0x22E3: case 0x22EA:
			case 0x22EB: case 0x22EC: case 0x22ED:

			/*
			 * Those code points are not converted to
			 * NFD on Mac OS. I do not know the reason
			 * because it is undocumented.
			 *   NFC        NFD
			 *   1109A  ==> 11099 110BA
			 *   1109C  ==> 1109B 110BA
			 *   110AB  ==> 110A5 110BA
			 */
			case 0x1109A: case 0x1109C: case 0x110AB:
				ret = 1;
				break;
			default:
				break;
			}
		}

		/* Append the code point to every output encoding. */
		op16be += unicode_to_utf16be(op16be, uc);
		op16le += unicode_to_utf16le(op16le, uc);
		owp += unicode_to_wc(owp, uc);
		op += unicode_to_utf8(op, uc);

		if (!*p) {
			/* End of pattern: terminate all four outputs. */
			*op16be++ = 0;
			*op16be = 0;
			*op16le++ = 0;
			*op16le = 0;
			*owp = L'\0';
			*op = '\0';
			break;
		}
		uc = 0;
	}
	return (ret);
}
|
|
|
|
/*
 * Return 1 on native Windows builds (_WIN32 defined, __CYGWIN__ not
 * defined) and 0 everywhere else.  The tests below use this to decide
 * whether the wide-character checks can run even when no UTF-8 locale is
 * available.
 */
static int
is_wc_unicode(void)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	return (1);
#else
	return (0);
#endif
}
|
|
|
|
/*
|
|
* A conversion test that we correctly normalize UTF-8 and UTF-16BE characters.
|
|
* On Mac OS, the characters to be Form D.
|
|
* On other platforms, the characters to be Form C.
|
|
*/
|
|
static void
|
|
test_archive_string_normalization_nfc(const char *testdata)
|
|
{
|
|
struct archive *a, *a2;
|
|
struct archive_string utf8;
|
|
struct archive_mstring mstr;
|
|
struct archive_string_conv *f_sconv8, *t_sconv8;
|
|
struct archive_string_conv *f_sconv16be, *f_sconv16le;
|
|
FILE *fp;
|
|
char buff[512];
|
|
int line = 0;
|
|
int locale_is_utf8, wc_is_unicode;
|
|
int sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;
|
|
|
|
locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
|
|
wc_is_unicode = is_wc_unicode();
|
|
/* If it doesn't exist, just warn and return. */
|
|
if (!locale_is_utf8 && !wc_is_unicode) {
|
|
skipping("A test of string normalization for NFC requires "
|
|
"a suitable locale; en_US.UTF-8 not available on this "
|
|
"system");
|
|
return;
|
|
}
|
|
|
|
archive_string_init(&utf8);
|
|
memset(&mstr, 0, sizeof(mstr));
|
|
|
|
/*
|
|
* Create string conversion objects.
|
|
*/
|
|
assert((a = archive_read_new()) != NULL);
|
|
assertA(NULL != (f_sconv8 =
|
|
archive_string_conversion_from_charset(a, "UTF-8", 0)));
|
|
assertA(NULL != (f_sconv16be =
|
|
archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
|
|
assertA(NULL != (f_sconv16le =
|
|
archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
|
|
assert((a2 = archive_write_new()) != NULL);
|
|
assertA(NULL != (t_sconv8 =
|
|
archive_string_conversion_to_charset(a2, "UTF-8", 0)));
|
|
if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
|
|
t_sconv8 == NULL) {
|
|
/* We cannot continue this test. */
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
return;
|
|
}
|
|
archive_string_conversion_set_opt(f_sconv8, sconv_opt);
|
|
archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
|
|
archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
|
|
archive_string_conversion_set_opt(t_sconv8, sconv_opt);
|
|
|
|
/* Open a test pattern file. */
|
|
assert((fp = fopen(testdata, "r")) != NULL);
|
|
|
|
/*
|
|
* Read test data.
|
|
* Test data format:
|
|
* <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
|
|
* Unicode pattern format:
|
|
* [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
|
|
*/
|
|
while (fgets(buff, sizeof(buff), fp) != NULL) {
|
|
char nfc[80], nfd[80];
|
|
char utf8_nfc[80], utf8_nfd[80];
|
|
char utf16be_nfc[80], utf16be_nfd[80];
|
|
char utf16le_nfc[80], utf16le_nfd[80];
|
|
wchar_t wc_nfc[40], wc_nfd[40];
|
|
char *e, *p;
|
|
const wchar_t *wp;
|
|
const char *mp;
|
|
size_t mplen;
|
|
|
|
line++;
|
|
if (buff[0] == '#')
|
|
continue;
|
|
p = strchr(buff, ';');
|
|
if (p == NULL)
|
|
continue;
|
|
*p++ = '\0';
|
|
/* Copy an NFC pattern */
|
|
strncpy(nfc, buff, sizeof(nfc)-1);
|
|
nfc[sizeof(nfc)-1] = '\0';
|
|
e = p;
|
|
p = strchr(p, '\n');
|
|
if (p == NULL)
|
|
continue;
|
|
*p = '\0';
|
|
/* Copy an NFD pattern */
|
|
strncpy(nfd, e, sizeof(nfd)-1);
|
|
nfd[sizeof(nfd)-1] = '\0';
|
|
|
|
/*
|
|
* Get an NFC patterns.
|
|
*/
|
|
scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
|
|
nfc, 0);
|
|
|
|
/*
|
|
* Get an NFD patterns.
|
|
*/
|
|
scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
|
|
nfd, 0);
|
|
|
|
if (locale_is_utf8) {
|
|
/*
|
|
* Normalize an NFD string for import.
|
|
*/
|
|
assertEqualInt(0, archive_strcpy_l(
|
|
&utf8, utf8_nfd, f_sconv8));
|
|
failure("NFD(%s) should be converted to NFC(%s):%d",
|
|
nfd, nfc, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
|
|
/*
|
|
* Normalize an NFC string for import.
|
|
*/
|
|
assertEqualInt(0, archive_strcpy_l(
|
|
&utf8, utf8_nfc, f_sconv8));
|
|
failure("NFC(%s) should not be any changed:%d",
|
|
nfc, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
|
|
/*
|
|
* Copy an NFC string for export.
|
|
*/
|
|
assertEqualInt(0, archive_strcpy_l(
|
|
&utf8, utf8_nfc, t_sconv8));
|
|
failure("NFC(%s) should not be any changed:%d",
|
|
nfc, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
|
|
/*
|
|
* Normalize an NFD string in UTF-16BE for import.
|
|
*/
|
|
assertEqualInt(0, archive_strncpy_l(
|
|
&utf8, utf16be_nfd, 100000, f_sconv16be));
|
|
failure("NFD(%s) should be converted to NFC(%s):%d",
|
|
nfd, nfc, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
|
|
/*
|
|
* Normalize an NFD string in UTF-16LE for import.
|
|
*/
|
|
assertEqualInt(0, archive_strncpy_l(
|
|
&utf8, utf16le_nfd, 100000, f_sconv16le));
|
|
failure("NFD(%s) should be converted to NFC(%s):%d",
|
|
nfd, nfc, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
}
|
|
|
|
/*
|
|
* Test for archive_mstring interface.
|
|
* In specific, Windows platform UTF-16BE is directly
|
|
* converted to/from wide-character to avoid the effect of
|
|
* current locale since windows platform cannot make
|
|
* locale UTF-8.
|
|
*/
|
|
if (locale_is_utf8 || wc_is_unicode) {
|
|
/*
|
|
* Normalize an NFD string in UTF-8 for import.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(
|
|
&mstr, utf8_nfd, 100000, f_sconv8));
|
|
assertEqualInt(0,
|
|
archive_mstring_get_wcs(a, &mstr, &wp));
|
|
failure("UTF-8 NFD(%s) should be converted "
|
|
"to WCS NFC(%s):%d", nfd, nfc, line);
|
|
assertEqualWString(wc_nfc, wp);
|
|
|
|
/*
|
|
* Normalize an NFD string in UTF-16BE for import.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(
|
|
&mstr, utf16be_nfd, 100000, f_sconv16be));
|
|
assertEqualInt(0,
|
|
archive_mstring_get_wcs(a, &mstr, &wp));
|
|
failure("UTF-8 NFD(%s) should be converted "
|
|
"to WCS NFC(%s):%d", nfd, nfc, line);
|
|
assertEqualWString(wc_nfc, wp);
|
|
|
|
/*
|
|
* Normalize an NFD string in UTF-16LE for import.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(
|
|
&mstr, utf16le_nfd, 100000, f_sconv16le));
|
|
assertEqualInt(0,
|
|
archive_mstring_get_wcs(a, &mstr, &wp));
|
|
failure("UTF-8 NFD(%s) should be converted "
|
|
"to WCS NFC(%s):%d", nfd, nfc, line);
|
|
assertEqualWString(wc_nfc, wp);
|
|
|
|
/*
|
|
* Copy an NFC wide-string for export.
|
|
*/
|
|
assertEqualInt(0,
|
|
archive_mstring_copy_wcs(&mstr, wc_nfc));
|
|
assertEqualInt(0, archive_mstring_get_mbs_l(
|
|
a, &mstr, &mp, &mplen, t_sconv8));
|
|
failure("WCS NFC(%s) should be UTF-8 NFC:%d"
|
|
,nfc, line);
|
|
assertEqualUTF8String(utf8_nfc, mp);
|
|
}
|
|
}
|
|
|
|
archive_string_free(&utf8);
|
|
archive_mstring_clean(&mstr);
|
|
fclose(fp);
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
|
|
}
|
|
|
|
static void
|
|
test_archive_string_normalization_mac_nfd(const char *testdata)
|
|
{
|
|
struct archive *a, *a2;
|
|
struct archive_string utf8;
|
|
struct archive_mstring mstr;
|
|
struct archive_string_conv *f_sconv8, *t_sconv8;
|
|
struct archive_string_conv *f_sconv16be, *f_sconv16le;
|
|
FILE *fp;
|
|
char buff[512];
|
|
int line = 0;
|
|
int locale_is_utf8, wc_is_unicode;
|
|
int sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;
|
|
|
|
locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
|
|
wc_is_unicode = is_wc_unicode();
|
|
/* If it doesn't exist, just warn and return. */
|
|
if (!locale_is_utf8 && !wc_is_unicode) {
|
|
skipping("A test of string normalization for NFD requires "
|
|
"a suitable locale; en_US.UTF-8 not available on this "
|
|
"system");
|
|
return;
|
|
}
|
|
|
|
archive_string_init(&utf8);
|
|
memset(&mstr, 0, sizeof(mstr));
|
|
|
|
/*
|
|
* Create string conversion objects.
|
|
*/
|
|
assert((a = archive_read_new()) != NULL);
|
|
assertA(NULL != (f_sconv8 =
|
|
archive_string_conversion_from_charset(a, "UTF-8", 0)));
|
|
assertA(NULL != (f_sconv16be =
|
|
archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
|
|
assertA(NULL != (f_sconv16le =
|
|
archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
|
|
assert((a2 = archive_write_new()) != NULL);
|
|
assertA(NULL != (t_sconv8 =
|
|
archive_string_conversion_to_charset(a2, "UTF-8", 0)));
|
|
if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
|
|
t_sconv8 == NULL) {
|
|
/* We cannot continue this test. */
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
return;
|
|
}
|
|
archive_string_conversion_set_opt(f_sconv8, sconv_opt);
|
|
archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
|
|
archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
|
|
archive_string_conversion_set_opt(t_sconv8, sconv_opt);
|
|
|
|
/* Open a test pattern file. */
|
|
assert((fp = fopen(testdata, "r")) != NULL);
|
|
|
|
/*
|
|
* Read test data.
|
|
* Test data format:
|
|
* <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
|
|
* Unicode pattern format:
|
|
* [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
|
|
*/
|
|
while (fgets(buff, sizeof(buff), fp) != NULL) {
|
|
char nfc[80], nfd[80];
|
|
char utf8_nfc[80], utf8_nfd[80];
|
|
char utf16be_nfc[80], utf16be_nfd[80];
|
|
char utf16le_nfc[80], utf16le_nfd[80];
|
|
wchar_t wc_nfc[40], wc_nfd[40];
|
|
char *e, *p;
|
|
const wchar_t *wp;
|
|
const char *mp;
|
|
size_t mplen;
|
|
int should_be_nfc;
|
|
|
|
line++;
|
|
if (buff[0] == '#')
|
|
continue;
|
|
p = strchr(buff, ';');
|
|
if (p == NULL)
|
|
continue;
|
|
*p++ = '\0';
|
|
/* Copy an NFC pattern */
|
|
strncpy(nfc, buff, sizeof(nfc)-1);
|
|
nfc[sizeof(nfc)-1] = '\0';
|
|
e = p;
|
|
p = strchr(p, '\n');
|
|
if (p == NULL)
|
|
continue;
|
|
*p = '\0';
|
|
/* Copy an NFD pattern */
|
|
strncpy(nfd, e, sizeof(nfd)-1);
|
|
nfd[sizeof(nfd)-1] = '\0';
|
|
|
|
/*
|
|
* Get an NFC patterns.
|
|
*/
|
|
should_be_nfc = scan_unicode_pattern(utf8_nfc, wc_nfc,
|
|
utf16be_nfc, utf16le_nfc, nfc, 1);
|
|
|
|
/*
|
|
* Get an NFD patterns.
|
|
*/
|
|
scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
|
|
nfd, 0);
|
|
|
|
if (locale_is_utf8) {
|
|
/*
|
|
* Normalize an NFC string for import.
|
|
*/
|
|
assertEqualInt(0, archive_strcpy_l(
|
|
&utf8, utf8_nfc, f_sconv8));
|
|
if (should_be_nfc) {
|
|
failure("NFC(%s) should not be converted to"
|
|
" NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
} else {
|
|
failure("NFC(%s) should be converted to"
|
|
" NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualUTF8String(utf8_nfd, utf8.s);
|
|
}
|
|
|
|
/*
|
|
* Normalize an NFD string for import.
|
|
*/
|
|
assertEqualInt(0, archive_strcpy_l(
|
|
&utf8, utf8_nfd, f_sconv8));
|
|
failure("NFD(%s) should not be any changed:%d",
|
|
nfd, line);
|
|
assertEqualUTF8String(utf8_nfd, utf8.s);
|
|
|
|
/*
|
|
* Copy an NFD string for export.
|
|
*/
|
|
assertEqualInt(0, archive_strcpy_l(
|
|
&utf8, utf8_nfd, t_sconv8));
|
|
failure("NFD(%s) should not be any changed:%d",
|
|
nfd, line);
|
|
assertEqualUTF8String(utf8_nfd, utf8.s);
|
|
|
|
/*
|
|
* Normalize an NFC string in UTF-16BE for import.
|
|
*/
|
|
assertEqualInt(0, archive_strncpy_l(
|
|
&utf8, utf16be_nfc, 100000, f_sconv16be));
|
|
if (should_be_nfc) {
|
|
failure("NFC(%s) should not be converted to"
|
|
" NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
} else {
|
|
failure("NFC(%s) should be converted to"
|
|
" NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualUTF8String(utf8_nfd, utf8.s);
|
|
}
|
|
|
|
/*
|
|
* Normalize an NFC string in UTF-16LE for import.
|
|
*/
|
|
assertEqualInt(0, archive_strncpy_l(
|
|
&utf8, utf16le_nfc, 100000, f_sconv16le));
|
|
if (should_be_nfc) {
|
|
failure("NFC(%s) should not be converted to"
|
|
" NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualUTF8String(utf8_nfc, utf8.s);
|
|
} else {
|
|
failure("NFC(%s) should be converted to"
|
|
" NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualUTF8String(utf8_nfd, utf8.s);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Test for archive_mstring interface.
|
|
* In specific, Windows platform UTF-16BE is directly
|
|
* converted to/from wide-character to avoid the effect of
|
|
* current locale since windows platform cannot make
|
|
* locale UTF-8.
|
|
*/
|
|
if (locale_is_utf8 || wc_is_unicode) {
|
|
/*
|
|
* Normalize an NFD string in UTF-8 for import.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(
|
|
&mstr, utf8_nfc, 100000, f_sconv8));
|
|
assertEqualInt(0,
|
|
archive_mstring_get_wcs(a, &mstr, &wp));
|
|
if (should_be_nfc) {
|
|
failure("UTF-8 NFC(%s) should not be converted "
|
|
"to WCS NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualWString(wc_nfc, wp);
|
|
} else {
|
|
failure("UTF-8 NFC(%s) should be converted "
|
|
"to WCS NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualWString(wc_nfd, wp);
|
|
}
|
|
|
|
/*
|
|
* Normalize an NFD string in UTF-16BE for import.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(
|
|
&mstr, utf16be_nfc, 100000, f_sconv16be));
|
|
assertEqualInt(0,
|
|
archive_mstring_get_wcs(a, &mstr, &wp));
|
|
if (should_be_nfc) {
|
|
failure("UTF-16BE NFC(%s) should not be "
|
|
"converted to WCS NFD(%s):%d",
|
|
nfc, nfd, line);
|
|
assertEqualWString(wc_nfc, wp);
|
|
} else {
|
|
failure("UTF-16BE NFC(%s) should be converted "
|
|
"to WCS NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualWString(wc_nfd, wp);
|
|
}
|
|
|
|
/*
|
|
* Normalize an NFD string in UTF-16LE for import.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(
|
|
&mstr, utf16le_nfc, 100000, f_sconv16le));
|
|
assertEqualInt(0,
|
|
archive_mstring_get_wcs(a, &mstr, &wp));
|
|
if (should_be_nfc) {
|
|
failure("UTF-16LE NFC(%s) should not be "
|
|
"converted to WCS NFD(%s):%d",
|
|
nfc, nfd, line);
|
|
assertEqualWString(wc_nfc, wp);
|
|
} else {
|
|
failure("UTF-16LE NFC(%s) should be converted "
|
|
"to WCS NFD(%s):%d", nfc, nfd, line);
|
|
assertEqualWString(wc_nfd, wp);
|
|
}
|
|
|
|
/*
|
|
* Copy an NFD wide-string for export.
|
|
*/
|
|
assertEqualInt(0, archive_mstring_copy_wcs(
|
|
&mstr, wc_nfd));
|
|
assertEqualInt(0, archive_mstring_get_mbs_l(
|
|
a, &mstr, &mp, &mplen, t_sconv8));
|
|
failure("WCS NFD(%s) should be UTF-8 NFD:%d"
|
|
,nfd, line);
|
|
assertEqualUTF8String(utf8_nfd, mp);
|
|
}
|
|
}
|
|
|
|
archive_string_free(&utf8);
|
|
archive_mstring_clean(&mstr);
|
|
fclose(fp);
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
|
|
}
|
|
|
|
static void
|
|
test_archive_string_canonicalization(void)
|
|
{
|
|
struct archive *a;
|
|
struct archive_string_conv *sconv;
|
|
|
|
setlocale(LC_ALL, "en_US.UTF-8");
|
|
|
|
assert((a = archive_read_new()) != NULL);
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "UTF-8", 1)));
|
|
failure("Charset name should be UTF-8");
|
|
assertEqualString("UTF-8",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "UTF8", 1)));
|
|
failure("Charset name should be UTF-8");
|
|
assertEqualString("UTF-8",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "utf8", 1)));
|
|
failure("Charset name should be UTF-8");
|
|
assertEqualString("UTF-8",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "UTF-16BE", 1)));
|
|
failure("Charset name should be UTF-16BE");
|
|
assertEqualString("UTF-16BE",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "UTF16BE", 1)));
|
|
failure("Charset name should be UTF-16BE");
|
|
assertEqualString("UTF-16BE",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "utf16be", 1)));
|
|
failure("Charset name should be UTF-16BE");
|
|
assertEqualString("UTF-16BE",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "UTF-16LE", 1)));
|
|
failure("Charset name should be UTF-16LE");
|
|
assertEqualString("UTF-16LE",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "UTF16LE", 1)));
|
|
failure("Charset name should be UTF-16LE");
|
|
assertEqualString("UTF-16LE",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertA(NULL != (sconv =
|
|
archive_string_conversion_to_charset(a, "utf16le", 1)));
|
|
failure("Charset name should be UTF-16LE");
|
|
assertEqualString("UTF-16LE",
|
|
archive_string_conversion_charset_name(sconv));
|
|
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
|
|
}
|
|
|
|
static void
|
|
check_string(struct archive *a, struct archive_mstring *mstr, struct archive_string_conv *sc,
|
|
const char *exp, const wchar_t *wexp)
|
|
{
|
|
/* Do all the tests on a copy so that we can have a clear initial state every time */
|
|
struct archive_mstring mstr2;
|
|
const char *p = NULL;
|
|
const wchar_t *wp = NULL;
|
|
size_t len = 0;
|
|
|
|
memset(&mstr2, 0, sizeof(mstr2));
|
|
|
|
archive_mstring_copy(&mstr2, mstr);
|
|
assertEqualInt(0, archive_mstring_get_mbs(a, &mstr2, &p));
|
|
assertEqualString(exp, p);
|
|
p = NULL;
|
|
|
|
archive_mstring_copy(&mstr2, mstr);
|
|
assertEqualInt(0, archive_mstring_get_utf8(a, &mstr2, &p));
|
|
assertEqualString(exp, p);
|
|
p = NULL;
|
|
|
|
archive_mstring_copy(&mstr2, mstr);
|
|
assertEqualInt(0, archive_mstring_get_wcs(a, &mstr2, &wp));
|
|
assertEqualWString(wexp, wp);
|
|
wp = NULL;
|
|
|
|
archive_mstring_copy(&mstr2, mstr);
|
|
assertEqualInt(0, archive_mstring_get_mbs_l(a, &mstr2, &p, &len, sc));
|
|
assertEqualString(exp, p);
|
|
assertEqualInt(len, strlen(exp));
|
|
p = NULL;
|
|
len = 0;
|
|
|
|
archive_mstring_clean(&mstr2);
|
|
}
|
|
|
|
/*
|
|
* Make sure no matter what the input encoding is, the string can be
|
|
* converted too all the output encodings.
|
|
*/
|
|
static void
|
|
test_archive_string_set_get(void)
|
|
{
|
|
struct archive *a;
|
|
struct archive_mstring mstr;
|
|
struct archive_string_conv *sc;
|
|
|
|
setlocale(LC_ALL, "en_US.UTF-8");
|
|
|
|
assert((a = archive_read_new()) != NULL);
|
|
memset(&mstr, 0, sizeof(mstr));
|
|
|
|
assertA(NULL != (sc =
|
|
archive_string_conversion_to_charset(a, "UTF-8", 1)));
|
|
failure("Charset name should be UTF-8");
|
|
assertEqualString("UTF-8",
|
|
archive_string_conversion_charset_name(sc));
|
|
|
|
assertEqualInt(0, archive_mstring_copy_mbs(&mstr, "AAA"));
|
|
check_string(a, &mstr, sc, "AAA", L"AAA");
|
|
assertEqualInt(4, archive_mstring_copy_utf8(&mstr, "BBBB"));
|
|
check_string(a, &mstr, sc, "BBBB", L"BBBB");
|
|
assertEqualInt(0, archive_mstring_copy_wcs(&mstr, L"CCC12"));
|
|
check_string(a, &mstr, sc, "CCC12", L"CCC12");
|
|
assertEqualInt(0, archive_mstring_copy_mbs_len_l(&mstr, "DDDD-l", 6, sc));
|
|
check_string(a, &mstr, sc, "DDDD-l", L"DDDD-l");
|
|
assertEqualInt(0, archive_mstring_update_utf8(a, &mstr, "EEEEE---H"));
|
|
check_string(a, &mstr, sc, "EEEEE---H", L"EEEEE---H");
|
|
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
|
|
}
|
|
|
|
DEFINE_TEST(test_archive_string_conversion)
|
|
{
|
|
static const char reffile[] = "test_archive_string_conversion.txt.Z";
|
|
static const char testdata[] = "testdata.txt";
|
|
struct archive *a;
|
|
struct archive_entry *ae;
|
|
char buff[512];
|
|
ssize_t size;
|
|
FILE *fp;
|
|
|
|
/*
|
|
* Extract a test pattern file.
|
|
*/
|
|
extract_reference_file(reffile);
|
|
assert((a = archive_read_new()) != NULL);
|
|
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
|
|
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
|
|
assertEqualIntA(a, ARCHIVE_OK,
|
|
archive_read_open_filename(a, reffile, 512));
|
|
|
|
assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
|
|
assert((fp = fopen(testdata, "w")) != NULL);
|
|
while ((size = archive_read_data(a, buff, 512)) > 0)
|
|
assertEqualInt(size, fwrite(buff, 1, size, fp));
|
|
assertEqualInt(0, fclose(fp));
|
|
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
|
|
|
|
test_archive_string_normalization_nfc(testdata);
|
|
test_archive_string_normalization_mac_nfd(testdata);
|
|
test_archive_string_canonicalization();
|
|
test_archive_string_set_get();
|
|
}
|