* Copyright 2003-2008, Haiku, Inc. All Rights Reserved.
* Distributed under the terms of the MIT License.
*
* Authors:
* Andrew Bachmann
*/
#include <CharacterSet.h>
#include <CharacterSetRoster.h>
#include <UTF8.h>
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
// Debug tracing helper: DEBPRINT expands to a printf call when DEBUG_CONV is
// defined and to an empty statement otherwise. Callers pass the complete
// printf argument list wrapped in an extra pair of parentheses, e.g.
// DEBPRINT(("value: %d\n", value)). Both expansions already end in ';'.
#ifdef DEBUG_CONV
# define DEBPRINT(ARGS) printf ARGS;
#else
# define DEBPRINT(ARGS) ;
#endif
using namespace BPrivate;
// Prototype for iconvctl(); nothing in the visible part of this file calls it.
// NOTE(review): presumably mirrors the libiconv extension of the same name
// because <iconv.h> does not declare it here -- confirm before removing.
int iconvctl(iconv_t icd, int request, void* argument);
/*!	Skips the invalid character at the start of \a *inputBuffer.

	The length of the offending sequence is found by probing: the conversion
	is reset and then fed the first 1, 2, 3, ... input bytes until iconv stops
	reporting "incomplete input" (EINVAL). The number of bytes probed at that
	point is the amount that gets discarded.

	\param conversion  Conversion descriptor to probe with. Its state is reset
	                   before every probe.
	\param inputBuffer In/out: advanced past the discarded bytes.
	\param inputLeft   In/out: decremented by the number of discarded bytes.
	                   Never more than \a *inputLeft bytes are discarded.
*/
static void
discard_invalid_input_character(iconv_t* conversion, char** inputBuffer,
	size_t* inputLeft)
{
	if (*inputLeft == 0)
		return;

	// Scratch output; whatever iconv manages to write here is thrown away.
	char outputBuffer[1];

	size_t skip = 1;
	for (; skip <= *inputLeft; skip++) {
		// Reset the conversion state so every probe starts from scratch.
		iconv(*conversion, NULL, NULL, NULL, NULL);

		// iconv() updates the pointers and counters it is given, so probe
		// with copies: neither the caller's buffer nor the loop counter may
		// be modified by the probe itself.
		char* probe = *inputBuffer;
		size_t probeLeft = skip;
		char* output = outputBuffer;
		size_t outputLeft = sizeof(outputBuffer);
		size_t result = iconv(*conversion, &probe, &probeLeft, &output,
			&outputLeft);

		if (result != (size_t)-1) {
			// Not expected for an invalid character; skip the probed bytes
			// anyway so that the caller always makes progress.
			break;
		}

		if (errno == EINVAL) {
			// Too few input bytes for a verdict; probe with one more.
			continue;
		}

		if (errno == EILSEQ) {
			// Smallest prefix that iconv rejects as invalid.
			break;
		}

		// Any other error: keep probing with a longer prefix.
	}

	// If the input ran out before iconv reached a verdict (e.g. a truncated
	// multi-byte sequence at the very end), skip is one past the input size.
	// Clamp it so we never step outside the buffer or underflow *inputLeft.
	if (skip > *inputLeft)
		skip = *inputLeft;

	*inputBuffer += skip;
	*inputLeft -= skip;
}
/*!	Converts \a src from the encoding named \a from to the encoding named
	\a to using iconv.

	\param from       iconv name of the source encoding.
	\param to         iconv name of the target encoding.
	\param src        Input bytes.
	\param srcLen     In: number of input bytes. Out: number of input bytes
	                  that were consumed (converted or discarded).
	\param dst        Output buffer.
	\param dstLen     In: size of \a dst in bytes. Out: number of bytes
	                  written to \a dst. Set to 0 when \a *srcLen is 0.
	\param state      May be NULL. When NULL or pointing to 0, the conversion
	                  is initialised first, and a non-NULL \a *state is then
	                  set to 1.
	\param substitute Character written in place of every input character that
	                  iconv rejects with EILSEQ. It is interpreted as
	                  ISO-8859-1 and converted to the target encoding.

	\return B_OK on success, including when the output buffer filled up
	        before all input was converted; B_ERROR if the conversion could
	        not be opened; otherwise the errno value of an unexpected iconv
	        failure.
*/
status_t
convert_encoding(const char* from, const char* to, const char* src,
	int32* srcLen, char* dst, int32* dstLen, int32* state,
	char substitute)
{
	if (*srcLen == 0) {
		// Nothing to convert, so nothing gets written either.
		*dstLen = 0;
		return B_OK;
	}

	iconv_t conversion = iconv_open(to, from);
	if (conversion == (iconv_t)-1) {
		DEBPRINT(("iconv_open failed\n"));
		return B_ERROR;
	}

	size_t outputLeft = *dstLen;

	if (state == NULL || *state == 0) {
		if (state != NULL)
			*state = 1;
		// Initial call: put the descriptor into its initial state; iconv may
		// write an initial sequence to dst while doing so.
		iconv(conversion, NULL, NULL, &dst, &outputLeft);
	}

	// iconv() wants a non-const input pointer; it advances src through this
	// alias as input is consumed.
	char** inputBuffer = const_cast<char**>(&src);
	size_t inputLeft = *srcLen;
	do {
		size_t nonReversibleConversions = iconv(conversion, inputBuffer,
			&inputLeft, &dst, &outputLeft);
		if (nonReversibleConversions == (size_t)-1) {
			if (errno == E2BIG) {
				// Output buffer is full: report what was converted so far.
				break;
			}

			switch (errno) {
				case EILSEQ:
				{
					// Unconvertible input: drop the offending character ...
					discard_invalid_input_character(&conversion, inputBuffer,
						&inputLeft);

					// ... and emit the substitute character, converted from
					// ISO-8859-1 to the target encoding. This is best effort:
					// failures of the substitution itself are ignored.
					char original = substitute;
					size_t len = 1;
					char* copy = &original;
					iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1");
					if (iso8859_1to != (iconv_t)-1) {
						iconv(iso8859_1to, 0, 0, 0, 0);
						iconv(iso8859_1to, &copy, &len, &dst, &outputLeft);
						iconv_close(iso8859_1to);
					}
					break;
				}

				case EINVAL:
					// Incomplete multi-byte sequence at the end of the input:
					// eat one input byte and try again. The source pointer
					// itself has to be advanced, not the pointer to it.
					(*inputBuffer)++;
					inputLeft--;
					break;

				default:
				{
					// Unexpected failure; save errno before iconv_close() can
					// overwrite it.
					status_t status = errno;
					iconv_close(conversion);
					return status;
				}
			}
		}
	} while (inputLeft > 0 && outputLeft > 0);

	// Turn the "bytes left" counters into "bytes consumed/produced".
	*srcLen -= inputLeft;
	*dstLen -= outputLeft;
	iconv_close(conversion);
	return B_OK;
}
/*!	Converts \a src from the encoding identified by \a srcEncoding to UTF-8.

	The character set is looked up with
	BCharacterSetRoster::GetCharacterSetByConversionID(); its name is used as
	the source encoding for convert_encoding(), to which all other arguments
	are passed through unchanged.

	\return B_ERROR if no character set is known for \a srcEncoding, otherwise
	        the result of convert_encoding().
*/
status_t
convert_to_utf8(uint32 srcEncoding, const char* src, int32* srcLen,
	char* dst, int32* dstLen, int32* state, char substitute)
{
	const BCharacterSet* charset
		= BCharacterSetRoster::GetCharacterSetByConversionID(srcEncoding);
	if (charset == NULL)
		return B_ERROR;

	// Use the same test as the DEBPRINT switch at the top of the file, so a
	// value-less "#define DEBUG_CONV" enables this trace as well.
#ifdef DEBUG_CONV
	fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName());
	for (int i = 0 ; i < *srcLen ; i++) {
		fprintf(stderr, "%c", src[i]);
	}
	fprintf(stderr, "\"\n");
#endif

	return convert_encoding(charset->GetName(), "UTF-8", src, srcLen,
		dst, dstLen, state, substitute);
}
/*!	Converts \a src from UTF-8 to the encoding identified by \a dstEncoding.

	The character set is looked up with
	BCharacterSetRoster::GetCharacterSetByConversionID(); its name is used as
	the target encoding for convert_encoding(), to which all other arguments
	are passed through unchanged.

	\return B_ERROR if no character set is known for \a dstEncoding, otherwise
	        the result of convert_encoding().
*/
status_t
convert_from_utf8(uint32 dstEncoding, const char* src, int32* srcLen,
	char* dst, int32* dstLen, int32* state, char substitute)
{
	const BCharacterSet* charset
		= BCharacterSetRoster::GetCharacterSetByConversionID(dstEncoding);
	if (charset == NULL)
		return B_ERROR;

	// Use the same test as the DEBPRINT switch at the top of the file, so a
	// value-less "#define DEBUG_CONV" enables this trace as well.
#ifdef DEBUG_CONV
	fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName());
	for (int i = 0 ; i < *srcLen ; i++) {
		fprintf(stderr, "%c", src[i]);
	}
	fprintf(stderr, "\"\n");
#endif

	return convert_encoding("UTF-8", charset->GetName(), src, srcLen,
		dst, dstLen, state, substitute);
}