diff --git a/c_bridge.c b/c_bridge.c index f4e2839..2a574d3 100644 --- a/c_bridge.c +++ b/c_bridge.c @@ -6,16 +6,16 @@ #include // See description in c_bridge.h -const int detectCharset(void *detector, - void *input, - int input_len, - int *status, - MatchData *matchBuffer, +const int detectCharset(void *detector, + void *input, + int input_len, + int *status, + MatchData *matchBuffer, int matchBufferSize) { // Put input bytes in the detector. ucsdet_setText((UCharsetDetector*)detector, (char*)input, input_len, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } @@ -25,7 +25,7 @@ const int detectCharset(void *detector, // Perform analysis and return all guesses and their count. bestGuesses = ucsdet_detectAll((UCharsetDetector*)detector, &matchCount, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } @@ -42,19 +42,19 @@ const int detectCharset(void *detector, // Fill guessed encoding bestGuessedCharset = ucsdet_getName(bestGuess, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } // Fill guessed language bestGuessedLanguage = ucsdet_getLanguage(bestGuess, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } // Fill its confidence rating int32_t conf = ucsdet_getConfidence(bestGuess, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } @@ -69,7 +69,7 @@ const int detectCharset(void *detector, // See description in c_bridge.h int convertToUtf16(const char *srcEncoding, - UChar *dest, + UChar *dest, int32_t destCapacity, const char *src, int32_t srcLength, @@ -77,13 +77,13 @@ int convertToUtf16(const char *srcEncoding, UConverter *conv; conv = ucnv_open(srcEncoding, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } /* Convert from original encoding to UTF-16 */ int len = ucnv_toUChars(conv, dest, destCapacity, src, srcLength, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; 
} @@ -94,7 +94,7 @@ int convertToUtf16(const char *srcEncoding, // See description in c_bridge.h int convertFromUtf16(const char *destEncoding, - char *dest, + char *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, @@ -102,17 +102,17 @@ int convertFromUtf16(const char *destEncoding, UConverter *conv; conv = ucnv_open(destEncoding, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } /* Convert from UTF-16 to destination encoding */ int len = ucnv_fromUChars(conv, dest, destCapacity, src, srcLength, status); - if (*status != U_ZERO_ERROR) { + if U_FAILURE(*status) { return 0; } ucnv_close(conv); return len; -} \ No newline at end of file +} diff --git a/convert.go b/convert.go index 3577a53..4d5e3ab 100644 --- a/convert.go +++ b/convert.go @@ -14,7 +14,7 @@ const ( DefaultMaxTextSize = 1024 * 1024 // Default value for the max text length in conversion operations utf8MaxCharSize = 4 utf16MaxCharSize = 4 -) +) var ( Utf8CString = C.CString("UTF-8") @@ -35,7 +35,7 @@ type CharsetConverter struct { // are created in memory once and then used. 'maxTextSize' sets the size of these buffers. // ICU library would return error if any processed text is longer than this parameter. // -// NOTE: +// NOTE: // // UTF8 uses 1 to 4 bytes for each symbol. // UTF16 uses 2 bytes to 4 bytes for each symbol. 
@@ -79,7 +79,7 @@ func (conv *CharsetConverter) ConvertToUtf8(input []byte, srcEncoding string) ([ C.int32_t(len(input)), (*C.int)(unsafe.Pointer(&status))) - if status == U_ZERO_ERROR { + if isSuccess(status) { nConvLen := C.convertFromUtf16( Utf8CString, (*C.char)(unsafe.Pointer(&conv.utf8Buffer[0])), @@ -88,7 +88,7 @@ func (conv *CharsetConverter) ConvertToUtf8(input []byte, srcEncoding string) ([ C.int32_t(convLen), (*C.int)(unsafe.Pointer(&status))) - if status == U_ZERO_ERROR { + if isSuccess(status) { resStr := conv.utf8Buffer[:nConvLen] return ([]byte)(resStr), nil } diff --git a/detect.go b/detect.go index d966d04..649424b 100644 --- a/detect.go +++ b/detect.go @@ -11,9 +11,21 @@ import ( ) const ( - U_ZERO_ERROR = 0 // ICU common constant error code which means that no error occured - MatchDataBufferSize = 25 // Size of the buffer for detection results (Max count of returned guesses per detect call) -) + U_ZERO_ERROR = 0 // ICU common constant error code which means that no error occurred + U_ERROR_LIMIT = 0x7FFFFFFF // Dirty hack: negative status codes are being turned into large positive ints + MatchDataBufferSize = 25 // Size of the buffer for detection results (Max count of returned guesses per detect call) +) + +// Go implementation of the ICU U_SUCCESS macro. Negative status codes are +// warnings, 0 is a success without warnings, > 0 is an error. +func isSuccess(status int) bool { + return status <= U_ZERO_ERROR || status >= U_ERROR_LIMIT +} + +// Go implementation of the ICU U_FAILURE macro. +func isFailure(status int) bool { + return status > U_ZERO_ERROR && status < U_ERROR_LIMIT +} // CharsetDetector provides ICU charset detection functionality. 
type CharsetDetector struct { @@ -39,7 +51,7 @@ func NewCharsetDetector() (*CharsetDetector, error) { det.ptr = C.ucsdet_open((*C.UErrorCode)(statusPtr)) - if status != U_ZERO_ERROR { + if isFailure(status) { return nil, fmt.Errorf("ICU Error code returned: %d", status) } @@ -63,14 +75,14 @@ func (det *CharsetDetector) GuessCharset(input []byte) (matches []Match, err err // Perform detection. Guess count is the number of matches returned. // The matches themself are put in the result buffer guessCount := C.detectCharset( - unsafe.Pointer(det.ptr), - unsafe.Pointer(&input[0]), - C.int(inputLen), - (*C.int)(unsafe.Pointer(&status)), + unsafe.Pointer(det.ptr), + unsafe.Pointer(&input[0]), + C.int(inputLen), + (*C.int)(unsafe.Pointer(&status)), (*C.MatchData)(unsafe.Pointer(&det.resBuffer[0])), C.int(MatchDataBufferSize)) - if status == U_ZERO_ERROR { + if isSuccess(status) { // Convert the returned number of entries from result buffer to a slice // that will be returned count := int(guessCount)