---
src/util.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/util.h | 4 ++++
2 files changed, 63 insertions(+), 0 deletions(-)
diff --git a/src/util.c b/src/util.c
index 84ce507..7a96afe 100644
--- a/src/util.c
+++ b/src/util.c
@@ -26,6 +26,7 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
+#include <endian.h>
#include <glib.h>
@@ -692,3 +693,61 @@ unsigned char *pack_7bit(const unsigned char *in, long len, int
byte_offset,
return pack_7bit_own_buf(in, len, byte_offset, ussd, items_written,
terminator, buf);
}
+
+/*!
+ * Converts text coded using ISO/IEC 10646 UCS-2 encoding (big-endian byte
+ * order) into UTF-8 encoded text.  Input buffer length is given in bytes,
+ * not words; a trailing odd byte is ignored.
+ *
+ * Conversion stops at the first 16-bit unit equal to terminator, or at the
+ * end of the buffer.  UCS-2 has no surrogate pairs, so any unit in the
+ * range 0xD800-0xDFFF found before the terminator makes the conversion
+ * fail.
+ *
+ * Returns a newly-allocated UTF-8 encoded string, or NULL if buffer is
+ * NULL, len is negative, or the conversion could not be performed.
+ * Returns the number of bytes read from the UCS-2 encoded string in
+ * items_read (if not NULL, and only on success), not including the
+ * terminator character.  Returns the number of bytes written into the
+ * UTF-8 encoded string in items_written (if not NULL), not including the
+ * terminating '\0' character.  The caller is responsible for freeing the
+ * returned value with g_free().
+ */
+char *convert_ucs2_to_utf8(const unsigned char *buffer, long len,
+				long *items_read, long *items_written,
+				unsigned short terminator)
+{
+	gunichar2 *utf16;
+	long units = 0;
+	long i;
+	char *ret;
+
+	if (buffer == NULL || len < 0)
+		return NULL;
+
+	/* Always allocate at least one unit: a zero-sized GLib allocation
+	 * yields NULL and g_utf16_to_utf8() rejects a NULL input, which
+	 * would turn an empty string into a failure. */
+	utf16 = g_new(gunichar2, len / 2 + 1);
+
+	/* Assemble each unit from its two bytes so the result is in host
+	 * order whatever the CPU endianness, and so the (possibly
+	 * unaligned) byte buffer is never accessed as 16-bit words. */
+	for (i = 0; i + 1 < len; i += 2) {
+		gunichar2 ucs2 = (gunichar2) ((buffer[i] << 8) | buffer[i + 1]);
+
+		if (ucs2 == terminator)
+			break;
+
+		/* All UCS-2 text is valid UTF-16 text, but UTF-16
+		 * surrogates are not valid in UCS-2. */
+		if ((ucs2 & 0xf800) == 0xd800) {
+			g_free(utf16);
+			return NULL;
+		}
+
+		utf16[units++] = ucs2;
+	}
+
+	/* The error details are not used by this function, so no GError
+	 * is requested; failure is reported through the NULL return. */
+	ret = g_utf16_to_utf8(utf16, units, NULL, items_written, NULL);
+	g_free(utf16);
+
+	if (ret && items_read)
+		*items_read = i;
+
+	return ret;
+}
diff --git a/src/util.h b/src/util.h
index 6269630..8589f0c 100644
--- a/src/util.h
+++ b/src/util.h
@@ -56,3 +56,7 @@ unsigned char *pack_7bit_own_buf(const unsigned char *in, long len,
unsigned char *pack_7bit(const unsigned char *in, long len, int byte_offset,
gboolean ussd,
long *items_written, unsigned char terminator);
+/*!
+ * Converts big-endian UCS-2 text of len bytes into a newly-allocated UTF-8
+ * string, stopping at the first 16-bit unit equal to terminator.  Returns
+ * NULL if the text contains a surrogate code unit or cannot be converted.
+ * On success *items_read (if not NULL) receives the number of input bytes
+ * consumed, excluding the terminator; items_written (if not NULL) is
+ * handed to g_utf16_to_utf8() to receive the UTF-8 length in bytes.
+ */
+char *convert_ucs2_to_utf8(const unsigned char *buffer, long len,
+ long *items_read, long *items_written,
+ unsigned short terminator);
--
1.6.0
Show replies by date
On Sun, 5 Jul 2009 05:13:45 +0200, Andrzej Zaborowski
<andrew.zaborowski(a)intel.com> wrote:
+
+ ret = g_utf16_to_utf8((const gunichar2 *) buffer, i / 2,
+ NULL, items_written, &error);
Why not use iconv() with UCS-2BE to UTF-8?
Cheers,
Aki