Function cesu8::from_java_cesu8

pub fn from_java_cesu8(bytes: &[u8]) -> Result<Cow<str>, Cesu8DecodingError>

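Converts Java's modified UTF-8 data (which encodes supplementary characters as 6-byte surrogate pairs and U+0000 as the two bytes `0xC0 0x80`) to a Rust string, re-encoding only if necessary: input that is already valid UTF-8 is returned as a borrowed string without allocating. Returns an error if the data cannot be represented as valid UTF-8.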

use std::borrow::Cow;
use cesu8::from_java_cesu8;

// This string is valid as UTF-8 or modified UTF-8, so it doesn't change,
// and we can convert it without allocating memory.
assert_eq!(Cow::Borrowed("aé日"),
           from_java_cesu8("aé日".as_bytes()).unwrap());

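// This string is modified UTF-8 data containing a 6-byte surrogate pair,
// which is re-encoded as a single 4-byte UTF-8 character. `Cow` equality
// compares string contents, so `Cow::Borrowed` matches the re-encoded result.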
let data = &[0xED, 0xA0, 0x81, 0xED, 0xB0, 0x81];
assert_eq!(Cow::Borrowed("\u{10401}"),
           from_java_cesu8(data).unwrap());

// This string is modified UTF-8 data containing two null code points
// (U+0000), each encoded as the two-byte sequence 0xC0 0x80.
let data = &[0xC0, 0x80, 0xC0, 0x80];
assert_eq!(Cow::Borrowed("\0\0"),
           from_java_cesu8(data).unwrap());