1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
use std::io::{self, Read};
use std::str;
use std::fmt;

#[derive(Debug)]
pub enum CharReadError {
    UnexpectedEof,
    Utf8(str::Utf8Error),
    Io(io::Error)
}

impl From<str::Utf8Error> for CharReadError {
    fn from(e: str::Utf8Error) -> CharReadError {
        CharReadError::Utf8(e)
    }
}

impl From<io::Error> for CharReadError {
    fn from(e: io::Error) -> CharReadError {
        CharReadError::Io(e)
    }
}

impl fmt::Display for CharReadError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::CharReadError::*;
        match *self {
            UnexpectedEof => write!(f, "unexpected end of stream"),
            Utf8(ref e) => write!(f, "UTF-8 decoding error: {}", e),
            Io(ref e) => write!(f, "I/O error: {}", e)
        }
    }
}

pub fn next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError> {
    const MAX_CODEPOINT_LEN: usize = 4;

    let mut bytes = source.bytes();
    let mut buf = [0u8; MAX_CODEPOINT_LEN];
    let mut pos = 0;

    loop {
        let next = match bytes.next() {
            Some(Ok(b)) => b,
            Some(Err(e)) => return Err(e.into()),
            None if pos == 0 => return Ok(None),
            None => return Err(CharReadError::UnexpectedEof)
        };
        buf[pos] = next;
        pos += 1;

        match str::from_utf8(&buf[..pos]) {
            Ok(s) => return Ok(s.chars().next()),  // always Some(..)
            Err(_) if pos < MAX_CODEPOINT_LEN => {},
            Err(e) => return Err(e.into())
        }
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_next_char_from() {
        use std::io;
        use std::error::Error;

        let mut bytes: &[u8] = "correct".as_bytes();    // correct ASCII
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('c'));

        let mut bytes: &[u8] = "правильно".as_bytes();  // correct BMP
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('п'));

        let mut bytes: &[u8] = "😊".as_bytes();          // correct non-BMP
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('😊'));

        let mut bytes: &[u8] = b"";                     // empty
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), None);

        let mut bytes: &[u8] = b"\xf0\x9f\x98";         // incomplete code point
        match super::next_char_from(&mut bytes).unwrap_err() {
            super::CharReadError::UnexpectedEof => {},
            e => panic!("Unexpected result: {:?}", e)
        };

        let mut bytes: &[u8] = b"\xff\x9f\x98\x32";     // invalid code point
        match super::next_char_from(&mut bytes).unwrap_err() {
            super::CharReadError::Utf8(_) => {},
            e => panic!("Unexpected result: {:?}", e)
        };


        // error during read
        struct ErrorReader;
        impl io::Read for ErrorReader {
            fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
                Err(io::Error::new(io::ErrorKind::Other, "test error"))
            }
        }

        let mut r = ErrorReader;
        match super::next_char_from(&mut r).unwrap_err() {
            super::CharReadError::Io(ref e) if e.kind() == io::ErrorKind::Other &&
                                               e.description() == "test error" => {},
            e => panic!("Unexpected result: {:?}", e)
        }
    }
}