1970 lines
66 KiB
Rust
1970 lines
66 KiB
Rust
// Copyright Mozilla Foundation. See the COPYRIGHT
|
|
// file at the top-level directory of this distribution.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
//! This module provides structs that use lifetimes to couple bounds checking
|
|
//! and space availability checking and detaching those from actual slice
|
|
//! reading/writing.
|
|
//!
|
|
//! At present, the internals of the implementation are safe code, so the
|
|
//! bound checks currently also happen on read/write. Once this code works,
|
|
//! the plan is to replace the internals with unsafe code that omits the
|
|
//! bound check at the read/write time.
|
|
|
|
#[cfg(all(
|
|
feature = "simd-accel",
|
|
any(
|
|
target_feature = "sse2",
|
|
all(target_endian = "little", target_arch = "aarch64"),
|
|
all(target_endian = "little", target_feature = "neon")
|
|
)
|
|
))]
|
|
use crate::simd_funcs::*;
|
|
|
|
#[cfg(all(
|
|
feature = "simd-accel",
|
|
any(
|
|
target_feature = "sse2",
|
|
all(target_endian = "little", target_arch = "aarch64"),
|
|
all(target_endian = "little", target_feature = "neon")
|
|
)
|
|
))]
|
|
use packed_simd::u16x8;
|
|
|
|
use super::DecoderResult;
|
|
use super::EncoderResult;
|
|
use crate::ascii::*;
|
|
use crate::utf_8::convert_utf8_to_utf16_up_to_invalid;
|
|
use crate::utf_8::utf8_valid_up_to;
|
|
|
|
pub enum Space<T> {
|
|
Available(T),
|
|
Full(usize),
|
|
}
|
|
|
|
pub enum CopyAsciiResult<T, U> {
|
|
Stop(T),
|
|
GoOn(U),
|
|
}
|
|
|
|
pub enum NonAscii {
|
|
BmpExclAscii(u16),
|
|
Astral(char),
|
|
}
|
|
|
|
pub enum Unicode {
|
|
Ascii(u8),
|
|
NonAscii(NonAscii),
|
|
}
|
|
|
|
// Start UTF-16LE/BE fast path
|
|
|
|
pub trait Endian {
|
|
const OPPOSITE_ENDIAN: bool;
|
|
}
|
|
|
|
pub struct BigEndian;
|
|
|
|
impl Endian for BigEndian {
|
|
#[cfg(target_endian = "little")]
|
|
const OPPOSITE_ENDIAN: bool = true;
|
|
|
|
#[cfg(target_endian = "big")]
|
|
const OPPOSITE_ENDIAN: bool = false;
|
|
}
|
|
|
|
pub struct LittleEndian;
|
|
|
|
impl Endian for LittleEndian {
|
|
#[cfg(target_endian = "little")]
|
|
const OPPOSITE_ENDIAN: bool = false;
|
|
|
|
#[cfg(target_endian = "big")]
|
|
const OPPOSITE_ENDIAN: bool = true;
|
|
}
|
|
|
|
#[derive(Debug, Copy, Clone)]
|
|
struct UnalignedU16Slice {
|
|
ptr: *const u8,
|
|
len: usize,
|
|
}
|
|
|
|
impl UnalignedU16Slice {
|
|
#[inline(always)]
|
|
pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
|
|
UnalignedU16Slice { ptr, len }
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn trim_last(&mut self) {
|
|
assert!(self.len > 0);
|
|
self.len -= 1;
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn at(&self, i: usize) -> u16 {
|
|
use core::mem::MaybeUninit;
|
|
|
|
assert!(i < self.len);
|
|
unsafe {
|
|
let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
|
|
::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
|
|
u.assume_init()
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "simd-accel")]
|
|
#[inline(always)]
|
|
pub fn simd_at(&self, i: usize) -> u16x8 {
|
|
assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
|
|
let byte_index = i * 2;
|
|
unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn len(&self) -> usize {
|
|
self.len
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn tail(&self, from: usize) -> UnalignedU16Slice {
|
|
// XXX the return value should be restricted not to
|
|
// outlive self.
|
|
assert!(from <= self.len);
|
|
unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
|
|
}
|
|
|
|
#[cfg(feature = "simd-accel")]
|
|
#[inline(always)]
|
|
pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
|
|
assert!(self.len <= other.len());
|
|
let mut offset = 0;
|
|
if SIMD_STRIDE_SIZE / 2 <= self.len {
|
|
let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
|
|
loop {
|
|
let mut simd = self.simd_at(offset);
|
|
if E::OPPOSITE_ENDIAN {
|
|
simd = simd_byte_swap(simd);
|
|
}
|
|
unsafe {
|
|
store8_unaligned(other.as_mut_ptr().add(offset), simd);
|
|
}
|
|
if contains_surrogates(simd) {
|
|
break;
|
|
}
|
|
offset += SIMD_STRIDE_SIZE / 2;
|
|
if offset > len_minus_stride {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
while offset < self.len {
|
|
let unit = swap_if_opposite_endian::<E>(self.at(offset));
|
|
other[offset] = unit;
|
|
if super::in_range16(unit, 0xD800, 0xE000) {
|
|
return Some((unit, offset));
|
|
}
|
|
offset += 1;
|
|
}
|
|
None
|
|
}
|
|
|
|
#[cfg(not(feature = "simd-accel"))]
|
|
#[inline(always)]
|
|
fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
|
|
assert!(self.len <= other.len());
|
|
for (i, target) in other.iter_mut().enumerate().take(self.len) {
|
|
let unit = swap_if_opposite_endian::<E>(self.at(i));
|
|
*target = unit;
|
|
if super::in_range16(unit, 0xD800, 0xE000) {
|
|
return Some((unit, i));
|
|
}
|
|
}
|
|
None
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn copy_unaligned_basic_latin_to_ascii_alu<E: Endian>(
|
|
src: UnalignedU16Slice,
|
|
dst: &mut [u8],
|
|
offset: usize,
|
|
) -> CopyAsciiResult<usize, (u16, usize)> {
|
|
let len = ::core::cmp::min(src.len(), dst.len());
|
|
let mut i = 0usize;
|
|
loop {
|
|
if i == len {
|
|
return CopyAsciiResult::Stop(i + offset);
|
|
}
|
|
let unit = swap_if_opposite_endian::<E>(src.at(i));
|
|
if unit > 0x7F {
|
|
return CopyAsciiResult::GoOn((unit, i + offset));
|
|
}
|
|
dst[i] = unit as u8;
|
|
i += 1;
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn swap_if_opposite_endian<E: Endian>(unit: u16) -> u16 {
|
|
if E::OPPOSITE_ENDIAN {
|
|
unit.swap_bytes()
|
|
} else {
|
|
unit
|
|
}
|
|
}
|
|
|
|
#[cfg(not(feature = "simd-accel"))]
|
|
#[inline(always)]
|
|
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
|
|
src: UnalignedU16Slice,
|
|
dst: &mut [u8],
|
|
) -> CopyAsciiResult<usize, (u16, usize)> {
|
|
copy_unaligned_basic_latin_to_ascii_alu::<E>(src, dst, 0)
|
|
}
|
|
|
|
#[cfg(feature = "simd-accel")]
|
|
#[inline(always)]
|
|
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
|
|
src: UnalignedU16Slice,
|
|
dst: &mut [u8],
|
|
) -> CopyAsciiResult<usize, (u16, usize)> {
|
|
let len = ::core::cmp::min(src.len(), dst.len());
|
|
let mut offset = 0;
|
|
if SIMD_STRIDE_SIZE <= len {
|
|
let len_minus_stride = len - SIMD_STRIDE_SIZE;
|
|
loop {
|
|
let mut first = src.simd_at(offset);
|
|
let mut second = src.simd_at(offset + (SIMD_STRIDE_SIZE / 2));
|
|
if E::OPPOSITE_ENDIAN {
|
|
first = simd_byte_swap(first);
|
|
second = simd_byte_swap(second);
|
|
}
|
|
if !simd_is_basic_latin(first | second) {
|
|
break;
|
|
}
|
|
let packed = simd_pack(first, second);
|
|
unsafe {
|
|
store16_unaligned(dst.as_mut_ptr().add(offset), packed);
|
|
}
|
|
offset += SIMD_STRIDE_SIZE;
|
|
if offset > len_minus_stride {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(offset), &mut dst[offset..], offset)
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn convert_unaligned_utf16_to_utf8<E: Endian>(
|
|
src: UnalignedU16Slice,
|
|
dst: &mut [u8],
|
|
) -> (usize, usize, bool) {
|
|
if dst.len() < 4 {
|
|
return (0, 0, false);
|
|
}
|
|
let mut src_pos = 0usize;
|
|
let mut dst_pos = 0usize;
|
|
let src_len = src.len();
|
|
let dst_len_minus_three = dst.len() - 3;
|
|
'outer: loop {
|
|
let mut non_ascii = match copy_unaligned_basic_latin_to_ascii::<E>(
|
|
src.tail(src_pos),
|
|
&mut dst[dst_pos..],
|
|
) {
|
|
CopyAsciiResult::GoOn((unit, read_written)) => {
|
|
src_pos += read_written;
|
|
dst_pos += read_written;
|
|
unit
|
|
}
|
|
CopyAsciiResult::Stop(read_written) => {
|
|
return (src_pos + read_written, dst_pos + read_written, false);
|
|
}
|
|
};
|
|
if dst_pos >= dst_len_minus_three {
|
|
break 'outer;
|
|
}
|
|
// We have enough destination space to commit to
|
|
// having read `non_ascii`.
|
|
src_pos += 1;
|
|
'inner: loop {
|
|
let non_ascii_minus_surrogate_start = non_ascii.wrapping_sub(0xD800);
|
|
if non_ascii_minus_surrogate_start > (0xDFFF - 0xD800) {
|
|
if non_ascii < 0x800 {
|
|
dst[dst_pos] = ((non_ascii >> 6) | 0xC0) as u8;
|
|
dst_pos += 1;
|
|
dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
|
|
dst_pos += 1;
|
|
} else {
|
|
dst[dst_pos] = ((non_ascii >> 12) | 0xE0) as u8;
|
|
dst_pos += 1;
|
|
dst[dst_pos] = (((non_ascii & 0xFC0) >> 6) | 0x80) as u8;
|
|
dst_pos += 1;
|
|
dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
|
|
dst_pos += 1;
|
|
}
|
|
} else if non_ascii_minus_surrogate_start <= (0xDBFF - 0xD800) {
|
|
// high surrogate
|
|
if src_pos < src_len {
|
|
let second = swap_if_opposite_endian::<E>(src.at(src_pos));
|
|
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
|
|
if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
|
|
// The next code unit is a low surrogate. Advance position.
|
|
src_pos += 1;
|
|
let point = (u32::from(non_ascii) << 10) + u32::from(second)
|
|
- (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
|
|
|
|
dst[dst_pos] = ((point >> 18) | 0xF0u32) as u8;
|
|
dst_pos += 1;
|
|
dst[dst_pos] = (((point & 0x3F000u32) >> 12) | 0x80u32) as u8;
|
|
dst_pos += 1;
|
|
dst[dst_pos] = (((point & 0xFC0u32) >> 6) | 0x80u32) as u8;
|
|
dst_pos += 1;
|
|
dst[dst_pos] = ((point & 0x3Fu32) | 0x80u32) as u8;
|
|
dst_pos += 1;
|
|
} else {
|
|
// The next code unit is not a low surrogate. Don't advance
|
|
// position and treat the high surrogate as unpaired.
|
|
return (src_pos, dst_pos, true);
|
|
}
|
|
} else {
|
|
// Unpaired surrogate at the end of buffer
|
|
return (src_pos, dst_pos, true);
|
|
}
|
|
} else {
|
|
// Unpaired low surrogate
|
|
return (src_pos, dst_pos, true);
|
|
}
|
|
if dst_pos >= dst_len_minus_three || src_pos == src_len {
|
|
break 'outer;
|
|
}
|
|
let unit = swap_if_opposite_endian::<E>(src.at(src_pos));
|
|
src_pos += 1;
|
|
if unit > 0x7F {
|
|
non_ascii = unit;
|
|
continue 'inner;
|
|
}
|
|
dst[dst_pos] = unit as u8;
|
|
dst_pos += 1;
|
|
continue 'outer;
|
|
}
|
|
}
|
|
(src_pos, dst_pos, false)
|
|
}
|
|
|
|
// Byte source
|
|
|
|
pub struct ByteSource<'a> {
|
|
slice: &'a [u8],
|
|
pos: usize,
|
|
}
|
|
|
|
impl<'a> ByteSource<'a> {
|
|
#[inline(always)]
|
|
pub fn new(src: &[u8]) -> ByteSource {
|
|
ByteSource { slice: src, pos: 0 }
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>> {
|
|
if self.pos < self.slice.len() {
|
|
Space::Available(ByteReadHandle::new(self))
|
|
} else {
|
|
Space::Full(self.consumed())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn read(&mut self) -> u8 {
|
|
let ret = self.slice[self.pos];
|
|
self.pos += 1;
|
|
ret
|
|
}
|
|
#[inline(always)]
|
|
fn unread(&mut self) -> usize {
|
|
self.pos -= 1;
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.pos
|
|
}
|
|
}
|
|
|
|
pub struct ByteReadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
source: &'a mut ByteSource<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> ByteReadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b> {
|
|
ByteReadHandle { source: src }
|
|
}
|
|
#[inline(always)]
|
|
pub fn read(self) -> (u8, ByteUnreadHandle<'a, 'b>) {
|
|
let byte = self.source.read();
|
|
let handle = ByteUnreadHandle::new(self.source);
|
|
(byte, handle)
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.source.consumed()
|
|
}
|
|
}
|
|
|
|
pub struct ByteUnreadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
source: &'a mut ByteSource<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> ByteUnreadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b> {
|
|
ByteUnreadHandle { source: src }
|
|
}
|
|
#[inline(always)]
|
|
pub fn unread(self) -> usize {
|
|
self.source.unread()
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.source.consumed()
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut ByteSource<'b> {
|
|
self.source
|
|
}
|
|
}
|
|
|
|
// UTF-16 destination
|
|
|
|
pub struct Utf16BmpHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut Utf16Destination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf16BmpHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b> {
|
|
Utf16BmpHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_ascii(ascii);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_bmp_excl_ascii(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_mid_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_upper_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut Utf16Destination<'b> {
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct Utf16AstralHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut Utf16Destination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf16AstralHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b> {
|
|
Utf16AstralHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_ascii(ascii);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_bmp_excl_ascii(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_upper_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_astral(astral);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_surrogate_pair(high, low);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_big5_combination(
|
|
self,
|
|
combined: u16,
|
|
combining: u16,
|
|
) -> &'a mut Utf16Destination<'b> {
|
|
self.dest.write_big5_combination(combined, combining);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut Utf16Destination<'b> {
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct Utf16Destination<'a> {
|
|
slice: &'a mut [u16],
|
|
pos: usize,
|
|
}
|
|
|
|
impl<'a> Utf16Destination<'a> {
|
|
#[inline(always)]
|
|
pub fn new(dst: &mut [u16]) -> Utf16Destination {
|
|
Utf16Destination { slice: dst, pos: 0 }
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>> {
|
|
if self.pos < self.slice.len() {
|
|
Space::Available(Utf16BmpHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>> {
|
|
if self.pos + 1 < self.slice.len() {
|
|
Space::Available(Utf16AstralHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
fn write_code_unit(&mut self, u: u16) {
|
|
unsafe {
|
|
// OK, because we checked before handing out a handle.
|
|
*(self.slice.get_unchecked_mut(self.pos)) = u;
|
|
}
|
|
self.pos += 1;
|
|
}
|
|
#[inline(always)]
|
|
fn write_ascii(&mut self, ascii: u8) {
|
|
debug_assert!(ascii < 0x80);
|
|
self.write_code_unit(u16::from(ascii));
|
|
}
|
|
#[inline(always)]
|
|
fn write_bmp(&mut self, bmp: u16) {
|
|
self.write_code_unit(bmp);
|
|
}
|
|
#[inline(always)]
|
|
fn write_bmp_excl_ascii(&mut self, bmp: u16) {
|
|
debug_assert!(bmp >= 0x80);
|
|
self.write_code_unit(bmp);
|
|
}
|
|
#[inline(always)]
|
|
fn write_mid_bmp(&mut self, bmp: u16) {
|
|
debug_assert!(bmp >= 0x80); // XXX
|
|
self.write_code_unit(bmp);
|
|
}
|
|
#[inline(always)]
|
|
fn write_upper_bmp(&mut self, bmp: u16) {
|
|
debug_assert!(bmp >= 0x80);
|
|
self.write_code_unit(bmp);
|
|
}
|
|
#[inline(always)]
|
|
fn write_astral(&mut self, astral: u32) {
|
|
debug_assert!(astral > 0xFFFF);
|
|
debug_assert!(astral <= 0x10_FFFF);
|
|
self.write_code_unit((0xD7C0 + (astral >> 10)) as u16);
|
|
self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
|
|
self.write_code_unit(high);
|
|
self.write_code_unit(low);
|
|
}
|
|
#[inline(always)]
|
|
fn write_big5_combination(&mut self, combined: u16, combining: u16) {
|
|
self.write_bmp_excl_ascii(combined);
|
|
self.write_bmp_excl_ascii(combining);
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_from_check_space_bmp<'b>(
|
|
&'b mut self,
|
|
source: &mut ByteSource,
|
|
) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(DecoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(DecoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
source.pos += length;
|
|
self.pos += length;
|
|
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
source.pos += consumed;
|
|
self.pos += consumed;
|
|
source.pos += 1; // +1 for non_ascii
|
|
non_ascii
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_from_check_space_astral<'b>(
|
|
&'b mut self,
|
|
source: &mut ByteSource,
|
|
) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = self.slice.len();
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(DecoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(DecoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
source.pos += length;
|
|
self.pos += length;
|
|
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
source.pos += consumed;
|
|
self.pos += consumed;
|
|
if self.pos + 1 < dst_len {
|
|
source.pos += 1; // +1 for non_ascii
|
|
non_ascii
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
DecoderResult::OutputFull,
|
|
source.pos,
|
|
self.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining);
|
|
source.pos += read;
|
|
self.pos += written;
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_utf16_from<E: Endian>(
|
|
&mut self,
|
|
source: &mut ByteSource,
|
|
) -> Option<(usize, usize)> {
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
|
|
let mut src_unaligned = unsafe {
|
|
UnalignedU16Slice::new(
|
|
src_remaining.as_ptr(),
|
|
::core::cmp::min(src_remaining.len() / 2, dst_remaining.len()),
|
|
)
|
|
};
|
|
if src_unaligned.len() == 0 {
|
|
return None;
|
|
}
|
|
let last_unit = swap_if_opposite_endian::<E>(src_unaligned.at(src_unaligned.len() - 1));
|
|
if super::in_range16(last_unit, 0xD800, 0xDC00) {
|
|
// Last code unit is a high surrogate. It might
|
|
// legitimately form a pair later, so let's not
|
|
// include it.
|
|
src_unaligned.trim_last();
|
|
}
|
|
let mut offset = 0usize;
|
|
loop {
|
|
if let Some((surrogate, bmp_len)) = {
|
|
let src_left = src_unaligned.tail(offset);
|
|
let dst_left = &mut dst_remaining[offset..src_unaligned.len()];
|
|
src_left.copy_bmp_to::<E>(dst_left)
|
|
} {
|
|
offset += bmp_len; // surrogate has not been consumed yet
|
|
let second_pos = offset + 1;
|
|
if surrogate > 0xDBFF || second_pos == src_unaligned.len() {
|
|
// Unpaired surrogate
|
|
source.pos += second_pos * 2;
|
|
self.pos += offset;
|
|
return Some((source.pos, self.pos));
|
|
}
|
|
let second = swap_if_opposite_endian::<E>(src_unaligned.at(second_pos));
|
|
if !super::in_range16(second, 0xDC00, 0xE000) {
|
|
// Unpaired surrogate
|
|
source.pos += second_pos * 2;
|
|
self.pos += offset;
|
|
return Some((source.pos, self.pos));
|
|
}
|
|
// `surrogate` was already speculatively written
|
|
dst_remaining[second_pos] = second;
|
|
offset += 2;
|
|
continue;
|
|
} else {
|
|
source.pos += src_unaligned.len() * 2;
|
|
self.pos += src_unaligned.len();
|
|
return None;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// UTF-8 destination
|
|
|
|
pub struct Utf8BmpHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut Utf8Destination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf8BmpHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b> {
|
|
Utf8BmpHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_ascii(ascii);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_bmp_excl_ascii(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_mid_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_upper_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut Utf8Destination<'b> {
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct Utf8AstralHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut Utf8Destination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf8AstralHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b> {
|
|
Utf8AstralHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_ascii(ascii);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_bmp_excl_ascii(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_upper_bmp(bmp);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_astral(astral);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_surrogate_pair(high, low);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_big5_combination(
|
|
self,
|
|
combined: u16,
|
|
combining: u16,
|
|
) -> &'a mut Utf8Destination<'b> {
|
|
self.dest.write_big5_combination(combined, combining);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut Utf8Destination<'b> {
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct Utf8Destination<'a> {
|
|
slice: &'a mut [u8],
|
|
pos: usize,
|
|
}
|
|
|
|
impl<'a> Utf8Destination<'a> {
|
|
#[inline(always)]
|
|
pub fn new(dst: &mut [u8]) -> Utf8Destination {
|
|
Utf8Destination { slice: dst, pos: 0 }
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>> {
|
|
if self.pos + 2 < self.slice.len() {
|
|
Space::Available(Utf8BmpHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>> {
|
|
if self.pos + 3 < self.slice.len() {
|
|
Space::Available(Utf8AstralHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
fn write_code_unit(&mut self, u: u8) {
|
|
unsafe {
|
|
// OK, because we checked before handing out a handle.
|
|
*(self.slice.get_unchecked_mut(self.pos)) = u;
|
|
}
|
|
self.pos += 1;
|
|
}
|
|
#[inline(always)]
|
|
fn write_ascii(&mut self, ascii: u8) {
|
|
debug_assert!(ascii < 0x80);
|
|
self.write_code_unit(ascii);
|
|
}
|
|
#[inline(always)]
|
|
fn write_bmp(&mut self, bmp: u16) {
|
|
if bmp < 0x80u16 {
|
|
self.write_ascii(bmp as u8);
|
|
} else if bmp < 0x800u16 {
|
|
self.write_mid_bmp(bmp);
|
|
} else {
|
|
self.write_upper_bmp(bmp);
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn write_mid_bmp(&mut self, mid_bmp: u16) {
|
|
debug_assert!(mid_bmp >= 0x80);
|
|
debug_assert!(mid_bmp < 0x800);
|
|
self.write_code_unit(((mid_bmp >> 6) | 0xC0) as u8);
|
|
self.write_code_unit(((mid_bmp & 0x3F) | 0x80) as u8);
|
|
}
|
|
#[inline(always)]
|
|
fn write_upper_bmp(&mut self, upper_bmp: u16) {
|
|
debug_assert!(upper_bmp >= 0x800);
|
|
self.write_code_unit(((upper_bmp >> 12) | 0xE0) as u8);
|
|
self.write_code_unit((((upper_bmp & 0xFC0) >> 6) | 0x80) as u8);
|
|
self.write_code_unit(((upper_bmp & 0x3F) | 0x80) as u8);
|
|
}
|
|
#[inline(always)]
|
|
fn write_bmp_excl_ascii(&mut self, bmp: u16) {
|
|
if bmp < 0x800u16 {
|
|
self.write_mid_bmp(bmp);
|
|
} else {
|
|
self.write_upper_bmp(bmp);
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn write_astral(&mut self, astral: u32) {
|
|
debug_assert!(astral > 0xFFFF);
|
|
debug_assert!(astral <= 0x10_FFFF);
|
|
self.write_code_unit(((astral >> 18) | 0xF0) as u8);
|
|
self.write_code_unit((((astral & 0x3F000) >> 12) | 0x80) as u8);
|
|
self.write_code_unit((((astral & 0xFC0) >> 6) | 0x80) as u8);
|
|
self.write_code_unit(((astral & 0x3F) | 0x80) as u8);
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
|
|
self.write_astral(
|
|
(u32::from(high) << 10) + u32::from(low)
|
|
- (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
|
|
);
|
|
}
|
|
#[inline(always)]
|
|
fn write_big5_combination(&mut self, combined: u16, combining: u16) {
|
|
self.write_mid_bmp(combined);
|
|
self.write_mid_bmp(combining);
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_from_check_space_bmp<'b>(
|
|
&'b mut self,
|
|
source: &mut ByteSource,
|
|
) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = self.slice.len();
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(DecoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(DecoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
source.pos += length;
|
|
self.pos += length;
|
|
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
source.pos += consumed;
|
|
self.pos += consumed;
|
|
if self.pos + 2 < dst_len {
|
|
source.pos += 1; // +1 for non_ascii
|
|
non_ascii
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
DecoderResult::OutputFull,
|
|
source.pos,
|
|
self.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, Utf8BmpHandle::new(self)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_from_check_space_astral<'b>(
|
|
&'b mut self,
|
|
source: &mut ByteSource,
|
|
) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = self.slice.len();
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(DecoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(DecoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
source.pos += length;
|
|
self.pos += length;
|
|
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
source.pos += consumed;
|
|
self.pos += consumed;
|
|
if self.pos + 3 < dst_len {
|
|
source.pos += 1; // +1 for non_ascii
|
|
non_ascii
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
DecoderResult::OutputFull,
|
|
source.pos,
|
|
self.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, Utf8AstralHandle::new(self)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
let min_len = ::core::cmp::min(src_remaining.len(), dst_remaining.len());
|
|
// Validate first, then memcpy to let memcpy do its thing even for
|
|
// non-ASCII. (And potentially do something better than SSE2 for ASCII.)
|
|
let valid_len = utf8_valid_up_to(&src_remaining[..min_len]);
|
|
(&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]);
|
|
source.pos += valid_len;
|
|
self.pos += valid_len;
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_utf16_from<E: Endian>(
|
|
&mut self,
|
|
source: &mut ByteSource,
|
|
) -> Option<(usize, usize)> {
|
|
let src_remaining = &source.slice[source.pos..];
|
|
let dst_remaining = &mut self.slice[self.pos..];
|
|
|
|
let mut src_unaligned =
|
|
unsafe { UnalignedU16Slice::new(src_remaining.as_ptr(), src_remaining.len() / 2) };
|
|
if src_unaligned.len() == 0 {
|
|
return None;
|
|
}
|
|
let mut last_unit = src_unaligned.at(src_unaligned.len() - 1);
|
|
if E::OPPOSITE_ENDIAN {
|
|
last_unit = last_unit.swap_bytes();
|
|
}
|
|
if super::in_range16(last_unit, 0xD800, 0xDC00) {
|
|
// Last code unit is a high surrogate. It might
|
|
// legitimately form a pair later, so let's not
|
|
// include it.
|
|
src_unaligned.trim_last();
|
|
}
|
|
let (read, written, had_error) =
|
|
convert_unaligned_utf16_to_utf8::<E>(src_unaligned, dst_remaining);
|
|
source.pos += read * 2;
|
|
self.pos += written;
|
|
if had_error {
|
|
Some((source.pos, self.pos))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
// UTF-16 source
|
|
|
|
pub struct Utf16Source<'a> {
|
|
slice: &'a [u16],
|
|
pos: usize,
|
|
old_pos: usize,
|
|
}
|
|
|
|
impl<'a> Utf16Source<'a> {
|
|
#[inline(always)]
|
|
pub fn new(src: &[u16]) -> Utf16Source {
|
|
Utf16Source {
|
|
slice: src,
|
|
pos: 0,
|
|
old_pos: 0,
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> {
|
|
if self.pos < self.slice.len() {
|
|
Space::Available(Utf16ReadHandle::new(self))
|
|
} else {
|
|
Space::Full(self.consumed())
|
|
}
|
|
}
|
|
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
|
|
#[inline(always)]
|
|
fn read(&mut self) -> char {
|
|
self.old_pos = self.pos;
|
|
let unit = self.slice[self.pos];
|
|
self.pos += 1;
|
|
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
|
|
if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
|
|
return unsafe { ::core::char::from_u32_unchecked(u32::from(unit)) };
|
|
}
|
|
if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
|
|
// high surrogate
|
|
if self.pos < self.slice.len() {
|
|
let second = self.slice[self.pos];
|
|
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
|
|
if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
|
|
// The next code unit is a low surrogate. Advance position.
|
|
self.pos += 1;
|
|
return unsafe {
|
|
::core::char::from_u32_unchecked(
|
|
(u32::from(unit) << 10) + u32::from(second)
|
|
- (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
|
|
)
|
|
};
|
|
}
|
|
// The next code unit is not a low surrogate. Don't advance
|
|
// position and treat the high surrogate as unpaired.
|
|
// fall through
|
|
}
|
|
// Unpaired surrogate at the end of buffer, fall through
|
|
}
|
|
// Unpaired low surrogate
|
|
'\u{FFFD}'
|
|
}
|
|
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
|
|
#[inline(always)]
|
|
fn read_enum(&mut self) -> Unicode {
|
|
self.old_pos = self.pos;
|
|
let unit = self.slice[self.pos];
|
|
self.pos += 1;
|
|
if unit < 0x80 {
|
|
return Unicode::Ascii(unit as u8);
|
|
}
|
|
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
|
|
if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
|
|
return Unicode::NonAscii(NonAscii::BmpExclAscii(unit));
|
|
}
|
|
if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
|
|
// high surrogate
|
|
if self.pos < self.slice.len() {
|
|
let second = self.slice[self.pos];
|
|
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
|
|
if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
|
|
// The next code unit is a low surrogate. Advance position.
|
|
self.pos += 1;
|
|
return Unicode::NonAscii(NonAscii::Astral(unsafe {
|
|
::core::char::from_u32_unchecked(
|
|
(u32::from(unit) << 10) + u32::from(second)
|
|
- (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
|
|
)
|
|
}));
|
|
}
|
|
// The next code unit is not a low surrogate. Don't advance
|
|
// position and treat the high surrogate as unpaired.
|
|
// fall through
|
|
}
|
|
// Unpaired surrogate at the end of buffer, fall through
|
|
}
|
|
// Unpaired low surrogate
|
|
Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16))
|
|
}
|
|
#[inline(always)]
|
|
fn unread(&mut self) -> usize {
|
|
self.pos = self.old_pos;
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_to_check_space_two<'b>(
|
|
&mut self,
|
|
dest: &'b mut ByteDestination<'a>,
|
|
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = dest.slice.len();
|
|
let src_remaining = &self.slice[self.pos..];
|
|
let dst_remaining = &mut dest.slice[dest.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(EncoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(EncoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
self.pos += length;
|
|
dest.pos += length;
|
|
return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
self.pos += consumed;
|
|
dest.pos += consumed;
|
|
if dest.pos + 1 < dst_len {
|
|
self.pos += 1; // commit to reading `non_ascii`
|
|
let unit = non_ascii;
|
|
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
|
|
if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
|
|
NonAscii::BmpExclAscii(unit)
|
|
} else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
|
|
// high surrogate
|
|
if self.pos < self.slice.len() {
|
|
let second = self.slice[self.pos];
|
|
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
|
|
if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
|
|
// The next code unit is a low surrogate. Advance position.
|
|
self.pos += 1;
|
|
NonAscii::Astral(unsafe {
|
|
::core::char::from_u32_unchecked(
|
|
(u32::from(unit) << 10) + u32::from(second)
|
|
- (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
|
|
)
|
|
})
|
|
} else {
|
|
// The next code unit is not a low surrogate. Don't advance
|
|
// position and treat the high surrogate as unpaired.
|
|
NonAscii::BmpExclAscii(0xFFFDu16)
|
|
}
|
|
} else {
|
|
// Unpaired surrogate at the end of the buffer.
|
|
NonAscii::BmpExclAscii(0xFFFDu16)
|
|
}
|
|
} else {
|
|
// Unpaired low surrogate
|
|
NonAscii::BmpExclAscii(0xFFFDu16)
|
|
}
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
EncoderResult::OutputFull,
|
|
self.pos,
|
|
dest.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_to_check_space_four<'b>(
|
|
&mut self,
|
|
dest: &'b mut ByteDestination<'a>,
|
|
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = dest.slice.len();
|
|
let src_remaining = &self.slice[self.pos..];
|
|
let dst_remaining = &mut dest.slice[dest.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(EncoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(EncoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
self.pos += length;
|
|
dest.pos += length;
|
|
return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
self.pos += consumed;
|
|
dest.pos += consumed;
|
|
if dest.pos + 3 < dst_len {
|
|
self.pos += 1; // commit to reading `non_ascii`
|
|
let unit = non_ascii;
|
|
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
|
|
if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
|
|
NonAscii::BmpExclAscii(unit)
|
|
} else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
|
|
// high surrogate
|
|
if self.pos == self.slice.len() {
|
|
// Unpaired surrogate at the end of the buffer.
|
|
NonAscii::BmpExclAscii(0xFFFDu16)
|
|
} else {
|
|
let second = self.slice[self.pos];
|
|
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
|
|
if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
|
|
// The next code unit is a low surrogate. Advance position.
|
|
self.pos += 1;
|
|
NonAscii::Astral(unsafe {
|
|
::core::char::from_u32_unchecked(
|
|
(u32::from(unit) << 10) + u32::from(second)
|
|
- (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
|
|
)
|
|
})
|
|
} else {
|
|
// The next code unit is not a low surrogate. Don't advance
|
|
// position and treat the high surrogate as unpaired.
|
|
NonAscii::BmpExclAscii(0xFFFDu16)
|
|
}
|
|
}
|
|
} else {
|
|
// Unpaired low surrogate
|
|
NonAscii::BmpExclAscii(0xFFFDu16)
|
|
}
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
EncoderResult::OutputFull,
|
|
self.pos,
|
|
dest.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
|
|
}
|
|
}
|
|
|
|
pub struct Utf16ReadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
source: &'a mut Utf16Source<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf16ReadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b> {
|
|
Utf16ReadHandle { source: src }
|
|
}
|
|
#[inline(always)]
|
|
pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) {
|
|
let character = self.source.read();
|
|
let handle = Utf16UnreadHandle::new(self.source);
|
|
(character, handle)
|
|
}
|
|
#[inline(always)]
|
|
pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) {
|
|
let character = self.source.read_enum();
|
|
let handle = Utf16UnreadHandle::new(self.source);
|
|
(character, handle)
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.source.consumed()
|
|
}
|
|
}
|
|
|
|
pub struct Utf16UnreadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
source: &'a mut Utf16Source<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf16UnreadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> {
|
|
Utf16UnreadHandle { source: src }
|
|
}
|
|
#[inline(always)]
|
|
pub fn unread(self) -> usize {
|
|
self.source.unread()
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.source.consumed()
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut Utf16Source<'b> {
|
|
self.source
|
|
}
|
|
}
|
|
|
|
// UTF-8 source
|
|
|
|
pub struct Utf8Source<'a> {
|
|
slice: &'a [u8],
|
|
pos: usize,
|
|
old_pos: usize,
|
|
}
|
|
|
|
impl<'a> Utf8Source<'a> {
|
|
#[inline(always)]
|
|
pub fn new(src: &str) -> Utf8Source {
|
|
Utf8Source {
|
|
slice: src.as_bytes(),
|
|
pos: 0,
|
|
old_pos: 0,
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>> {
|
|
if self.pos < self.slice.len() {
|
|
Space::Available(Utf8ReadHandle::new(self))
|
|
} else {
|
|
Space::Full(self.consumed())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn read(&mut self) -> char {
|
|
self.old_pos = self.pos;
|
|
let unit = self.slice[self.pos];
|
|
if unit < 0x80 {
|
|
self.pos += 1;
|
|
return char::from(unit);
|
|
}
|
|
if unit < 0xE0 {
|
|
let point =
|
|
((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F);
|
|
self.pos += 2;
|
|
return unsafe { ::core::char::from_u32_unchecked(point) };
|
|
}
|
|
if unit < 0xF0 {
|
|
let point = ((u32::from(unit) & 0xF) << 12)
|
|
| ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6)
|
|
| (u32::from(self.slice[self.pos + 2]) & 0x3F);
|
|
self.pos += 3;
|
|
return unsafe { ::core::char::from_u32_unchecked(point) };
|
|
}
|
|
let point = ((u32::from(unit) & 0x7) << 18)
|
|
| ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
|
|
| ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
|
|
| (u32::from(self.slice[self.pos + 3]) & 0x3F);
|
|
self.pos += 4;
|
|
unsafe { ::core::char::from_u32_unchecked(point) }
|
|
}
|
|
#[inline(always)]
|
|
fn read_enum(&mut self) -> Unicode {
|
|
self.old_pos = self.pos;
|
|
let unit = self.slice[self.pos];
|
|
if unit < 0x80 {
|
|
self.pos += 1;
|
|
return Unicode::Ascii(unit);
|
|
}
|
|
if unit < 0xE0 {
|
|
let point =
|
|
((u16::from(unit) & 0x1F) << 6) | (u16::from(self.slice[self.pos + 1]) & 0x3F);
|
|
self.pos += 2;
|
|
return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
|
|
}
|
|
if unit < 0xF0 {
|
|
let point = ((u16::from(unit) & 0xF) << 12)
|
|
| ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
|
|
| (u16::from(self.slice[self.pos + 2]) & 0x3F);
|
|
self.pos += 3;
|
|
return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
|
|
}
|
|
let point = ((u32::from(unit) & 0x7) << 18)
|
|
| ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
|
|
| ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
|
|
| (u32::from(self.slice[self.pos + 3]) & 0x3F);
|
|
self.pos += 4;
|
|
Unicode::NonAscii(NonAscii::Astral(unsafe {
|
|
::core::char::from_u32_unchecked(point)
|
|
}))
|
|
}
|
|
#[inline(always)]
|
|
fn unread(&mut self) -> usize {
|
|
self.pos = self.old_pos;
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_to_check_space_one<'b>(
|
|
&mut self,
|
|
dest: &'b mut ByteDestination<'a>,
|
|
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let src_remaining = &self.slice[self.pos..];
|
|
let dst_remaining = &mut dest.slice[dest.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(EncoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(EncoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
self.pos += length;
|
|
dest.pos += length;
|
|
return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
self.pos += consumed;
|
|
dest.pos += consumed;
|
|
// We don't need to check space in destination, because
|
|
// `ascii_to_ascii()` already did.
|
|
if non_ascii < 0xE0 {
|
|
let point = ((u16::from(non_ascii) & 0x1F) << 6)
|
|
| (u16::from(self.slice[self.pos + 1]) & 0x3F);
|
|
self.pos += 2;
|
|
NonAscii::BmpExclAscii(point)
|
|
} else if non_ascii < 0xF0 {
|
|
let point = ((u16::from(non_ascii) & 0xF) << 12)
|
|
| ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
|
|
| (u16::from(self.slice[self.pos + 2]) & 0x3F);
|
|
self.pos += 3;
|
|
NonAscii::BmpExclAscii(point)
|
|
} else {
|
|
let point = ((u32::from(non_ascii) & 0x7) << 18)
|
|
| ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
|
|
| ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
|
|
| (u32::from(self.slice[self.pos + 3]) & 0x3F);
|
|
self.pos += 4;
|
|
NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, ByteOneHandle::new(dest)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_to_check_space_two<'b>(
|
|
&mut self,
|
|
dest: &'b mut ByteDestination<'a>,
|
|
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = dest.slice.len();
|
|
let src_remaining = &self.slice[self.pos..];
|
|
let dst_remaining = &mut dest.slice[dest.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(EncoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(EncoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
self.pos += length;
|
|
dest.pos += length;
|
|
return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
self.pos += consumed;
|
|
dest.pos += consumed;
|
|
if dest.pos + 1 < dst_len {
|
|
if non_ascii < 0xE0 {
|
|
let point = ((u16::from(non_ascii) & 0x1F) << 6)
|
|
| (u16::from(self.slice[self.pos + 1]) & 0x3F);
|
|
self.pos += 2;
|
|
NonAscii::BmpExclAscii(point)
|
|
} else if non_ascii < 0xF0 {
|
|
let point = ((u16::from(non_ascii) & 0xF) << 12)
|
|
| ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
|
|
| (u16::from(self.slice[self.pos + 2]) & 0x3F);
|
|
self.pos += 3;
|
|
NonAscii::BmpExclAscii(point)
|
|
} else {
|
|
let point = ((u32::from(non_ascii) & 0x7) << 18)
|
|
| ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
|
|
| ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
|
|
| (u32::from(self.slice[self.pos + 3]) & 0x3F);
|
|
self.pos += 4;
|
|
NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
|
|
}
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
EncoderResult::OutputFull,
|
|
self.pos,
|
|
dest.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
|
|
}
|
|
#[inline(always)]
|
|
pub fn copy_ascii_to_check_space_four<'b>(
|
|
&mut self,
|
|
dest: &'b mut ByteDestination<'a>,
|
|
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
|
|
let non_ascii_ret = {
|
|
let dst_len = dest.slice.len();
|
|
let src_remaining = &self.slice[self.pos..];
|
|
let dst_remaining = &mut dest.slice[dest.pos..];
|
|
let (pending, length) = if dst_remaining.len() < src_remaining.len() {
|
|
(EncoderResult::OutputFull, dst_remaining.len())
|
|
} else {
|
|
(EncoderResult::InputEmpty, src_remaining.len())
|
|
};
|
|
match unsafe {
|
|
ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
|
|
} {
|
|
None => {
|
|
self.pos += length;
|
|
dest.pos += length;
|
|
return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
|
|
}
|
|
Some((non_ascii, consumed)) => {
|
|
self.pos += consumed;
|
|
dest.pos += consumed;
|
|
if dest.pos + 3 < dst_len {
|
|
if non_ascii < 0xE0 {
|
|
let point = ((u16::from(non_ascii) & 0x1F) << 6)
|
|
| (u16::from(self.slice[self.pos + 1]) & 0x3F);
|
|
self.pos += 2;
|
|
NonAscii::BmpExclAscii(point)
|
|
} else if non_ascii < 0xF0 {
|
|
let point = ((u16::from(non_ascii) & 0xF) << 12)
|
|
| ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
|
|
| (u16::from(self.slice[self.pos + 2]) & 0x3F);
|
|
self.pos += 3;
|
|
NonAscii::BmpExclAscii(point)
|
|
} else {
|
|
let point = ((u32::from(non_ascii) & 0x7) << 18)
|
|
| ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
|
|
| ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
|
|
| (u32::from(self.slice[self.pos + 3]) & 0x3F);
|
|
self.pos += 4;
|
|
NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
|
|
}
|
|
} else {
|
|
return CopyAsciiResult::Stop((
|
|
EncoderResult::OutputFull,
|
|
self.pos,
|
|
dest.pos,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
|
|
}
|
|
}
|
|
|
|
pub struct Utf8ReadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
source: &'a mut Utf8Source<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf8ReadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
|
|
Utf8ReadHandle { source: src }
|
|
}
|
|
#[inline(always)]
|
|
pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) {
|
|
let character = self.source.read();
|
|
let handle = Utf8UnreadHandle::new(self.source);
|
|
(character, handle)
|
|
}
|
|
#[inline(always)]
|
|
pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) {
|
|
let character = self.source.read_enum();
|
|
let handle = Utf8UnreadHandle::new(self.source);
|
|
(character, handle)
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.source.consumed()
|
|
}
|
|
}
|
|
|
|
pub struct Utf8UnreadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
source: &'a mut Utf8Source<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> Utf8UnreadHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> {
|
|
Utf8UnreadHandle { source: src }
|
|
}
|
|
#[inline(always)]
|
|
pub fn unread(self) -> usize {
|
|
self.source.unread()
|
|
}
|
|
#[inline(always)]
|
|
pub fn consumed(&self) -> usize {
|
|
self.source.consumed()
|
|
}
|
|
#[inline(always)]
|
|
pub fn commit(self) -> &'a mut Utf8Source<'b> {
|
|
self.source
|
|
}
|
|
}
|
|
|
|
// Byte destination
|
|
|
|
pub struct ByteOneHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut ByteDestination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> ByteOneHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b> {
|
|
ByteOneHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_one(first);
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct ByteTwoHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut ByteDestination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> ByteTwoHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b> {
|
|
ByteTwoHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_one(first);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_two(first, second);
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct ByteThreeHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut ByteDestination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> ByteThreeHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b> {
|
|
ByteThreeHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_one(first);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_two(first, second);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_three(first, second, third);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_three_return_written(self, first: u8, second: u8, third: u8) -> usize {
|
|
self.dest.write_three(first, second, third);
|
|
self.dest.written()
|
|
}
|
|
}
|
|
|
|
pub struct ByteFourHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
dest: &'a mut ByteDestination<'b>,
|
|
}
|
|
|
|
impl<'a, 'b> ByteFourHandle<'a, 'b>
|
|
where
|
|
'b: 'a,
|
|
{
|
|
#[inline(always)]
|
|
fn new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b> {
|
|
ByteFourHandle { dest: dst }
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.dest.written()
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_one(first);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_two(first, second);
|
|
self.dest
|
|
}
|
|
#[inline(always)]
|
|
pub fn write_four(
|
|
self,
|
|
first: u8,
|
|
second: u8,
|
|
third: u8,
|
|
fourth: u8,
|
|
) -> &'a mut ByteDestination<'b> {
|
|
self.dest.write_four(first, second, third, fourth);
|
|
self.dest
|
|
}
|
|
}
|
|
|
|
pub struct ByteDestination<'a> {
|
|
slice: &'a mut [u8],
|
|
pos: usize,
|
|
}
|
|
|
|
impl<'a> ByteDestination<'a> {
|
|
#[inline(always)]
|
|
pub fn new(dst: &mut [u8]) -> ByteDestination {
|
|
ByteDestination { slice: dst, pos: 0 }
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> {
|
|
if self.pos < self.slice.len() {
|
|
Space::Available(ByteOneHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>> {
|
|
if self.pos + 1 < self.slice.len() {
|
|
Space::Available(ByteTwoHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>> {
|
|
if self.pos + 2 < self.slice.len() {
|
|
Space::Available(ByteThreeHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>> {
|
|
if self.pos + 3 < self.slice.len() {
|
|
Space::Available(ByteFourHandle::new(self))
|
|
} else {
|
|
Space::Full(self.written())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn written(&self) -> usize {
|
|
self.pos
|
|
}
|
|
#[inline(always)]
|
|
fn write_one(&mut self, first: u8) {
|
|
self.slice[self.pos] = first;
|
|
self.pos += 1;
|
|
}
|
|
#[inline(always)]
|
|
fn write_two(&mut self, first: u8, second: u8) {
|
|
self.slice[self.pos] = first;
|
|
self.slice[self.pos + 1] = second;
|
|
self.pos += 2;
|
|
}
|
|
#[inline(always)]
|
|
fn write_three(&mut self, first: u8, second: u8, third: u8) {
|
|
self.slice[self.pos] = first;
|
|
self.slice[self.pos + 1] = second;
|
|
self.slice[self.pos + 2] = third;
|
|
self.pos += 3;
|
|
}
|
|
#[inline(always)]
|
|
fn write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8) {
|
|
self.slice[self.pos] = first;
|
|
self.slice[self.pos + 1] = second;
|
|
self.slice[self.pos + 2] = third;
|
|
self.slice[self.pos + 3] = fourth;
|
|
self.pos += 4;
|
|
}
|
|
}
|