RPM build fix (reverted CI changes which will need to be un-reverted or made conditional) and vendor Rust dependencies to make builds much faster in any CI system.

Adam Ierymenko
2022-06-08 07:32:16 -04:00
parent 373ca30269
commit d5ca4e5f52
12611 changed files with 2898014 additions and 284 deletions

247 zeroidc/vendor/base64/src/chunked_encoder.rs vendored Normal file

@@ -0,0 +1,247 @@
use crate::{
encode::{add_padding, encode_to_slice},
Config,
};
#[cfg(any(feature = "alloc", feature = "std", test))]
use alloc::string::String;
use core::cmp;
#[cfg(any(feature = "alloc", feature = "std", test))]
use core::str;
/// The output mechanism for ChunkedEncoder's encoded bytes.
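///
/// # Example
///
/// A minimal sketch of an implementation (illustrative only; `VecSink` is not
/// part of this crate):
///
/// ```rust,ignore
/// struct VecSink<'a> {
///     out: &'a mut Vec<u8>,
/// }
///
/// impl<'a> Sink for VecSink<'a> {
///     type Error = ();
///     fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), ()> {
///         self.out.extend_from_slice(encoded);
///         Ok(())
///     }
/// }
/// ```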
pub trait Sink {
type Error;
/// Handle a chunk of encoded base64 data (as UTF-8 bytes)
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
const BUF_SIZE: usize = 1024;
/// A base64 encoder that emits encoded bytes in chunks without heap allocation.
pub struct ChunkedEncoder {
config: Config,
max_input_chunk_len: usize,
}
impl ChunkedEncoder {
pub fn new(config: Config) -> ChunkedEncoder {
ChunkedEncoder {
config,
max_input_chunk_len: max_input_length(BUF_SIZE, config),
}
}
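/// Encode `bytes` with this encoder's config, handing the encoded output to
/// `sink` in chunks of at most `BUF_SIZE` bytes.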
pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
let encode_table = self.config.char_set.encode_table();
let mut input_index = 0;
while input_index < bytes.len() {
// either the full input chunk size, or it's the last iteration
let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
let chunk = &bytes[input_index..(input_index + input_chunk_len)];
let mut b64_bytes_written = encode_to_slice(chunk, &mut encode_buf, encode_table);
input_index += input_chunk_len;
let more_input_left = input_index < bytes.len();
if self.config.pad && !more_input_left {
// no more input, add padding if needed. Buffer will have room because
// max_input_length leaves room for it.
b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
}
sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
}
Ok(())
}
}
/// Calculate the longest input that can be encoded for the given output buffer size.
///
/// If the config requires padding, two bytes of buffer space will be set aside so that the last
/// chunk of input can be encoded safely.
///
/// The input length will always be a multiple of 3 so that no encoding state has to be carried over
/// between chunks.
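///
/// For example, a 1024-byte buffer with padding enabled allows
/// `(1024 - 2) / 4 * 3 = 765` input bytes per chunk, keeping 2 bytes of room
/// for the `==` suffix on the final chunk.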
fn max_input_length(encoded_buf_len: usize, config: Config) -> usize {
let effective_buf_len = if config.pad {
// make room for padding
encoded_buf_len
.checked_sub(2)
.expect("Don't use a tiny buffer")
} else {
encoded_buf_len
};
// No padding, so just normal base64 expansion.
(effective_buf_len / 4) * 3
}
// A really simple sink that just appends to a string
#[cfg(any(feature = "alloc", feature = "std", test))]
pub(crate) struct StringSink<'a> {
string: &'a mut String,
}
#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> StringSink<'a> {
pub(crate) fn new(s: &mut String) -> StringSink {
StringSink { string: s }
}
}
#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> Sink for StringSink<'a> {
type Error = ();
fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
self.string.push_str(str::from_utf8(s).unwrap());
Ok(())
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::{encode_config_buf, tests::random_config, CharacterSet, STANDARD};
use rand::{
distributions::{Distribution, Uniform},
FromEntropy, Rng,
};
#[test]
fn chunked_encode_empty() {
assert_eq!("", chunked_encode_str(&[], STANDARD));
}
#[test]
fn chunked_encode_intermediate_fast_loop() {
// > 8 bytes input, will enter the pretty fast loop
assert_eq!(
"Zm9vYmFyYmF6cXV4",
chunked_encode_str(b"foobarbazqux", STANDARD)
);
}
#[test]
fn chunked_encode_fast_loop() {
// > 32 bytes input, will enter the uber fast loop
assert_eq!(
"Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==",
chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", STANDARD)
);
}
#[test]
fn chunked_encode_slow_loop_only() {
// < 8 bytes input, slow loop only
assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", STANDARD));
}
#[test]
fn chunked_encode_matches_normal_encode_random_string_sink() {
let helper = StringSinkTestHelper;
chunked_encode_matches_normal_encode_random(&helper);
}
#[test]
fn max_input_length_no_pad() {
let config = config_with_pad(false);
assert_eq!(768, max_input_length(1024, config));
}
#[test]
fn max_input_length_with_pad_decrements_one_triple() {
let config = config_with_pad(true);
assert_eq!(765, max_input_length(1024, config));
}
#[test]
fn max_input_length_with_pad_one_byte_short() {
let config = config_with_pad(true);
assert_eq!(765, max_input_length(1025, config));
}
#[test]
fn max_input_length_with_pad_fits_exactly() {
let config = config_with_pad(true);
assert_eq!(768, max_input_length(1026, config));
}
#[test]
fn max_input_length_cant_use_extra_single_encoded_byte() {
let config = Config::new(crate::CharacterSet::Standard, false);
assert_eq!(300, max_input_length(401, config));
}
pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
let mut input_buf: Vec<u8> = Vec::new();
let mut output_buf = String::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
let input_len_range = Uniform::new(1, 10_000);
for _ in 0..5_000 {
input_buf.clear();
output_buf.clear();
let buf_len = input_len_range.sample(&mut rng);
for _ in 0..buf_len {
input_buf.push(rng.gen());
}
let config = random_config(&mut rng);
let chunk_encoded_string = sink_test_helper.encode_to_string(config, &input_buf);
encode_config_buf(&input_buf, config, &mut output_buf);
assert_eq!(
output_buf, chunk_encoded_string,
"input len={}, config: pad={}",
buf_len, config.pad
);
}
}
fn chunked_encode_str(bytes: &[u8], config: Config) -> String {
let mut s = String::new();
{
let mut sink = StringSink::new(&mut s);
let encoder = ChunkedEncoder::new(config);
encoder.encode(bytes, &mut sink).unwrap();
}
s
}
fn config_with_pad(pad: bool) -> Config {
Config::new(CharacterSet::Standard, pad)
}
// An abstraction around sinks so that we can have tests that easily apply to any sink implementation
pub trait SinkTestHelper {
fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String;
}
struct StringSinkTestHelper;
impl SinkTestHelper for StringSinkTestHelper {
fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String {
let encoder = ChunkedEncoder::new(config);
let mut s = String::new();
{
let mut sink = StringSink::new(&mut s);
encoder.encode(bytes, &mut sink).unwrap();
}
s
}
}
}

873 zeroidc/vendor/base64/src/decode.rs vendored Normal file

@@ -0,0 +1,873 @@
use crate::{tables, Config, PAD_BYTE};
#[cfg(any(feature = "alloc", feature = "std", test))]
use crate::STANDARD;
#[cfg(any(feature = "alloc", feature = "std", test))]
use alloc::vec::Vec;
use core::fmt;
#[cfg(any(feature = "std", test))]
use std::error;
// decode logic operates on chunks of 8 input bytes without padding
const INPUT_CHUNK_LEN: usize = 8;
const DECODED_CHUNK_LEN: usize = 6;
// we read a u64 and write a u64, but a u64 of input only yields 6 bytes of output, so the last
// 2 bytes of any output u64 should not be counted as written to (but must be available in a
// slice).
const DECODED_CHUNK_SUFFIX: usize = 2;
// how many u64's of input to handle at a time
const CHUNKS_PER_FAST_LOOP_BLOCK: usize = 4;
const INPUT_BLOCK_LEN: usize = CHUNKS_PER_FAST_LOOP_BLOCK * INPUT_CHUNK_LEN;
// includes the trailing 2 bytes for the final u64 write
const DECODED_BLOCK_LEN: usize =
CHUNKS_PER_FAST_LOOP_BLOCK * DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX;
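// Arithmetic behind the constants above: 8 base64 symbols carry 8 * 6 = 48 bits,
// i.e. 6 bytes of output, so a 4-chunk block reads 4 * 8 = 32 input bytes and
// decodes 4 * 6 = 24 bytes, plus the 2-byte suffix for the final u64 write: 26.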
/// Errors that can occur while decoding.
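///
/// # Examples
///
/// ```rust
/// // a single symbol encodes only 6 bits, which is less than one byte
/// assert_eq!(Err(base64::DecodeError::InvalidLength), base64::decode("a"));
/// // '@' is not in the standard alphabet
/// assert_eq!(
///     Err(base64::DecodeError::InvalidByte(1, b'@')),
///     base64::decode("a@==")
/// );
/// ```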
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DecodeError {
/// An invalid byte was found in the input. The offset and offending byte are provided.
InvalidByte(usize, u8),
/// The length of the input is invalid.
/// A typical cause of this is stray trailing whitespace or other separator bytes.
/// In the case where excess trailing bytes have produced an invalid length *and* the last byte
/// is also an invalid base64 symbol (as would be the case for whitespace, etc), `InvalidByte`
/// will be emitted instead of `InvalidLength` to make the issue easier to debug.
InvalidLength,
/// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded.
/// This is indicative of corrupted or truncated Base64.
/// Unlike InvalidByte, which reports symbols that aren't in the alphabet, this error is for
/// symbols that are in the alphabet but represent nonsensical encodings.
InvalidLastSymbol(usize, u8),
}
impl fmt::Display for DecodeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
DecodeError::InvalidByte(index, byte) => {
write!(f, "Invalid byte {}, offset {}.", byte, index)
}
DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."),
DecodeError::InvalidLastSymbol(index, byte) => {
write!(f, "Invalid last symbol {}, offset {}.", byte, index)
}
}
}
}
#[cfg(any(feature = "std", test))]
impl error::Error for DecodeError {
fn description(&self) -> &str {
match *self {
DecodeError::InvalidByte(_, _) => "invalid byte",
DecodeError::InvalidLength => "invalid length",
DecodeError::InvalidLastSymbol(_, _) => "invalid last symbol",
}
}
fn cause(&self) -> Option<&dyn error::Error> {
None
}
}
///Decode from string reference as octets.
///Returns a Result containing a Vec<u8>.
///Convenience for `decode_config(input, base64::STANDARD);`.
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let bytes = base64::decode("aGVsbG8gd29ybGQ=").unwrap();
/// println!("{:?}", bytes);
///}
///```
#[cfg(any(feature = "alloc", feature = "std", test))]
pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
decode_config(input, STANDARD)
}
///Decode from string reference as octets.
///Returns a Result containing a Vec<u8>.
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let bytes = base64::decode_config("aGVsbG8gd29ybGR+Cg==", base64::STANDARD).unwrap();
/// println!("{:?}", bytes);
///
/// let bytes_url = base64::decode_config("aGVsbG8gaW50ZXJuZXR-Cg==", base64::URL_SAFE).unwrap();
/// println!("{:?}", bytes_url);
///}
///```
#[cfg(any(feature = "alloc", feature = "std", test))]
pub fn decode_config<T: AsRef<[u8]>>(input: T, config: Config) -> Result<Vec<u8>, DecodeError> {
let mut buffer = Vec::<u8>::with_capacity(input.as_ref().len() * 4 / 3);
decode_config_buf(input, config, &mut buffer).map(|_| buffer)
}
///Decode from string reference as octets.
///Writes into the supplied buffer to avoid allocation.
///Returns a Result containing an empty tuple, aka ().
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let mut buffer = Vec::<u8>::new();
/// base64::decode_config_buf("aGVsbG8gd29ybGR+Cg==", base64::STANDARD, &mut buffer).unwrap();
/// println!("{:?}", buffer);
///
/// buffer.clear();
///
/// base64::decode_config_buf("aGVsbG8gaW50ZXJuZXR-Cg==", base64::URL_SAFE, &mut buffer)
/// .unwrap();
/// println!("{:?}", buffer);
///}
///```
#[cfg(any(feature = "alloc", feature = "std", test))]
pub fn decode_config_buf<T: AsRef<[u8]>>(
input: T,
config: Config,
buffer: &mut Vec<u8>,
) -> Result<(), DecodeError> {
let input_bytes = input.as_ref();
let starting_output_len = buffer.len();
let num_chunks = num_chunks(input_bytes);
let decoded_len_estimate = num_chunks
.checked_mul(DECODED_CHUNK_LEN)
.and_then(|p| p.checked_add(starting_output_len))
.expect("Overflow when calculating output buffer length");
buffer.resize(decoded_len_estimate, 0);
let bytes_written;
{
let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
bytes_written = decode_helper(input_bytes, num_chunks, config, buffer_slice)?;
}
buffer.truncate(starting_output_len + bytes_written);
Ok(())
}
/// Decode the input into the provided output slice.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// If you don't know ahead of time what the decoded length should be, size your buffer with a
/// conservative estimate for the decoded length of an input: 3 bytes of output for every 4 bytes of
/// input, rounded up, or in other words `(input_len + 3) / 4 * 3`.
///
/// If the slice is not large enough, this will panic.
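///
/// # Example
///
/// A sketch using the conservative sizing described above:
///
/// ```rust
/// extern crate base64;
///
/// fn main() {
///     let input = "aGVsbG8gd29ybGQ=";
///     // 3 bytes of output for every 4 bytes of input, rounded up
///     let mut buf = vec![0; (input.len() + 3) / 4 * 3];
///     let written = base64::decode_config_slice(input, base64::STANDARD, &mut buf).unwrap();
///     assert_eq!(b"hello world", &buf[..written]);
/// }
/// ```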
pub fn decode_config_slice<T: AsRef<[u8]>>(
input: T,
config: Config,
output: &mut [u8],
) -> Result<usize, DecodeError> {
let input_bytes = input.as_ref();
decode_helper(input_bytes, num_chunks(input_bytes), config, output)
}
/// Return the number of input chunks (including a possibly partial final chunk) in the input
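/// (e.g. 9 input bytes -> `(9 + 7) / 8 = 2` chunks).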
fn num_chunks(input: &[u8]) -> usize {
input
.len()
.checked_add(INPUT_CHUNK_LEN - 1)
.expect("Overflow when calculating number of chunks in input")
/ INPUT_CHUNK_LEN
}
/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
/// Returns the number of bytes written, or an error.
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
// but this is fragile and the best setting changes with only minor code modifications.
#[inline]
fn decode_helper(
input: &[u8],
num_chunks: usize,
config: Config,
output: &mut [u8],
) -> Result<usize, DecodeError> {
let char_set = config.char_set;
let decode_table = char_set.decode_table();
let remainder_len = input.len() % INPUT_CHUNK_LEN;
// Because the fast decode loop writes in groups of 8 bytes (unrolled to
// CHUNKS_PER_FAST_LOOP_BLOCK times 8 bytes, where possible) and outputs 8 bytes at a time (of
// which only 6 are valid data), we need to be sure that we stop using the fast decode loop
// soon enough that there will always be 2 more bytes of valid data written after that loop.
let trailing_bytes_to_skip = match remainder_len {
// if input is a multiple of the chunk size, ignore the last chunk as it may have padding,
// and the fast decode logic cannot handle padding
0 => INPUT_CHUNK_LEN,
// 1 and 5 trailing bytes are illegal: can't decode 6 bits of input into a byte
1 | 5 => {
// trailing whitespace is so common that it's worth it to check the last byte to
// possibly return a better error message
if let Some(b) = input.last() {
if *b != PAD_BYTE && decode_table[*b as usize] == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(input.len() - 1, *b));
}
}
return Err(DecodeError::InvalidLength);
}
// This will decode to one output byte, which isn't enough to overwrite the 2 extra bytes
// written by the fast decode loop. So, we have to ignore both these 2 bytes and the
// previous chunk.
2 => INPUT_CHUNK_LEN + 2,
// If this is 3 unpadded chars, then it would actually decode to 2 bytes. However, if this
// is an erroneous 2 chars + 1 pad char that would decode to 1 byte, then it should fail
// with an error, not panic from going past the bounds of the output slice, so we let it
// use stage 3 + 4.
3 => INPUT_CHUNK_LEN + 3,
// This can also decode to one output byte because it may be 2 input chars + 2 padding
// chars, which would decode to 1 byte.
4 => INPUT_CHUNK_LEN + 4,
// Everything else is a legal decode len (given that we don't require padding), and will
// decode to at least 2 bytes of output.
_ => remainder_len,
};
// rounded up to include partial chunks
let mut remaining_chunks = num_chunks;
let mut input_index = 0;
let mut output_index = 0;
{
let length_of_fast_decode_chunks = input.len().saturating_sub(trailing_bytes_to_skip);
// Fast loop, stage 1
// manual unroll to CHUNKS_PER_FAST_LOOP_BLOCK of u64s to amortize slice bounds checks
if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_BLOCK_LEN) {
while input_index <= max_start_index {
let input_slice = &input[input_index..(input_index + INPUT_BLOCK_LEN)];
let output_slice = &mut output[output_index..(output_index + DECODED_BLOCK_LEN)];
decode_chunk(
&input_slice[0..],
input_index,
decode_table,
&mut output_slice[0..],
)?;
decode_chunk(
&input_slice[8..],
input_index + 8,
decode_table,
&mut output_slice[6..],
)?;
decode_chunk(
&input_slice[16..],
input_index + 16,
decode_table,
&mut output_slice[12..],
)?;
decode_chunk(
&input_slice[24..],
input_index + 24,
decode_table,
&mut output_slice[18..],
)?;
input_index += INPUT_BLOCK_LEN;
output_index += DECODED_BLOCK_LEN - DECODED_CHUNK_SUFFIX;
remaining_chunks -= CHUNKS_PER_FAST_LOOP_BLOCK;
}
}
// Fast loop, stage 2 (aka still pretty fast loop)
// 8 bytes at a time for whatever we didn't do in stage 1.
if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_CHUNK_LEN) {
while input_index < max_start_index {
decode_chunk(
&input[input_index..(input_index + INPUT_CHUNK_LEN)],
input_index,
decode_table,
&mut output
[output_index..(output_index + DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX)],
)?;
output_index += DECODED_CHUNK_LEN;
input_index += INPUT_CHUNK_LEN;
remaining_chunks -= 1;
}
}
}
// Stage 3
// If input length was such that a chunk had to be deferred until after the fast loop
// because decoding it would have produced 2 trailing bytes that wouldn't then be
// overwritten, we decode that chunk here. This way is slower but doesn't write the 2
// trailing bytes.
// However, we still need to avoid the last chunk (partial or complete) because it could
// have padding, so we always do 1 fewer to avoid the last chunk.
for _ in 1..remaining_chunks {
decode_chunk_precise(
&input[input_index..],
input_index,
decode_table,
&mut output[output_index..(output_index + DECODED_CHUNK_LEN)],
)?;
input_index += INPUT_CHUNK_LEN;
output_index += DECODED_CHUNK_LEN;
}
// always have one more (possibly partial) block of 8 input bytes
debug_assert!(input.len() - input_index > 1 || input.is_empty());
debug_assert!(input.len() - input_index <= 8);
// Stage 4
// Finally, decode any leftovers that aren't a complete input block of 8 bytes.
// Use a u64 as a stack-resident 8 byte buffer.
let mut leftover_bits: u64 = 0;
let mut morsels_in_leftover = 0;
let mut padding_bytes = 0;
let mut first_padding_index: usize = 0;
let mut last_symbol = 0_u8;
let start_of_leftovers = input_index;
for (i, b) in input[start_of_leftovers..].iter().enumerate() {
// '=' padding
if *b == PAD_BYTE {
// There can be bad padding in a few ways:
// 1 - Padding with non-padding characters after it
// 2 - Padding after zero or one non-padding characters before it
// in the current quad.
// 3 - More than two characters of padding. If 3 or 4 padding chars
// are in the same quad, that implies it will be caught by #2.
// If it spreads from one quad to another, it will be caught by
// #2 in the second quad.
if i % 4 < 2 {
// Check for case #2.
let bad_padding_index = start_of_leftovers
+ if padding_bytes > 0 {
// If we've already seen padding, report the first padding index.
// This is to be consistent with the faster logic above: it will report an
// error on the first padding character (since it doesn't expect to see
// anything but actual encoded data).
first_padding_index
} else {
// haven't seen padding before, just use where we are now
i
};
return Err(DecodeError::InvalidByte(bad_padding_index, *b));
}
if padding_bytes == 0 {
first_padding_index = i;
}
padding_bytes += 1;
continue;
}
// Check for case #1.
// To make '=' handling consistent with the main loop, don't allow
// non-suffix '=' in trailing chunk either. Report error as first
// erroneous padding.
if padding_bytes > 0 {
return Err(DecodeError::InvalidByte(
start_of_leftovers + first_padding_index,
PAD_BYTE,
));
}
last_symbol = *b;
// can use up to 8 * 6 = 48 bits of the u64, if last chunk has no padding.
// To minimize shifts, pack the leftovers from left to right.
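// e.g. the first morsel lands at shift 64 - 6 = 58 (bits 63..58), the next at 52, and so on.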
let shift = 64 - (morsels_in_leftover + 1) * 6;
// tables are all 256 elements, lookup with a u8 index always succeeds
let morsel = decode_table[*b as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(start_of_leftovers + i, *b));
}
leftover_bits |= (morsel as u64) << shift;
morsels_in_leftover += 1;
}
let leftover_bits_ready_to_append = match morsels_in_leftover {
0 => 0,
2 => 8,
3 => 16,
4 => 24,
6 => 32,
7 => 40,
8 => 48,
_ => unreachable!(
"Impossible: must only have 0 to 8 input bytes in last chunk, with no invalid lengths"
),
};
// if there are bits set outside the bits we care about, last symbol encodes trailing bits that
// will not be included in the output
let mask = !0 >> leftover_bits_ready_to_append;
if !config.decode_allow_trailing_bits && (leftover_bits & mask) != 0 {
// last morsel is at `morsels_in_leftover` - 1
return Err(DecodeError::InvalidLastSymbol(
start_of_leftovers + morsels_in_leftover - 1,
last_symbol,
));
}
let mut leftover_bits_appended_to_buf = 0;
while leftover_bits_appended_to_buf < leftover_bits_ready_to_append {
// `as` simply truncates the higher bits, which is what we want here
let selected_bits = (leftover_bits >> (56 - leftover_bits_appended_to_buf)) as u8;
output[output_index] = selected_bits;
output_index += 1;
leftover_bits_appended_to_buf += 8;
}
Ok(output_index)
}
#[inline]
fn write_u64(output: &mut [u8], value: u64) {
output[..8].copy_from_slice(&value.to_be_bytes());
}
/// Decode 8 bytes of input into 6 bytes of output. 8 bytes of output will be written, but only the
/// first 6 of those contain meaningful data.
///
/// `input` is the bytes to decode, of which the first 8 bytes will be processed.
/// `index_at_start_of_input` is the offset in the overall input (used for reporting errors
/// accurately)
/// `decode_table` is the lookup table for the particular base64 alphabet.
/// `output` will have its first 8 bytes overwritten, of which only the first 6 are valid decoded
/// data.
// yes, really inline (worth 30-50% speedup)
#[inline(always)]
fn decode_chunk(
input: &[u8],
index_at_start_of_input: usize,
decode_table: &[u8; 256],
output: &mut [u8],
) -> Result<(), DecodeError> {
let mut accum: u64;
let morsel = decode_table[input[0] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0]));
}
accum = (morsel as u64) << 58;
let morsel = decode_table[input[1] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 1,
input[1],
));
}
accum |= (morsel as u64) << 52;
let morsel = decode_table[input[2] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 2,
input[2],
));
}
accum |= (morsel as u64) << 46;
let morsel = decode_table[input[3] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 3,
input[3],
));
}
accum |= (morsel as u64) << 40;
let morsel = decode_table[input[4] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 4,
input[4],
));
}
accum |= (morsel as u64) << 34;
let morsel = decode_table[input[5] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 5,
input[5],
));
}
accum |= (morsel as u64) << 28;
let morsel = decode_table[input[6] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 6,
input[6],
));
}
accum |= (morsel as u64) << 22;
let morsel = decode_table[input[7] as usize];
if morsel == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 7,
input[7],
));
}
accum |= (morsel as u64) << 16;
write_u64(output, accum);
Ok(())
}
/// Decode an 8-byte chunk, but only write the 6 bytes actually decoded instead of including 2
/// trailing garbage bytes.
#[inline]
fn decode_chunk_precise(
input: &[u8],
index_at_start_of_input: usize,
decode_table: &[u8; 256],
output: &mut [u8],
) -> Result<(), DecodeError> {
let mut tmp_buf = [0_u8; 8];
decode_chunk(
input,
index_at_start_of_input,
decode_table,
&mut tmp_buf[..],
)?;
output[0..6].copy_from_slice(&tmp_buf[0..6]);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
encode::encode_config_buf,
encode::encode_config_slice,
tests::{assert_encode_sanity, random_config},
};
use rand::{
distributions::{Distribution, Uniform},
FromEntropy, Rng,
};
#[test]
fn decode_chunk_precise_writes_only_6_bytes() {
let input = b"Zm9vYmFy"; // "foobar"
let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];
decode_chunk_precise(&input[..], 0, tables::STANDARD_DECODE, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output);
}
#[test]
fn decode_chunk_writes_8_bytes() {
let input = b"Zm9vYmFy"; // "foobar"
let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];
decode_chunk(&input[..], 0, tables::STANDARD_DECODE, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
}
#[test]
fn decode_into_nonempty_vec_doesnt_clobber_existing_prefix() {
let mut orig_data = Vec::new();
let mut encoded_data = String::new();
let mut decoded_with_prefix = Vec::new();
let mut decoded_without_prefix = Vec::new();
let mut prefix = Vec::new();
let prefix_len_range = Uniform::new(0, 1000);
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
decoded_with_prefix.clear();
decoded_without_prefix.clear();
prefix.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut encoded_data);
assert_encode_sanity(&encoded_data, config, input_len);
let prefix_len = prefix_len_range.sample(&mut rng);
// fill the buf with a prefix
for _ in 0..prefix_len {
prefix.push(rng.gen());
}
decoded_with_prefix.resize(prefix_len, 0);
decoded_with_prefix.copy_from_slice(&prefix);
// decode into the non-empty buf
decode_config_buf(&encoded_data, config, &mut decoded_with_prefix).unwrap();
// also decode into the empty buf
decode_config_buf(&encoded_data, config, &mut decoded_without_prefix).unwrap();
assert_eq!(
prefix_len + decoded_without_prefix.len(),
decoded_with_prefix.len()
);
assert_eq!(orig_data, decoded_without_prefix);
// append plain decode onto prefix
prefix.append(&mut decoded_without_prefix);
assert_eq!(prefix, decoded_with_prefix);
}
}
#[test]
fn decode_into_slice_doesnt_clobber_existing_prefix_or_suffix() {
let mut orig_data = Vec::new();
let mut encoded_data = String::new();
let mut decode_buf = Vec::new();
let mut decode_buf_copy: Vec<u8> = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
decode_buf.clear();
decode_buf_copy.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut encoded_data);
assert_encode_sanity(&encoded_data, config, input_len);
// fill the buffer with random garbage, long enough to have some room before and after
for _ in 0..5000 {
decode_buf.push(rng.gen());
}
// keep a copy for later comparison
decode_buf_copy.extend(decode_buf.iter());
let offset = 1000;
// decode into the non-empty buf
let decode_bytes_written =
decode_config_slice(&encoded_data, config, &mut decode_buf[offset..]).unwrap();
assert_eq!(orig_data.len(), decode_bytes_written);
assert_eq!(
orig_data,
&decode_buf[offset..(offset + decode_bytes_written)]
);
assert_eq!(&decode_buf_copy[0..offset], &decode_buf[0..offset]);
assert_eq!(
&decode_buf_copy[offset + decode_bytes_written..],
&decode_buf[offset + decode_bytes_written..]
);
}
}
#[test]
fn decode_into_slice_fits_in_precisely_sized_slice() {
let mut orig_data = Vec::new();
let mut encoded_data = String::new();
let mut decode_buf = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
decode_buf.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut encoded_data);
assert_encode_sanity(&encoded_data, config, input_len);
decode_buf.resize(input_len, 0);
// decode into the non-empty buf
let decode_bytes_written =
decode_config_slice(&encoded_data, config, &mut decode_buf[..]).unwrap();
assert_eq!(orig_data.len(), decode_bytes_written);
assert_eq!(orig_data, decode_buf);
}
}
#[test]
fn detect_invalid_last_symbol_two_bytes() {
let decode =
|input, forgiving| decode_config(input, STANDARD.decode_allow_trailing_bits(forgiving));
// example from https://github.com/marshallpierce/rust-base64/issues/75
assert!(decode("iYU=", false).is_ok());
// trailing 01
assert_eq!(
Err(DecodeError::InvalidLastSymbol(2, b'V')),
decode("iYV=", false)
);
assert_eq!(Ok(vec![137, 133]), decode("iYV=", true));
// trailing 10
assert_eq!(
Err(DecodeError::InvalidLastSymbol(2, b'W')),
decode("iYW=", false)
);
assert_eq!(Ok(vec![137, 133]), decode("iYW=", true));
// trailing 11
assert_eq!(
Err(DecodeError::InvalidLastSymbol(2, b'X')),
decode("iYX=", false)
);
assert_eq!(Ok(vec![137, 133]), decode("iYX=", true));
// also works when there are 2 quads in the last block
assert_eq!(
Err(DecodeError::InvalidLastSymbol(6, b'X')),
decode("AAAAiYX=", false)
);
assert_eq!(Ok(vec![0, 0, 0, 137, 133]), decode("AAAAiYX=", true));
}
#[test]
fn detect_invalid_last_symbol_one_byte() {
// 0xFF -> "/w==", so all letters > w, 0-9, and '+', '/' should get InvalidLastSymbol
assert!(decode("/w==").is_ok());
// trailing 01
assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'x')), decode("/x=="));
assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'z')), decode("/z=="));
assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'0')), decode("/0=="));
assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'9')), decode("/9=="));
assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'+')), decode("/+=="));
assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'/')), decode("//=="));
// also works when there are 2 quads in the last block
assert_eq!(
Err(DecodeError::InvalidLastSymbol(5, b'x')),
decode("AAAA/x==")
);
}
#[test]
fn detect_invalid_last_symbol_every_possible_three_symbols() {
let mut base64_to_bytes = ::std::collections::HashMap::new();
let mut bytes = [0_u8; 2];
for b1 in 0_u16..256 {
bytes[0] = b1 as u8;
for b2 in 0_u16..256 {
bytes[1] = b2 as u8;
let mut b64 = vec![0_u8; 4];
assert_eq!(4, encode_config_slice(&bytes, STANDARD, &mut b64[..]));
let mut v = ::std::vec::Vec::with_capacity(2);
v.extend_from_slice(&bytes[..]);
assert!(base64_to_bytes.insert(b64, v).is_none());
}
}
// every possible combination of symbols must either decode to 2 bytes or get InvalidLastSymbol
let mut symbols = [0_u8; 4];
for &s1 in STANDARD.char_set.encode_table().iter() {
symbols[0] = s1;
for &s2 in STANDARD.char_set.encode_table().iter() {
symbols[1] = s2;
for &s3 in STANDARD.char_set.encode_table().iter() {
symbols[2] = s3;
symbols[3] = PAD_BYTE;
match base64_to_bytes.get(&symbols[..]) {
Some(bytes) => {
assert_eq!(Ok(bytes.to_vec()), decode_config(&symbols, STANDARD))
}
None => assert_eq!(
Err(DecodeError::InvalidLastSymbol(2, s3)),
decode_config(&symbols[..], STANDARD)
),
}
}
}
}
}
#[test]
fn detect_invalid_last_symbol_every_possible_two_symbols() {
let mut base64_to_bytes = ::std::collections::HashMap::new();
for b in 0_u16..256 {
let mut b64 = vec![0_u8; 4];
assert_eq!(4, encode_config_slice(&[b as u8], STANDARD, &mut b64[..]));
let mut v = ::std::vec::Vec::with_capacity(1);
v.push(b as u8);
assert!(base64_to_bytes.insert(b64, v).is_none());
}
// every possible combination of symbols must either decode to 1 byte or get InvalidLastSymbol
let mut symbols = [0_u8; 4];
for &s1 in STANDARD.char_set.encode_table().iter() {
symbols[0] = s1;
for &s2 in STANDARD.char_set.encode_table().iter() {
symbols[1] = s2;
symbols[2] = PAD_BYTE;
symbols[3] = PAD_BYTE;
match base64_to_bytes.get(&symbols[..]) {
Some(bytes) => {
assert_eq!(Ok(bytes.to_vec()), decode_config(&symbols, STANDARD))
}
None => assert_eq!(
Err(DecodeError::InvalidLastSymbol(1, s2)),
decode_config(&symbols[..], STANDARD)
),
}
}
}
}
}

88 zeroidc/vendor/base64/src/display.rs vendored Normal file

@@ -0,0 +1,88 @@
//! Enables base64'd output anywhere you might use a `Display` implementation, like a format string.
//!
//! ```
//! use base64::display::Base64Display;
//!
//! let data = vec![0x0, 0x1, 0x2, 0x3];
//! let wrapper = Base64Display::with_config(&data, base64::STANDARD);
//!
//! assert_eq!("base64: AAECAw==", format!("base64: {}", wrapper));
//! ```
use super::chunked_encoder::ChunkedEncoder;
use super::Config;
use core::fmt::{Display, Formatter};
use core::{fmt, str};
/// A convenience wrapper for base64'ing bytes into a format string without heap allocation.
pub struct Base64Display<'a> {
bytes: &'a [u8],
chunked_encoder: ChunkedEncoder,
}
impl<'a> Base64Display<'a> {
/// Create a `Base64Display` with the provided config.
pub fn with_config(bytes: &[u8], config: Config) -> Base64Display {
Base64Display {
bytes,
chunked_encoder: ChunkedEncoder::new(config),
}
}
}
impl<'a> Display for Base64Display<'a> {
fn fmt(&self, formatter: &mut Formatter) -> Result<(), fmt::Error> {
let mut sink = FormatterSink { f: formatter };
self.chunked_encoder.encode(self.bytes, &mut sink)
}
}
struct FormatterSink<'a, 'b: 'a> {
f: &'a mut Formatter<'b>,
}
impl<'a, 'b: 'a> super::chunked_encoder::Sink for FormatterSink<'a, 'b> {
type Error = fmt::Error;
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error> {
// Avoid unsafe. If max performance is needed, write your own display wrapper that uses
// unsafe here to gain about 10-15%.
self.f
.write_str(str::from_utf8(encoded).expect("base64 data was not utf8"))
}
}
#[cfg(test)]
mod tests {
use super::super::chunked_encoder::tests::{
chunked_encode_matches_normal_encode_random, SinkTestHelper,
};
use super::super::*;
use super::*;
#[test]
fn basic_display() {
assert_eq!(
"~$Zm9vYmFy#*",
format!("~${}#*", Base64Display::with_config(b"foobar", STANDARD))
);
assert_eq!(
"~$Zm9vYmFyZg==#*",
format!("~${}#*", Base64Display::with_config(b"foobarf", STANDARD))
);
}
#[test]
fn display_encode_matches_normal_encode() {
let helper = DisplaySinkTestHelper;
chunked_encode_matches_normal_encode_random(&helper);
}
struct DisplaySinkTestHelper;
impl SinkTestHelper for DisplaySinkTestHelper {
fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String {
format!("{}", Base64Display::with_config(bytes, config))
}
}
}

675 zeroidc/vendor/base64/src/encode.rs vendored Normal file

@@ -0,0 +1,675 @@
use crate::{Config, PAD_BYTE};
#[cfg(any(feature = "alloc", feature = "std", test))]
use crate::{chunked_encoder, STANDARD};
#[cfg(any(feature = "alloc", feature = "std", test))]
use alloc::{string::String, vec};
use core::convert::TryInto;
///Encode arbitrary octets as base64.
///Returns a String.
///Convenience for `encode_config(input, base64::STANDARD);`.
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let b64 = base64::encode(b"hello world");
/// println!("{}", b64);
///}
///```
#[cfg(any(feature = "alloc", feature = "std", test))]
pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
encode_config(input, STANDARD)
}
///Encode arbitrary octets as base64.
///Returns a String.
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let b64 = base64::encode_config(b"hello world~", base64::STANDARD);
/// println!("{}", b64);
///
/// let b64_url = base64::encode_config(b"hello internet~", base64::URL_SAFE);
/// println!("{}", b64_url);
///}
///```
#[cfg(any(feature = "alloc", feature = "std", test))]
pub fn encode_config<T: AsRef<[u8]>>(input: T, config: Config) -> String {
let mut buf = match encoded_size(input.as_ref().len(), config) {
Some(n) => vec![0; n],
None => panic!("integer overflow when calculating buffer size"),
};
encode_with_padding(input.as_ref(), config, buf.len(), &mut buf[..]);
String::from_utf8(buf).expect("Invalid UTF8")
}
///Encode arbitrary octets as base64.
///Writes into the supplied output buffer, which will grow the buffer if needed.
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let mut buf = String::new();
/// base64::encode_config_buf(b"hello world~", base64::STANDARD, &mut buf);
/// println!("{}", buf);
///
/// buf.clear();
/// base64::encode_config_buf(b"hello internet~", base64::URL_SAFE, &mut buf);
/// println!("{}", buf);
///}
///```
#[cfg(any(feature = "alloc", feature = "std", test))]
pub fn encode_config_buf<T: AsRef<[u8]>>(input: T, config: Config, buf: &mut String) {
let input_bytes = input.as_ref();
{
let mut sink = chunked_encoder::StringSink::new(buf);
let encoder = chunked_encoder::ChunkedEncoder::new(config);
encoder
.encode(input_bytes, &mut sink)
.expect("Writing to a String shouldn't fail")
}
}
/// Encode arbitrary octets as base64.
/// Writes into the supplied output buffer.
///
/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
/// or statically-allocated buffer).
///
/// # Panics
///
/// If `output` is too small to hold the encoded version of `input`, a panic will result.
///
/// # Example
///
/// ```rust
/// extern crate base64;
///
/// fn main() {
/// let s = b"hello internet!";
/// let mut buf = Vec::new();
/// // make sure we'll have a slice big enough for base64 + padding
/// buf.resize(s.len() * 4 / 3 + 4, 0);
///
/// let bytes_written = base64::encode_config_slice(s,
/// base64::STANDARD, &mut buf);
///
/// // shorten our vec down to just what was written
/// buf.resize(bytes_written, 0);
///
/// assert_eq!(s, base64::decode(&buf).unwrap().as_slice());
/// }
/// ```
pub fn encode_config_slice<T: AsRef<[u8]>>(input: T, config: Config, output: &mut [u8]) -> usize {
let input_bytes = input.as_ref();
let encoded_size = encoded_size(input_bytes.len(), config)
.expect("usize overflow when calculating buffer size");
let mut b64_output = &mut output[0..encoded_size];
encode_with_padding(&input_bytes, config, encoded_size, &mut b64_output);
encoded_size
}
/// B64-encode and pad (if configured).
///
/// This helper exists to avoid recalculating encoded_size, which is relatively expensive on short
/// inputs.
///
/// `encoded_size` is the encoded size calculated for `input`.
///
/// `output` must be of size `encoded_size`.
///
/// All bytes in `output` will be written to since it is exactly the size of the output.
fn encode_with_padding(input: &[u8], config: Config, encoded_size: usize, output: &mut [u8]) {
debug_assert_eq!(encoded_size, output.len());
let b64_bytes_written = encode_to_slice(input, output, config.char_set.encode_table());
let padding_bytes = if config.pad {
add_padding(input.len(), &mut output[b64_bytes_written..])
} else {
0
};
let encoded_bytes = b64_bytes_written
.checked_add(padding_bytes)
.expect("usize overflow when calculating b64 length");
debug_assert_eq!(encoded_size, encoded_bytes);
}
#[inline]
fn read_u64(s: &[u8]) -> u64 {
u64::from_be_bytes(s[..8].try_into().unwrap())
}
/// Encode input bytes to utf8 base64 bytes. Does not pad.
/// `output` must be long enough to hold the encoded `input` without padding.
/// Returns the number of bytes written.
#[inline]
pub fn encode_to_slice(input: &[u8], output: &mut [u8], encode_table: &[u8; 64]) -> usize {
let mut input_index: usize = 0;
const BLOCKS_PER_FAST_LOOP: usize = 4;
const LOW_SIX_BITS: u64 = 0x3F;
// we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we need
// 2 trailing bytes to be available to read.
let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2);
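// e.g. a 100-byte input gives last_fast_index = 100 - (4 * 6 + 2) = 74.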
let mut output_index = 0;
if last_fast_index > 0 {
while input_index <= last_fast_index {
// Major performance wins from letting the optimizer do the bounds check once, mostly
// on the output side
let input_chunk = &input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))];
let output_chunk = &mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)];
// Hand-unrolling for 32 vs 16 or 8 bytes yields performance about equivalent
// to unsafe pointer code on a Xeon E5-1650v3. 64 byte unrolling was slightly better for
// large inputs but significantly worse for 50-byte input, unsurprisingly. I suspect
// that it's a not uncommon use case to encode smallish chunks of data (e.g. a 64-byte
// SHA-512 digest), so it would be nice if that fit in the unrolled loop at least once.
// Plus, single-digit percentage performance differences might well be quite different
// on different hardware.
let input_u64 = read_u64(&input_chunk[0..]);
output_chunk[0] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[1] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[2] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[3] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[4] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[5] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[6] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[7] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
let input_u64 = read_u64(&input_chunk[6..]);
output_chunk[8] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[9] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[10] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[11] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[12] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[13] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[14] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[15] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
let input_u64 = read_u64(&input_chunk[12..]);
output_chunk[16] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[17] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[18] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[19] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[20] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[21] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[22] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[23] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
let input_u64 = read_u64(&input_chunk[18..]);
output_chunk[24] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[25] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[26] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[27] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[28] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[29] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[30] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[31] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
output_index += BLOCKS_PER_FAST_LOOP * 8;
input_index += BLOCKS_PER_FAST_LOOP * 6;
}
}
// Encode what's left after the fast loop.
const LOW_SIX_BITS_U8: u8 = 0x3F;
let rem = input.len() % 3;
let start_of_rem = input.len() - rem;
// start at the first index not handled by fast loop, which may be 0.
while input_index < start_of_rem {
let input_chunk = &input[input_index..(input_index + 3)];
let output_chunk = &mut output[output_index..(output_index + 4)];
output_chunk[0] = encode_table[(input_chunk[0] >> 2) as usize];
output_chunk[1] =
encode_table[((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize];
output_chunk[2] =
encode_table[((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize];
output_chunk[3] = encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize];
input_index += 3;
output_index += 4;
}
if rem == 2 {
output[output_index] = encode_table[(input[start_of_rem] >> 2) as usize];
output[output_index + 1] = encode_table[((input[start_of_rem] << 4
| input[start_of_rem + 1] >> 4)
& LOW_SIX_BITS_U8) as usize];
output[output_index + 2] =
encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize];
output_index += 3;
} else if rem == 1 {
output[output_index] = encode_table[(input[start_of_rem] >> 2) as usize];
output[output_index + 1] =
encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize];
output_index += 2;
}
output_index
}
/// calculate the base64 encoded string size, including padding if appropriate
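///
/// e.g. 10 input bytes = 3 complete chunks (12 symbols) plus 1 leftover byte, which
/// encodes to 2 more symbols: 14 without padding, or 16 padded to a multiple of 4.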
pub fn encoded_size(bytes_len: usize, config: Config) -> Option<usize> {
let rem = bytes_len % 3;
let complete_input_chunks = bytes_len / 3;
let complete_chunk_output = complete_input_chunks.checked_mul(4);
if rem > 0 {
if config.pad {
complete_chunk_output.and_then(|c| c.checked_add(4))
} else {
let encoded_rem = match rem {
1 => 2,
2 => 3,
_ => unreachable!("Impossible remainder"),
};
complete_chunk_output.and_then(|c| c.checked_add(encoded_rem))
}
} else {
complete_chunk_output
}
}
/// Write padding characters.
/// `output` is the slice where padding should be written, of length at least 2.
///
/// Returns the number of padding bytes written.
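///
/// e.g. an `input_len` of 4 leaves a remainder of 1, so `(3 - 1) % 3 = 2` padding
/// bytes are written.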
pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
let rem = input_len % 3;
let mut bytes_written = 0;
for _ in 0..((3 - rem) % 3) {
output[bytes_written] = PAD_BYTE;
bytes_written += 1;
}
bytes_written
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
decode::decode_config_buf,
tests::{assert_encode_sanity, random_config},
Config, STANDARD, URL_SAFE_NO_PAD,
};
use rand::{
distributions::{Distribution, Uniform},
FromEntropy, Rng,
};
use std;
use std::str;
#[test]
fn encoded_size_correct_standard() {
assert_encoded_length(0, 0, STANDARD);
assert_encoded_length(1, 4, STANDARD);
assert_encoded_length(2, 4, STANDARD);
assert_encoded_length(3, 4, STANDARD);
assert_encoded_length(4, 8, STANDARD);
assert_encoded_length(5, 8, STANDARD);
assert_encoded_length(6, 8, STANDARD);
assert_encoded_length(7, 12, STANDARD);
assert_encoded_length(8, 12, STANDARD);
assert_encoded_length(9, 12, STANDARD);
assert_encoded_length(54, 72, STANDARD);
assert_encoded_length(55, 76, STANDARD);
assert_encoded_length(56, 76, STANDARD);
assert_encoded_length(57, 76, STANDARD);
assert_encoded_length(58, 80, STANDARD);
}
#[test]
fn encoded_size_correct_no_pad() {
assert_encoded_length(0, 0, URL_SAFE_NO_PAD);
assert_encoded_length(1, 2, URL_SAFE_NO_PAD);
assert_encoded_length(2, 3, URL_SAFE_NO_PAD);
assert_encoded_length(3, 4, URL_SAFE_NO_PAD);
assert_encoded_length(4, 6, URL_SAFE_NO_PAD);
assert_encoded_length(5, 7, URL_SAFE_NO_PAD);
assert_encoded_length(6, 8, URL_SAFE_NO_PAD);
assert_encoded_length(7, 10, URL_SAFE_NO_PAD);
assert_encoded_length(8, 11, URL_SAFE_NO_PAD);
assert_encoded_length(9, 12, URL_SAFE_NO_PAD);
assert_encoded_length(54, 72, URL_SAFE_NO_PAD);
assert_encoded_length(55, 74, URL_SAFE_NO_PAD);
assert_encoded_length(56, 75, URL_SAFE_NO_PAD);
assert_encoded_length(57, 76, URL_SAFE_NO_PAD);
assert_encoded_length(58, 78, URL_SAFE_NO_PAD);
}
#[test]
fn encoded_size_overflow() {
assert_eq!(None, encoded_size(std::usize::MAX, STANDARD));
}
#[test]
fn encode_config_buf_into_nonempty_buffer_doesnt_clobber_prefix() {
let mut orig_data = Vec::new();
let mut prefix = String::new();
let mut encoded_data_no_prefix = String::new();
let mut encoded_data_with_prefix = String::new();
let mut decoded = Vec::new();
let prefix_len_range = Uniform::new(0, 1000);
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
prefix.clear();
encoded_data_no_prefix.clear();
encoded_data_with_prefix.clear();
decoded.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let prefix_len = prefix_len_range.sample(&mut rng);
for _ in 0..prefix_len {
// getting convenient random single-byte printable chars that aren't base64 is
// annoying
prefix.push('#');
}
encoded_data_with_prefix.push_str(&prefix);
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut encoded_data_no_prefix);
encode_config_buf(&orig_data, config, &mut encoded_data_with_prefix);
assert_eq!(
encoded_data_no_prefix.len() + prefix_len,
encoded_data_with_prefix.len()
);
assert_encode_sanity(&encoded_data_no_prefix, config, input_len);
assert_encode_sanity(&encoded_data_with_prefix[prefix_len..], config, input_len);
// append plain encode onto prefix
prefix.push_str(&mut encoded_data_no_prefix);
assert_eq!(prefix, encoded_data_with_prefix);
decode_config_buf(&encoded_data_no_prefix, config, &mut decoded).unwrap();
assert_eq!(orig_data, decoded);
}
}
#[test]
fn encode_config_slice_into_nonempty_buffer_doesnt_clobber_suffix() {
let mut orig_data = Vec::new();
let mut encoded_data = Vec::new();
let mut encoded_data_original_state = Vec::new();
let mut decoded = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
encoded_data_original_state.clear();
decoded.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
// plenty of existing garbage in the encoded buffer
for _ in 0..10 * input_len {
encoded_data.push(rng.gen());
}
encoded_data_original_state.extend_from_slice(&encoded_data);
let config = random_config(&mut rng);
let encoded_size = encoded_size(input_len, config).unwrap();
assert_eq!(
encoded_size,
encode_config_slice(&orig_data, config, &mut encoded_data)
);
assert_encode_sanity(
std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
config,
input_len,
);
assert_eq!(
&encoded_data[encoded_size..],
&encoded_data_original_state[encoded_size..]
);
decode_config_buf(&encoded_data[0..encoded_size], config, &mut decoded).unwrap();
assert_eq!(orig_data, decoded);
}
}
#[test]
fn encode_config_slice_fits_into_precisely_sized_slice() {
let mut orig_data = Vec::new();
let mut encoded_data = Vec::new();
let mut decoded = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
decoded.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let config = random_config(&mut rng);
let encoded_size = encoded_size(input_len, config).unwrap();
encoded_data.resize(encoded_size, 0);
assert_eq!(
encoded_size,
encode_config_slice(&orig_data, config, &mut encoded_data)
);
assert_encode_sanity(
std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
config,
input_len,
);
decode_config_buf(&encoded_data[0..encoded_size], config, &mut decoded).unwrap();
assert_eq!(orig_data, decoded);
}
}
#[test]
fn encode_to_slice_random_valid_utf8() {
let mut input = Vec::new();
let mut output = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
input.clear();
output.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
input.push(rng.gen());
}
let config = random_config(&mut rng);
// fill up the output buffer with garbage
let encoded_size = encoded_size(input_len, config).unwrap();
for _ in 0..encoded_size {
output.push(rng.gen());
}
let orig_output_buf = output.to_vec();
let bytes_written =
encode_to_slice(&input, &mut output, config.char_set.encode_table());
// make sure the part beyond bytes_written is the same garbage it was before
assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
// make sure the encoded bytes are UTF-8
let _ = str::from_utf8(&output[0..bytes_written]).unwrap();
}
}
#[test]
fn encode_with_padding_random_valid_utf8() {
let mut input = Vec::new();
let mut output = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
input.clear();
output.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
input.push(rng.gen());
}
let config = random_config(&mut rng);
// fill up the output buffer with garbage
let encoded_size = encoded_size(input_len, config).unwrap();
for _ in 0..encoded_size + 1000 {
output.push(rng.gen());
}
let orig_output_buf = output.to_vec();
encode_with_padding(&input, config, encoded_size, &mut output[0..encoded_size]);
// make sure the part beyond b64 is the same garbage it was before
assert_eq!(orig_output_buf[encoded_size..], output[encoded_size..]);
// make sure the encoded bytes are UTF-8
let _ = str::from_utf8(&output[0..encoded_size]).unwrap();
}
}
#[test]
fn add_padding_random_valid_utf8() {
let mut output = Vec::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
// cover our bases for length % 3
for input_len in 0..10 {
output.clear();
// fill output with random
for _ in 0..10 {
output.push(rng.gen());
}
let orig_output_buf = output.to_vec();
let bytes_written = add_padding(input_len, &mut output);
// make sure the part beyond bytes_written is the same garbage it was before
assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
// make sure the encoded bytes are UTF-8
let _ = str::from_utf8(&output[0..bytes_written]).unwrap();
}
}
fn assert_encoded_length(input_len: usize, encoded_len: usize, config: Config) {
assert_eq!(encoded_len, encoded_size(input_len, config).unwrap());
let mut bytes: Vec<u8> = Vec::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..input_len {
bytes.push(rng.gen());
}
let encoded = encode_config(&bytes, config);
assert_encode_sanity(&encoded, config, input_len);
assert_eq!(encoded_len, encoded.len());
}
#[test]
fn encode_imap() {
assert_eq!(
encode_config(b"\xFB\xFF", crate::IMAP_MUTF7),
encode_config(b"\xFB\xFF", crate::STANDARD_NO_PAD).replace("/", ",")
);
}
}

245 zeroidc/vendor/base64/src/lib.rs vendored Normal file

@@ -0,0 +1,245 @@
//! # Configs
//!
//! There isn't just one type of Base64; that would be too simple. You need to choose a character
//! set (standard, URL-safe, etc) and padding suffix (yes/no).
//! The `Config` struct encapsulates this info. There are some common configs included: `STANDARD`,
//! `URL_SAFE`, etc. You can also make your own `Config` if needed.
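//!
//! A sketch of a custom config (URL-safe alphabet, no padding):
//!
//! ```rust
//! let config = base64::Config::new(base64::CharacterSet::UrlSafe, false);
//! assert_eq!("aGVsbG8", base64::encode_config(b"hello", config));
//! ```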
//!
//! The functions that don't have `config` in the name (e.g. `encode()` and `decode()`) use the
//! `STANDARD` config.
//!
//! The functions that write to a slice (the ones that end in `_slice`) are generally the fastest
//! because they don't need to resize anything. If it fits in your workflow and you care about
//! performance, keep using the same buffer (growing as need be) and use the `_slice` methods for
//! the best performance.
//!
//! # Encoding
//!
//! Several different encoding functions are available to you depending on your desire for
//! convenience vs performance.
//!
//! | Function | Output | Allocates |
//! | ----------------------- | ---------------------------- | ------------------------------ |
//! | `encode` | Returns a new `String` | Always |
//! | `encode_config` | Returns a new `String` | Always |
//! | `encode_config_buf` | Appends to provided `String` | Only if `String` needs to grow |
//! | `encode_config_slice`   | Writes to provided `&mut [u8]` | Never                          |
//!
//! All of the encoding functions that take a `Config` will pad as per the config.
//!
//! # Decoding
//!
//! Just as for encoding, there are different decoding functions available.
//!
//! | Function | Output | Allocates |
//! | ----------------------- | ----------------------------- | ------------------------------ |
//! | `decode` | Returns a new `Vec<u8>` | Always |
//! | `decode_config` | Returns a new `Vec<u8>` | Always |
//! | `decode_config_buf` | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow |
//! | `decode_config_slice`   | Writes to provided `&mut [u8]` | Never                          |
//!
//! Unlike encoding, where all possible input is valid, decoding can fail (see `DecodeError`).
//!
//! Input can be invalid because it has invalid characters or invalid padding. (No padding at all is
//! valid, but excess padding is not.) Whitespace in the input is invalid.
//!
//! # `Read` and `Write`
//!
//! To map a `Read` of b64 bytes to the decoded bytes, wrap a reader (file, network socket, etc)
//! with `base64::read::DecoderReader`. To write raw bytes and have them b64 encoded on the fly,
//! wrap a writer with `base64::write::EncoderWriter`. There is some performance overhead (15% or
//! so) because of the necessary buffer shuffling -- still fast enough that almost nobody cares.
//! Also, these implementations do not heap allocate.
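//!
//! A compact sketch of the reader side (a `Cursor` stands in for a real data source):
//!
//! ```
//! use std::io::Read;
//! let mut src = std::io::Cursor::new(b"aGk=");
//! let mut decoded = Vec::new();
//! let mut reader = base64::read::DecoderReader::new(&mut src, base64::STANDARD);
//! reader.read_to_end(&mut decoded).unwrap();
//! assert_eq!(b"hi".to_vec(), decoded);
//! ```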
//!
//! # Panics
//!
//! If length calculations result in overflowing `usize`, a panic will result.
//!
//! The `_slice` flavors of encode or decode will panic if the provided output slice is too small.
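//!
//! For example, this sketch panics because one output byte cannot hold a four-byte encoding:
//!
//! ```should_panic
//! let mut tiny = [0u8; 1];
//! let _ = base64::encode_config_slice(b"abc", base64::STANDARD, &mut tiny);
//! ```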
#![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))]
#![deny(
missing_docs,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
unused_results,
variant_size_differences,
warnings
)]
#![forbid(unsafe_code)]
#![cfg_attr(not(any(feature = "std", test)), no_std)]
#[cfg(all(feature = "alloc", not(any(feature = "std", test))))]
extern crate alloc;
#[cfg(any(feature = "std", test))]
extern crate std as alloc;
mod chunked_encoder;
pub mod display;
#[cfg(any(feature = "std", test))]
pub mod read;
mod tables;
#[cfg(any(feature = "std", test))]
pub mod write;
mod encode;
pub use crate::encode::encode_config_slice;
#[cfg(any(feature = "alloc", feature = "std", test))]
pub use crate::encode::{encode, encode_config, encode_config_buf};
mod decode;
#[cfg(any(feature = "alloc", feature = "std", test))]
pub use crate::decode::{decode, decode_config, decode_config_buf};
pub use crate::decode::{decode_config_slice, DecodeError};
#[cfg(test)]
mod tests;
/// Available encoding character sets
#[derive(Clone, Copy, Debug)]
pub enum CharacterSet {
/// The standard character set (uses `+` and `/`).
///
/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
Standard,
/// The URL safe character set (uses `-` and `_`).
///
/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
UrlSafe,
/// The `crypt(3)` character set (uses `./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`).
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
Crypt,
/// The bcrypt character set (uses `./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`).
Bcrypt,
/// The character set used in IMAP-modified UTF-7 (uses `+` and `,`).
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
ImapMutf7,
/// The character set used in BinHex 4.0 files.
///
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
BinHex,
}
impl CharacterSet {
fn encode_table(self) -> &'static [u8; 64] {
match self {
CharacterSet::Standard => tables::STANDARD_ENCODE,
CharacterSet::UrlSafe => tables::URL_SAFE_ENCODE,
CharacterSet::Crypt => tables::CRYPT_ENCODE,
CharacterSet::Bcrypt => tables::BCRYPT_ENCODE,
CharacterSet::ImapMutf7 => tables::IMAP_MUTF7_ENCODE,
CharacterSet::BinHex => tables::BINHEX_ENCODE,
}
}
fn decode_table(self) -> &'static [u8; 256] {
match self {
CharacterSet::Standard => tables::STANDARD_DECODE,
CharacterSet::UrlSafe => tables::URL_SAFE_DECODE,
CharacterSet::Crypt => tables::CRYPT_DECODE,
CharacterSet::Bcrypt => tables::BCRYPT_DECODE,
CharacterSet::ImapMutf7 => tables::IMAP_MUTF7_DECODE,
CharacterSet::BinHex => tables::BINHEX_DECODE,
}
}
}
/// Contains configuration parameters for base64 encoding
#[derive(Clone, Copy, Debug)]
pub struct Config {
/// Character set to use
char_set: CharacterSet,
/// True to pad output with `=` characters
pad: bool,
/// True to ignore excess nonzero bits in the last few symbols; otherwise, an error is returned.
decode_allow_trailing_bits: bool,
}
impl Config {
/// Create a new `Config`.
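///
/// A small usage sketch (the `CharacterSet` variants are declared above):
///
/// ```
/// let config = base64::Config::new(base64::CharacterSet::Crypt, false);
/// let _ = base64::encode_config(b"hi", config);
/// ```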
pub const fn new(char_set: CharacterSet, pad: bool) -> Config {
Config {
char_set,
pad,
decode_allow_trailing_bits: false,
}
}
/// Sets whether to pad output with `=` characters.
pub const fn pad(self, pad: bool) -> Config {
Config { pad, ..self }
}
/// Sets whether to emit errors for nonzero trailing bits.
///
/// This is useful when implementing
/// [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode).
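///
/// A brief sketch: `"iYV"` leaves nonzero bits after the last full byte, so the strict config
/// rejects it while the lenient one accepts it.
///
/// ```
/// let lenient = base64::STANDARD_NO_PAD.decode_allow_trailing_bits(true);
/// assert!(base64::decode_config("iYV", base64::STANDARD_NO_PAD).is_err());
/// assert!(base64::decode_config("iYV", lenient).is_ok());
/// ```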
pub const fn decode_allow_trailing_bits(self, allow: bool) -> Config {
Config {
decode_allow_trailing_bits: allow,
..self
}
}
}
/// Standard character set with padding.
pub const STANDARD: Config = Config {
char_set: CharacterSet::Standard,
pad: true,
decode_allow_trailing_bits: false,
};
/// Standard character set without padding.
pub const STANDARD_NO_PAD: Config = Config {
char_set: CharacterSet::Standard,
pad: false,
decode_allow_trailing_bits: false,
};
/// URL-safe character set with padding
pub const URL_SAFE: Config = Config {
char_set: CharacterSet::UrlSafe,
pad: true,
decode_allow_trailing_bits: false,
};
/// URL-safe character set without padding
pub const URL_SAFE_NO_PAD: Config = Config {
char_set: CharacterSet::UrlSafe,
pad: false,
decode_allow_trailing_bits: false,
};
/// As per `crypt(3)` requirements
pub const CRYPT: Config = Config {
char_set: CharacterSet::Crypt,
pad: false,
decode_allow_trailing_bits: false,
};
/// Bcrypt character set
pub const BCRYPT: Config = Config {
char_set: CharacterSet::Bcrypt,
pad: false,
decode_allow_trailing_bits: false,
};
/// IMAP modified UTF-7 requirements
pub const IMAP_MUTF7: Config = Config {
char_set: CharacterSet::ImapMutf7,
pad: false,
decode_allow_trailing_bits: false,
};
/// BinHex character set
pub const BINHEX: Config = Config {
char_set: CharacterSet::BinHex,
pad: false,
decode_allow_trailing_bits: false,
};
const PAD_BYTE: u8 = b'=';

282
zeroidc/vendor/base64/src/read/decoder.rs vendored Normal file
View File

@@ -0,0 +1,282 @@
use crate::{decode_config_slice, Config, DecodeError};
use std::io::Read;
use std::{cmp, fmt, io};
// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;
// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
/// &mut wrapped_reader, base64::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
///
/// ```
pub struct DecoderReader<'a, R: 'a + io::Read> {
config: Config,
/// Where b64 data is read from
r: &'a mut R,
// Holds b64 data read from the delegate reader.
b64_buffer: [u8; BUF_SIZE],
// The start of the pending buffered data in b64_buffer.
b64_offset: usize,
// The amount of buffered b64 data.
b64_len: usize,
// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
// decoded chunk into, we have to be able to hang on to a few decoded bytes.
// Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to
// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
// into here, which seems like a lot of complexity for 1 extra byte of storage.
decoded_buffer: [u8; 3],
// index of start of decoded data
decoded_offset: usize,
// length of decoded data
decoded_len: usize,
// used to provide accurate offsets in errors
total_b64_decoded: usize,
}
impl<'a, R: io::Read> fmt::Debug for DecoderReader<'a, R> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DecoderReader")
.field("config", &self.config)
.field("b64_offset", &self.b64_offset)
.field("b64_len", &self.b64_len)
.field("decoded_buffer", &self.decoded_buffer)
.field("decoded_offset", &self.decoded_offset)
.field("decoded_len", &self.decoded_len)
.field("total_b64_decoded", &self.total_b64_decoded)
.finish()
}
}
impl<'a, R: io::Read> DecoderReader<'a, R> {
/// Create a new decoder that will read from the provided reader `r`.
pub fn new(r: &'a mut R, config: Config) -> Self {
DecoderReader {
config,
r,
b64_buffer: [0; BUF_SIZE],
b64_offset: 0,
b64_len: 0,
decoded_buffer: [0; DECODED_CHUNK_SIZE],
decoded_offset: 0,
decoded_len: 0,
total_b64_decoded: 0,
}
}
/// Write as much as possible of the decoded buffer into the target buffer.
/// Must only be called when there is something to write and space to write into.
/// Returns a Result with the number of (decoded) bytes copied.
fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
debug_assert!(self.decoded_len > 0);
debug_assert!(buf.len() > 0);
let copy_len = cmp::min(self.decoded_len, buf.len());
debug_assert!(copy_len > 0);
debug_assert!(copy_len <= self.decoded_len);
buf[..copy_len].copy_from_slice(
&self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
);
self.decoded_offset += copy_len;
self.decoded_len -= copy_len;
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
Ok(copy_len)
}
/// Read into the remaining space in the buffer after the current contents.
/// Must only be called when there is space to read into in the buffer.
/// Returns the number of bytes read.
fn read_from_delegate(&mut self) -> io::Result<usize> {
debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);
let read = self
.r
.read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
self.b64_len += read;
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
Ok(read)
}
/// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
/// caller's responsibility to choose the number of b64 bytes to decode correctly.
///
/// Returns a Result with the number of decoded bytes written to `buf`.
fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
debug_assert!(self.b64_len >= num_bytes);
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
debug_assert!(buf.len() > 0);
let decoded = decode_config_slice(
&self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
self.config,
&mut buf[..],
)
.map_err(|e| match e {
DecodeError::InvalidByte(offset, byte) => {
DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
}
DecodeError::InvalidLength => DecodeError::InvalidLength,
DecodeError::InvalidLastSymbol(offset, byte) => {
DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
}
})
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
self.total_b64_decoded += num_bytes;
self.b64_offset += num_bytes;
self.b64_len -= num_bytes;
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
Ok(decoded)
}
}
impl<'a, R: Read> Read for DecoderReader<'a, R> {
/// Decode input from the wrapped reader.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
/// written in `buf`.
///
/// Where possible, this function buffers base64 to minimize the number of read() calls to the
/// delegate reader.
///
/// # Errors
///
/// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
/// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
if buf.len() == 0 {
return Ok(0);
}
// offset == BUF_SIZE when we copied it all last time
debug_assert!(self.b64_offset <= BUF_SIZE);
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
debug_assert!(if self.b64_offset == BUF_SIZE {
self.b64_len == 0
} else {
self.b64_len <= BUF_SIZE
});
debug_assert!(if self.decoded_len == 0 {
// can be = when we were able to copy the complete chunk
self.decoded_offset <= DECODED_CHUNK_SIZE
} else {
self.decoded_offset < DECODED_CHUNK_SIZE
});
// We shouldn't ever decode into here when we can't immediately write at least one byte into
// the provided buf, so the effective length should only be 3 momentarily between when we
// decode and when we copy into the target buffer.
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
if self.decoded_len > 0 {
// we have a few leftover decoded bytes; flush that rather than pull in more b64
self.flush_decoded_buf(buf)
} else {
let mut at_eof = false;
while self.b64_len < BASE64_CHUNK_SIZE {
// Work around lack of `copy_within`, which is only available in Rust 1.37+.
// Copy any bytes we have to the start of the buffer.
// We know we have < 1 chunk, so we can use a tiny tmp buffer.
let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
memmove_buf[..self.b64_len].copy_from_slice(
&self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
);
self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
self.b64_offset = 0;
// then fill in more data
let read = self.read_from_delegate()?;
if read == 0 {
// we never pass in an empty buf, so 0 => we've hit EOF
at_eof = true;
break;
}
}
if self.b64_len == 0 {
debug_assert!(at_eof);
// we must be at EOF, and we have no data left to decode
return Ok(0);
};
debug_assert!(if at_eof {
// if we are at eof, we may not have a complete chunk
self.b64_len > 0
} else {
// otherwise, we must have at least one chunk
self.b64_len >= BASE64_CHUNK_SIZE
});
debug_assert_eq!(0, self.decoded_len);
if buf.len() < DECODED_CHUNK_SIZE {
// caller requested an annoyingly short read
// have to write to a tmp buf first to avoid double mutable borrow
let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
// if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
// to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
// tokens, not 1, since 1 token can't decode to 1 byte).
let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);
let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);
self.decoded_offset = 0;
self.decoded_len = decoded;
// can be less than 3 on last block due to padding
debug_assert!(decoded <= 3);
self.flush_decoded_buf(buf)
} else {
let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
.checked_mul(BASE64_CHUNK_SIZE)
.expect("too many chunks");
debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
let b64_bytes_available_to_decode = if at_eof {
self.b64_len
} else {
// only use complete chunks
self.b64_len - self.b64_len % 4
};
let actual_decode_len = cmp::min(
b64_bytes_that_can_decode_into_buf,
b64_bytes_available_to_decode,
);
self.decode_to_buf(actual_decode_len, buf)
}
}
}
}

335
zeroidc/vendor/base64/src/read/decoder_tests.rs vendored Normal file
View File

@@ -0,0 +1,335 @@
use std::io::{self, Read};
use rand::{Rng, RngCore};
use std::{cmp, iter};
use super::decoder::{DecoderReader, BUF_SIZE};
use crate::encode::encode_config_buf;
use crate::tests::random_config;
use crate::{decode_config_buf, DecodeError, STANDARD};
#[test]
fn simple() {
let tests: &[(&[u8], &[u8])] = &[
(&b"0"[..], &b"MA=="[..]),
(b"01", b"MDE="),
(b"012", b"MDEy"),
(b"0123", b"MDEyMw=="),
(b"01234", b"MDEyMzQ="),
(b"012345", b"MDEyMzQ1"),
(b"0123456", b"MDEyMzQ1Ng=="),
(b"01234567", b"MDEyMzQ1Njc="),
(b"012345678", b"MDEyMzQ1Njc4"),
(b"0123456789", b"MDEyMzQ1Njc4OQ=="),
][..];
for (text_expected, base64data) in tests.iter() {
// Read n bytes at a time.
for n in 1..base64data.len() + 1 {
let mut wrapped_reader = io::Cursor::new(base64data);
let mut decoder = DecoderReader::new(&mut wrapped_reader, STANDARD);
// handle errors as you normally would
let mut text_got = Vec::new();
let mut buffer = vec![0u8; n];
while let Ok(read) = decoder.read(&mut buffer[..]) {
if read == 0 {
break;
}
text_got.extend_from_slice(&buffer[..read]);
}
assert_eq!(
text_got,
*text_expected,
"\nGot: {}\nExpected: {}",
String::from_utf8_lossy(&text_got[..]),
String::from_utf8_lossy(text_expected)
);
}
}
}
// Make sure we error out on trailing junk.
#[test]
fn trailing_junk() {
let tests: &[&[u8]] = &[&b"MDEyMzQ1Njc4*!@#$%^&"[..], b"MDEyMzQ1Njc4OQ== "][..];
for base64data in tests.iter() {
// Read n bytes at a time.
for n in 1..base64data.len() + 1 {
let mut wrapped_reader = io::Cursor::new(base64data);
let mut decoder = DecoderReader::new(&mut wrapped_reader, STANDARD);
// handle errors as you normally would
let mut buffer = vec![0u8; n];
let mut saw_error = false;
loop {
match decoder.read(&mut buffer[..]) {
Err(_) => {
saw_error = true;
break;
}
Ok(read) if read == 0 => break,
Ok(_) => (),
}
}
assert!(saw_error);
}
}
}
#[test]
fn handles_short_read_from_delegate() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(0, 10 * BUF_SIZE);
bytes.extend(iter::repeat(0).take(size));
bytes.truncate(size);
rng.fill_bytes(&mut bytes[..size]);
assert_eq!(size, bytes.len());
let config = random_config(&mut rng);
encode_config_buf(&bytes[..], config, &mut b64);
let mut wrapped_reader = io::Cursor::new(b64.as_bytes());
let mut short_reader = RandomShortRead {
delegate: &mut wrapped_reader,
rng: &mut rng,
};
let mut decoder = DecoderReader::new(&mut short_reader, config);
let decoded_len = decoder.read_to_end(&mut decoded).unwrap();
assert_eq!(size, decoded_len);
assert_eq!(&bytes[..], &decoded[..]);
}
}
#[test]
fn read_in_short_increments() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(0, 10 * BUF_SIZE);
bytes.extend(iter::repeat(0).take(size));
// leave room to play around with larger buffers
decoded.extend(iter::repeat(0).take(size * 3));
rng.fill_bytes(&mut bytes[..]);
assert_eq!(size, bytes.len());
let config = random_config(&mut rng);
encode_config_buf(&bytes[..], config, &mut b64);
let mut wrapped_reader = io::Cursor::new(&b64[..]);
let mut decoder = DecoderReader::new(&mut wrapped_reader, config);
consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut decoder);
}
}
#[test]
fn read_in_short_increments_with_short_delegate_reads() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(0, 10 * BUF_SIZE);
bytes.extend(iter::repeat(0).take(size));
// leave room to play around with larger buffers
decoded.extend(iter::repeat(0).take(size * 3));
rng.fill_bytes(&mut bytes[..]);
assert_eq!(size, bytes.len());
let config = random_config(&mut rng);
encode_config_buf(&bytes[..], config, &mut b64);
let mut base_reader = io::Cursor::new(&b64[..]);
let mut decoder = DecoderReader::new(&mut base_reader, config);
let mut short_reader = RandomShortRead {
delegate: &mut decoder,
rng: &mut rand::thread_rng(),
};
consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut short_reader)
}
}
#[test]
fn reports_invalid_last_symbol_correctly() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut b64_bytes = Vec::new();
let mut decoded = Vec::new();
let mut bulk_decoded = Vec::new();
for _ in 0..1_000 {
bytes.clear();
b64.clear();
b64_bytes.clear();
let size = rng.gen_range(1, 10 * BUF_SIZE);
bytes.extend(iter::repeat(0).take(size));
decoded.extend(iter::repeat(0).take(size));
rng.fill_bytes(&mut bytes[..]);
assert_eq!(size, bytes.len());
let mut config = random_config(&mut rng);
// changing padding will cause invalid padding errors when we twiddle the last byte
config.pad = false;
encode_config_buf(&bytes[..], config, &mut b64);
b64_bytes.extend(b64.bytes());
assert_eq!(b64_bytes.len(), b64.len());
// change the last character to every possible symbol. Should behave the same as bulk
// decoding whether invalid or valid.
for &s1 in config.char_set.encode_table().iter() {
decoded.clear();
bulk_decoded.clear();
// replace the last
*b64_bytes.last_mut().unwrap() = s1;
let bulk_res = decode_config_buf(&b64_bytes[..], config, &mut bulk_decoded);
let mut wrapped_reader = io::Cursor::new(&b64_bytes[..]);
let mut decoder = DecoderReader::new(&mut wrapped_reader, config);
let stream_res = decoder.read_to_end(&mut decoded).map(|_| ()).map_err(|e| {
e.into_inner()
.and_then(|e| e.downcast::<DecodeError>().ok())
});
assert_eq!(bulk_res.map_err(|e| Some(Box::new(e))), stream_res);
}
}
}
#[test]
fn reports_invalid_byte_correctly() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(1, 10 * BUF_SIZE);
bytes.extend(iter::repeat(0).take(size));
rng.fill_bytes(&mut bytes[..size]);
assert_eq!(size, bytes.len());
let config = random_config(&mut rng);
encode_config_buf(&bytes[..], config, &mut b64);
// replace one byte, somewhere, with '*', which is invalid
let bad_byte_pos = rng.gen_range(0, b64.len());
let mut b64_bytes = b64.bytes().collect::<Vec<u8>>();
b64_bytes[bad_byte_pos] = b'*';
let mut wrapped_reader = io::Cursor::new(b64_bytes.clone());
let mut decoder = DecoderReader::new(&mut wrapped_reader, config);
// some gymnastics to avoid double-moving the io::Error, which is not Copy
let read_decode_err = decoder
.read_to_end(&mut decoded)
.map_err(|e| {
let kind = e.kind();
let inner = e
.into_inner()
.and_then(|e| e.downcast::<DecodeError>().ok());
inner.map(|i| (*i, kind))
})
.err()
.and_then(|o| o);
let mut bulk_buf = Vec::new();
let bulk_decode_err = decode_config_buf(&b64_bytes[..], config, &mut bulk_buf).err();
// it's tricky to predict where the invalid data's offset will be since if it's in the last
// chunk it will be reported at the first padding location because it's treated as invalid
// padding. So, we just check that it's the same as it is for decoding all at once.
assert_eq!(
bulk_decode_err.map(|e| (e, io::ErrorKind::InvalidData)),
read_decode_err
);
}
}
fn consume_with_short_reads_and_validate<R: Read>(
rng: &mut rand::rngs::ThreadRng,
expected_bytes: &[u8],
decoded: &mut Vec<u8>,
short_reader: &mut R,
) {
let mut total_read = 0_usize;
loop {
assert!(
total_read <= expected_bytes.len(),
"tr {} size {}",
total_read,
expected_bytes.len()
);
if total_read == expected_bytes.len() {
assert_eq!(expected_bytes, &decoded[..total_read]);
// should be done
assert_eq!(0, short_reader.read(&mut decoded[..]).unwrap());
// didn't write anything
assert_eq!(expected_bytes, &decoded[..total_read]);
break;
}
let decode_len = rng.gen_range(1, cmp::max(2, expected_bytes.len() * 2));
let read = short_reader
.read(&mut decoded[total_read..total_read + decode_len])
.unwrap();
total_read += read;
}
}
/// Limits how many bytes a reader will provide in each read call.
/// Useful for shaking out code that may work fine only with typical input sources that always fill
/// the buffer.
struct RandomShortRead<'a, 'b, R: io::Read, N: rand::Rng> {
delegate: &'b mut R,
rng: &'a mut N,
}
impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R, N> {
fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
// avoid 0 since it means EOF for non-empty buffers
let effective_len = cmp::min(self.rng.gen_range(1, 20), buf.len());
self.delegate.read(&mut buf[..effective_len])
}
}

6
zeroidc/vendor/base64/src/read/mod.rs vendored Normal file
View File

@@ -0,0 +1,6 @@
//! Implementations of `io::Read` to transparently decode base64.
mod decoder;
pub use self::decoder::DecoderReader;
#[cfg(test)]
mod decoder_tests;

1957
zeroidc/vendor/base64/src/tables.rs vendored Normal file

File diff suppressed because it is too large

81
zeroidc/vendor/base64/src/tests.rs vendored Normal file
View File

@@ -0,0 +1,81 @@
use crate::{decode_config, encode::encoded_size, encode_config_buf, CharacterSet, Config};
use std::str;
use rand::{
distributions::{Distribution, Uniform},
seq::SliceRandom,
FromEntropy, Rng,
};
#[test]
fn roundtrip_random_config_short() {
// exercise the slower encode/decode routines that operate on shorter buffers more vigorously
roundtrip_random_config(Uniform::new(0, 50), 10_000);
}
#[test]
fn roundtrip_random_config_long() {
roundtrip_random_config(Uniform::new(0, 1000), 10_000);
}
pub fn assert_encode_sanity(encoded: &str, config: Config, input_len: usize) {
let input_rem = input_len % 3;
let expected_padding_len = if input_rem > 0 {
if config.pad {
3 - input_rem
} else {
0
}
} else {
0
};
let expected_encoded_len = encoded_size(input_len, config).unwrap();
assert_eq!(expected_encoded_len, encoded.len());
let padding_len = encoded.chars().filter(|&c| c == '=').count();
assert_eq!(expected_padding_len, padding_len);
let _ = str::from_utf8(encoded.as_bytes()).expect("Base64 should be valid utf8");
}
fn roundtrip_random_config(input_len_range: Uniform<usize>, iterations: u32) {
let mut input_buf: Vec<u8> = Vec::new();
let mut encoded_buf = String::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..iterations {
input_buf.clear();
encoded_buf.clear();
let input_len = input_len_range.sample(&mut rng);
let config = random_config(&mut rng);
for _ in 0..input_len {
input_buf.push(rng.gen());
}
encode_config_buf(&input_buf, config, &mut encoded_buf);
assert_encode_sanity(&encoded_buf, config, input_len);
assert_eq!(input_buf, decode_config(&encoded_buf, config).unwrap());
}
}
pub fn random_config<R: Rng>(rng: &mut R) -> Config {
const CHARSETS: &[CharacterSet] = &[
CharacterSet::UrlSafe,
CharacterSet::Standard,
CharacterSet::Crypt,
CharacterSet::ImapMutf7,
CharacterSet::BinHex,
];
let charset = *CHARSETS.choose(rng).unwrap();
Config::new(charset, rng.gen())
}

381
zeroidc/vendor/base64/src/write/encoder.rs vendored Normal file
View File

@@ -0,0 +1,381 @@
use crate::encode::encode_to_slice;
use crate::{encode_config_slice, Config};
use std::{
cmp, fmt,
io::{ErrorKind, Result, Write},
};
pub(crate) const BUF_SIZE: usize = 1024;
/// The most bytes whose encoding will fit in `BUF_SIZE`
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
/// handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<W: Write> {
config: Config,
/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
/// called so that finish() can return the underlying writer. None implies that finish() has
/// been called successfully.
delegate: Option<W>,
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
/// How much of `extra_input` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`.
extra_input_occupied_len: usize,
/// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the underlying writer
/// did not write last time.
output: [u8; BUF_SIZE],
/// How much of `output` is occupied with encoded data that couldn't be written last time
output_occupied_len: usize,
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
panicked: bool,
}
impl<W: Write> fmt::Debug for EncoderWriter<W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
self.extra_input,
self.extra_input_occupied_len,
&self.output[0..5],
self.output_occupied_len
)
}
}
impl<W: Write> EncoderWriter<W> {
/// Create a new encoder that will write to the provided delegate writer `w`.
pub fn new(w: W, config: Config) -> EncoderWriter<W> {
EncoderWriter {
config,
delegate: Some(w),
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
extra_input_occupied_len: 0,
output: [0u8; BUF_SIZE],
output_occupied_len: 0,
panicked: false,
}
}
/// Encode all remaining buffered data and write it, including any trailing incomplete input
/// triples and associated padding.
///
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// This may write to the delegate writer multiple times if the delegate writer does not accept
/// all input provided to its `write` each invocation.
///
/// If you don't care about error handling, it is not necessary to call this function, as the
/// equivalent finalization is done by the Drop impl.
///
/// Returns the writer that this was constructed around.
///
/// # Errors
///
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
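///
/// # Example
///
/// A small sketch mirroring the type-level example above:
///
/// ```
/// use std::io::Write;
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
/// enc.write_all(b"hello").unwrap();
/// let delegate = enc.finish().unwrap();
/// assert_eq!(b"aGVsbG8=", &delegate[..]);
/// ```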
pub fn finish(&mut self) -> Result<W> {
// If we could consume self in finish(), we wouldn't have to worry about this case, but
// finish() is retryable in the face of I/O errors, so we can't consume here.
if self.delegate.is_none() {
panic!("Encoder has already had finish() called")
};
self.write_final_leftovers()?;
let writer = self.delegate.take().expect("Writer must be present");
Ok(writer)
}
/// Write any remaining buffered data to the delegate writer.
fn write_final_leftovers(&mut self) -> Result<()> {
if self.delegate.is_none() {
// finish() has already successfully called this, and we are now in drop() with a None
// writer, so just no-op
return Ok(());
}
self.write_all_encoded_output()?;
if self.extra_input_occupied_len > 0 {
let encoded_len = encode_config_slice(
&self.extra_input[..self.extra_input_occupied_len],
self.config,
&mut self.output[..],
);
self.output_occupied_len = encoded_len;
self.write_all_encoded_output()?;
// write succeeded, do not write the encoding of extra again if finish() is retried
self.extra_input_occupied_len = 0;
}
Ok(())
}
/// Write as much of the encoded output to the delegate writer as it will accept, and store the
/// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
///
/// # Errors
///
/// Errors from the delegate writer are returned. In the case of an error,
/// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
/// that no write took place.
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
self.panicked = true;
let res = self
.delegate
.as_mut()
.expect("Writer must be present")
.write(&self.output[..current_output_len]);
self.panicked = false;
res.map(|consumed| {
debug_assert!(consumed <= current_output_len);
if consumed < current_output_len {
self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
// If we're blocking on I/O, the minor inefficiency of copying bytes to the
// start of the buffer is the least of our concerns...
// Rotate moves more than we need to, but copy_within isn't stabilized yet.
self.output.rotate_left(consumed);
} else {
self.output_occupied_len = 0;
}
})
}
/// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
///
/// This is basically write_all for the remaining buffered data but without the undesirable
/// abort-on-`Ok(0)` behavior.
///
/// # Errors
///
/// Any error emitted by the delegate writer aborts the write loop and is returned, unless it's
/// `Interrupted`, in which case the error is ignored and writes will continue.
fn write_all_encoded_output(&mut self) -> Result<()> {
while self.output_occupied_len > 0 {
let remaining_len = self.output_occupied_len;
match self.write_to_delegate(remaining_len) {
// try again on interrupts ala write_all
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
// other errors return
Err(e) => return Err(e),
// success no-ops because remaining length is already updated
Ok(_) => {}
};
}
debug_assert_eq!(0, self.output_occupied_len);
Ok(())
}
}
impl<W: Write> Write for EncoderWriter<W> {
/// Encode input and then write to the delegate writer.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
/// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
/// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
/// <https://github.com/rust-lang/rust/issues/56889> for more on that.
///
/// If the previous call to `write` provided more (encoded) data than the delegate writer could
/// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
/// data is present, subsequent calls to `write` will try to write the remaining buffered data
/// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
/// an error.
///
/// # Errors
///
/// Any errors emitted by the delegate writer are returned.
fn write(&mut self, input: &[u8]) -> Result<usize> {
if self.delegate.is_none() {
panic!("Cannot write more after calling finish()");
}
if input.is_empty() {
return Ok(0);
}
// The contract of `Write::write` places some constraints on this implementation:
// - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
// iterate over the input and encode multiple chunks.
// - Errors mean that "no bytes were written to this writer", so we need to reset the
// internal state to what it was before the error occurred
// before reading any input, write any leftover encoded output from last time
if self.output_occupied_len > 0 {
let current_len = self.output_occupied_len;
return self
.write_to_delegate(current_len)
// did not read any input
.map(|_| 0);
}
debug_assert_eq!(0, self.output_occupied_len);
// how many bytes, if any, were read into `extra` to create a triple to encode
let mut extra_input_read_len = 0;
let mut input = input;
let orig_extra_len = self.extra_input_occupied_len;
let mut encoded_size = 0;
// always a multiple of MIN_ENCODE_CHUNK_SIZE
let mut max_input_len = MAX_INPUT_LEN;
// process leftover un-encoded input from last write
if self.extra_input_occupied_len > 0 {
debug_assert!(self.extra_input_occupied_len < 3);
if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
// Fill up `extra`, encode that into `output`, and consume as much of the rest of
// `input` as possible.
// We could write just the encoding of `extra` by itself but then we'd have to
// return after writing only 4 bytes, which is inefficient if the underlying writer
// would make a syscall.
extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
debug_assert!(extra_input_read_len > 0);
// overwrite only bytes that weren't already used. If we need to rollback extra_len
// (when the subsequent write errors), the old leading bytes will still be there.
self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
.copy_from_slice(&input[0..extra_input_read_len]);
let len = encode_to_slice(
&self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
&mut self.output[..],
self.config.char_set.encode_table(),
);
debug_assert_eq!(4, len);
input = &input[extra_input_read_len..];
// consider extra to be used up, since we encoded it
self.extra_input_occupied_len = 0;
// don't clobber where we just encoded to
encoded_size = 4;
// and don't read more than can be encoded
max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;
// fall through to normal encoding
} else {
// `extra` and `input` are non-empty, but `|extra| + |input| < 3`, so there must be
// 1 byte in each.
debug_assert_eq!(1, input.len());
debug_assert_eq!(1, self.extra_input_occupied_len);
self.extra_input[self.extra_input_occupied_len] = input[0];
self.extra_input_occupied_len += 1;
return Ok(1);
};
} else if input.len() < MIN_ENCODE_CHUNK_SIZE {
// `extra` is empty, and `input` fits inside it
self.extra_input[0..input.len()].copy_from_slice(input);
self.extra_input_occupied_len = input.len();
return Ok(input.len());
};
// either 0 or 1 complete chunks encoded from extra
debug_assert!(encoded_size == 0 || encoded_size == 4);
debug_assert!(
// didn't encode extra input
MAX_INPUT_LEN == max_input_len
// encoded one triple
|| MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
);
// encode complete triples only
let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);
encoded_size += encode_to_slice(
&input[..(input_chunks_to_encode_len)],
&mut self.output[encoded_size..],
self.config.char_set.encode_table(),
);
// not updating `self.output_occupied_len` here because if the below write fails, it should
// "never take place" -- the buffer contents we encoded are ignored and perhaps retried
// later, if the consumer chooses.
self.write_to_delegate(encoded_size)
// no matter whether we wrote the full encoded buffer or not, we consumed the same
// input
.map(|_| extra_input_read_len + input_chunks_to_encode_len)
.map_err(|e| {
// in case we filled and encoded `extra`, reset extra_len
self.extra_input_occupied_len = orig_extra_len;
e
})
}
/// Because this is usually treated as OK to call multiple times, it will *not* flush any
/// incomplete chunks of input or write padding.
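///
/// For instance, a sketch of that behavior:
///
/// ```
/// use std::io::Write;
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
/// enc.write_all(b"ab").unwrap(); // an incomplete triple stays buffered
/// enc.flush().unwrap(); // no padding is written here
/// let delegate = enc.finish().unwrap(); // finish() encodes the leftovers
/// assert_eq!(b"YWI=", &delegate[..]);
/// ```
///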
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
fn flush(&mut self) -> Result<()> {
self.write_all_encoded_output()?;
self.delegate
.as_mut()
.expect("Writer must be present")
.flush()
}
}
impl<W: Write> Drop for EncoderWriter<W> {
fn drop(&mut self) {
if !self.panicked {
// like `BufWriter`, ignore errors during drop
let _ = self.write_final_leftovers();
}
}
}

176
zeroidc/vendor/base64/src/write/encoder_string_writer.rs vendored Normal file
View File

@@ -0,0 +1,176 @@
use super::encoder::EncoderWriter;
use crate::Config;
use std::io;
use std::io::Write;
/// A `Write` implementation that base64-encodes data using the provided config and accumulates the
/// resulting base64 in memory, which is then exposed as a String via `into_inner()`.
///
/// # Examples
///
/// Buffer base64 in a new String:
///
/// ```
/// use std::io::Write;
///
/// let mut enc = base64::write::EncoderStringWriter::new(base64::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // get the resulting String
/// let b64_string = enc.into_inner();
///
/// assert_eq!("YXNkZg==", &b64_string);
/// ```
///
/// Or, append to an existing String:
///
/// ```
/// use std::io::Write;
///
/// let mut buf = String::from("base64: ");
///
/// let mut enc = base64::write::EncoderStringWriter::from(&mut buf, base64::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // release the &mut reference on buf
/// let _ = enc.into_inner();
///
/// assert_eq!("base64: YXNkZg==", &buf);
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Performance
///
/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain
/// bytes to an `io::Write`.
pub struct EncoderStringWriter<S: StrConsumer> {
encoder: EncoderWriter<Utf8SingleCodeUnitWriter<S>>,
}
impl<S: StrConsumer> EncoderStringWriter<S> {
/// Create an `EncoderStringWriter` that will append to the provided `StrConsumer`.
pub fn from(str_consumer: S, config: Config) -> Self {
EncoderStringWriter {
encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config),
}
}
/// Encode all remaining buffered data, including any trailing incomplete input triples and
/// associated padding.
///
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// Returns the base64-encoded form of the accumulated written data.
pub fn into_inner(mut self) -> S {
self.encoder
.finish()
.expect("Writing to a Vec<u8> should never fail")
.str_consumer
}
}
impl EncoderStringWriter<String> {
/// Create an `EncoderStringWriter` that will encode into a new `String` with the provided config.
pub fn new(config: Config) -> Self {
EncoderStringWriter::from(String::new(), config)
}
}
impl<S: StrConsumer> Write for EncoderStringWriter<S> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.encoder.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.encoder.flush()
}
}
/// An abstraction around consuming `str`s produced by base64 encoding.
pub trait StrConsumer {
/// Consume the base64 encoded data in `buf`
fn consume(&mut self, buf: &str);
}
/// As for io::Write, `StrConsumer` is implemented automatically for `&mut S`.
impl<S: StrConsumer + ?Sized> StrConsumer for &mut S {
fn consume(&mut self, buf: &str) {
(**self).consume(buf)
}
}
/// Pushes the str onto the end of the String
impl StrConsumer for String {
fn consume(&mut self, buf: &str) {
self.push_str(buf)
}
}
/// A `Write` that only can handle bytes that are valid single-byte UTF-8 code units.
///
/// This is safe because we only use it when writing base64, which is always valid UTF-8.
struct Utf8SingleCodeUnitWriter<S: StrConsumer> {
str_consumer: S,
}
impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
// Because every input byte is expected to be a valid single-byte UTF-8 code unit, any
// split of the buffer is still valid UTF-8, so we can handle any buffer length
let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8");
self.str_consumer.consume(s);
Ok(buf.len())
}
fn flush(&mut self) -> io::Result<()> {
// no op
Ok(())
}
}
#[cfg(test)]
mod tests {
use crate::encode_config_buf;
use crate::tests::random_config;
use crate::write::encoder_string_writer::EncoderStringWriter;
use rand::Rng;
use std::io::Write;
#[test]
fn every_possible_split_of_input() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut normal_encoded = String::new();
let size = 5_000;
for i in 0..size {
orig_data.clear();
normal_encoded.clear();
for _ in 0..size {
orig_data.push(rng.gen());
}
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut normal_encoded);
let mut stream_encoder = EncoderStringWriter::new(config);
// Write the first i bytes, then the rest
stream_encoder.write_all(&orig_data[0..i]).unwrap();
stream_encoder.write_all(&orig_data[i..]).unwrap();
let stream_encoded = stream_encoder.into_inner();
assert_eq!(normal_encoded, stream_encoded);
}
}
}

568
zeroidc/vendor/base64/src/write/encoder_tests.rs vendored Normal file
View File

@@ -0,0 +1,568 @@
use super::EncoderWriter;
use crate::tests::random_config;
use crate::{encode_config, encode_config_buf, STANDARD_NO_PAD, URL_SAFE};
use std::io::{Cursor, Write};
use std::{cmp, io, str};
use rand::Rng;
#[test]
fn encode_three_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
let sz = enc.write(b"abc").unwrap();
assert_eq!(sz, 3);
}
assert_eq!(&c.get_ref()[..], encode_config("abc", URL_SAFE).as_bytes());
}
#[test]
fn encode_nine_bytes_two_writes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
let sz = enc.write(b"abcdef").unwrap();
assert_eq!(sz, 6);
let sz = enc.write(b"ghi").unwrap();
assert_eq!(sz, 3);
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdefghi", URL_SAFE).as_bytes()
);
}
#[test]
fn encode_one_then_two_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
let sz = enc.write(b"a").unwrap();
assert_eq!(sz, 1);
let sz = enc.write(b"bc").unwrap();
assert_eq!(sz, 2);
}
assert_eq!(&c.get_ref()[..], encode_config("abc", URL_SAFE).as_bytes());
}
#[test]
fn encode_one_then_five_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
let sz = enc.write(b"a").unwrap();
assert_eq!(sz, 1);
let sz = enc.write(b"bcdef").unwrap();
assert_eq!(sz, 5);
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdef", URL_SAFE).as_bytes()
);
}
#[test]
fn encode_1_2_3_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
let sz = enc.write(b"a").unwrap();
assert_eq!(sz, 1);
let sz = enc.write(b"bc").unwrap();
assert_eq!(sz, 2);
let sz = enc.write(b"def").unwrap();
assert_eq!(sz, 3);
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdef", URL_SAFE).as_bytes()
);
}
#[test]
fn encode_with_padding() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
enc.write_all(b"abcd").unwrap();
enc.flush().unwrap();
}
assert_eq!(&c.get_ref()[..], encode_config("abcd", URL_SAFE).as_bytes());
}
#[test]
fn encode_with_padding_multiple_writes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
assert_eq!(1, enc.write(b"a").unwrap());
assert_eq!(2, enc.write(b"bc").unwrap());
assert_eq!(3, enc.write(b"def").unwrap());
assert_eq!(1, enc.write(b"g").unwrap());
enc.flush().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdefg", URL_SAFE).as_bytes()
);
}
#[test]
fn finish_writes_extra_byte() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, URL_SAFE);
assert_eq!(6, enc.write(b"abcdef").unwrap());
// will be in extra
assert_eq!(1, enc.write(b"g").unwrap());
// 1 trailing byte = 2 encoded chars
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdefg", URL_SAFE).as_bytes()
);
}
#[test]
fn write_partial_chunk_encodes_partial_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
// nothing encoded yet
assert_eq!(2, enc.write(b"ab").unwrap());
// encoded here
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("ab", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(3, c.get_ref().len());
}
#[test]
fn write_1_chunk_encodes_complete_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
assert_eq!(3, enc.write(b"abc").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abc", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_1_chunk_and_partial_encodes_only_complete_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
// "d" not written
assert_eq!(3, enc.write(b"abcd").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abc", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_2_partials_to_exactly_complete_chunk_encodes_complete_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
assert_eq!(1, enc.write(b"a").unwrap());
assert_eq!(2, enc.write(b"bc").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abc", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_encodes_complete_chunk_without_consuming_remaining(
) {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
assert_eq!(1, enc.write(b"a").unwrap());
// doesn't consume "d"
assert_eq!(2, enc.write(b"bcd").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abc", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_partial_then_enough_to_complete_chunk_and_another_chunk_encodes_complete_chunks() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
assert_eq!(1, enc.write(b"a").unwrap());
// completes partial chunk, and another chunk
assert_eq!(5, enc.write(b"bcdef").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdef", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(8, c.get_ref().len());
}
#[test]
fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_partial_chunk_encodes_only_complete_chunks(
) {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
assert_eq!(1, enc.write(b"a").unwrap());
// completes partial chunk, and another chunk, with one more partial chunk that's not
// consumed
assert_eq!(5, enc.write(b"bcdefe").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
encode_config("abcdef", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(8, c.get_ref().len());
}
#[test]
fn drop_calls_finish_for_you() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD);
assert_eq!(1, enc.write(b"a").unwrap());
}
assert_eq!(
&c.get_ref()[..],
encode_config("a", STANDARD_NO_PAD).as_bytes()
);
assert_eq!(2, c.get_ref().len());
}
#[test]
fn every_possible_split_of_input() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
let size = 5_000;
for i in 0..size {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
for _ in 0..size {
orig_data.push(rng.gen());
}
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut normal_encoded);
{
let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, config);
// Write the first i bytes, then the rest
stream_encoder.write_all(&orig_data[0..i]).unwrap();
stream_encoder.write_all(&orig_data[i..]).unwrap();
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
#[test]
fn encode_random_config_matches_normal_encode_reasonable_input_len() {
// choose up to 2 * buf size, so ~half the time it'll use a full buffer
do_encode_random_config_matches_normal_encode(super::encoder::BUF_SIZE * 2)
}
#[test]
fn encode_random_config_matches_normal_encode_tiny_input_len() {
do_encode_random_config_matches_normal_encode(10)
}
#[test]
fn retrying_writes_that_error_with_interrupted_works() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
for _ in 0..1_000 {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
let orig_len: usize = rng.gen_range(100, 20_000);
for _ in 0..orig_len {
orig_data.push(rng.gen());
}
// encode the normal way
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut normal_encoded);
// encode via the stream encoder
{
let mut interrupt_rng = rand::thread_rng();
let mut interrupting_writer = InterruptingWriter {
w: &mut stream_encoded,
rng: &mut interrupt_rng,
fraction: 0.8,
};
let mut stream_encoder = EncoderWriter::new(&mut interrupting_writer, config);
let mut bytes_consumed = 0;
while bytes_consumed < orig_len {
// use short inputs since we want to use `extra` a lot as that's what needs rollback
// when errors occur
let input_len: usize = cmp::min(rng.gen_range(0, 10), orig_len - bytes_consumed);
retry_interrupted_write_all(
&mut stream_encoder,
&orig_data[bytes_consumed..bytes_consumed + input_len],
)
.unwrap();
bytes_consumed += input_len;
}
loop {
let res = stream_encoder.finish();
match res {
Ok(_) => break,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
_ => Err(e).unwrap(), // bail
},
}
}
assert_eq!(orig_len, bytes_consumed);
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
#[test]
fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_encoded_data() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
for _ in 0..1_000 {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
let orig_len: usize = rng.gen_range(100, 20_000);
for _ in 0..orig_len {
orig_data.push(rng.gen());
}
// encode the normal way
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut normal_encoded);
// encode via the stream encoder
{
let mut partial_rng = rand::thread_rng();
let mut partial_writer = PartialInterruptingWriter {
w: &mut stream_encoded,
rng: &mut partial_rng,
full_input_fraction: 0.1,
no_interrupt_fraction: 0.1,
};
let mut stream_encoder = EncoderWriter::new(&mut partial_writer, config);
let mut bytes_consumed = 0;
while bytes_consumed < orig_len {
// use at most medium-length inputs to exercise retry logic more aggressively
let input_len: usize = cmp::min(rng.gen_range(0, 100), orig_len - bytes_consumed);
let res =
stream_encoder.write(&orig_data[bytes_consumed..bytes_consumed + input_len]);
// retry on interrupt
match res {
Ok(len) => bytes_consumed += len,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
_ => {
panic!("should not see other errors");
}
},
}
}
let _ = stream_encoder.finish().unwrap();
assert_eq!(orig_len, bytes_consumed);
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
/// Retry writes until all the data is written or an error that isn't Interrupted is returned.
fn retry_interrupted_write_all<W: Write>(w: &mut W, buf: &[u8]) -> io::Result<()> {
let mut bytes_consumed = 0;
while bytes_consumed < buf.len() {
let res = w.write(&buf[bytes_consumed..]);
match res {
Ok(len) => bytes_consumed += len,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
_ => return Err(e),
},
}
}
Ok(())
}
fn do_encode_random_config_matches_normal_encode(max_input_len: usize) {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
for _ in 0..1_000 {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
let orig_len: usize = rng.gen_range(100, 20_000);
for _ in 0..orig_len {
orig_data.push(rng.gen());
}
// encode the normal way
let config = random_config(&mut rng);
encode_config_buf(&orig_data, config, &mut normal_encoded);
// encode via the stream encoder
{
let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, config);
let mut bytes_consumed = 0;
while bytes_consumed < orig_len {
let input_len: usize =
cmp::min(rng.gen_range(0, max_input_len), orig_len - bytes_consumed);
// write a little bit of the data
stream_encoder
.write_all(&orig_data[bytes_consumed..bytes_consumed + input_len])
.unwrap();
bytes_consumed += input_len;
}
let _ = stream_encoder.finish().unwrap();
assert_eq!(orig_len, bytes_consumed);
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
/// A `Write` implementation that returns Interrupted some fraction of the time, randomly.
struct InterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> {
w: &'a mut W,
rng: &'a mut R,
/// In [0, 1]. If a random number in [0, 1] is `<= fraction`, `Write` methods will return
/// an `Interrupted` error.
fraction: f64,
}
impl<'a, W: Write, R: Rng> Write for InterruptingWriter<'a, W, R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if self.rng.gen_range(0.0, 1.0) <= self.fraction {
return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted"));
}
self.w.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
if self.rng.gen_range(0.0, 1.0) <= self.fraction {
return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted"));
}
self.w.flush()
}
}
/// A `Write` implementation that sometimes will only write part of its input.
struct PartialInterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> {
w: &'a mut W,
rng: &'a mut R,
/// In [0, 1]. If a random number in [0, 1] is `<= full_input_fraction`, `write()` will write
/// all of its input. Otherwise, it will write a random prefix of it.
full_input_fraction: f64,
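/// In [0, 1]. If a random number in [0, 1] is `> no_interrupt_fraction`, `write()` returns an
/// `Interrupted` error instead of writing anything.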
no_interrupt_fraction: f64,
}
impl<'a, W: Write, R: Rng> Write for PartialInterruptingWriter<'a, W, R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if self.rng.gen_range(0.0, 1.0) > self.no_interrupt_fraction {
return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted"));
}
// also pass tiny buffers straight through: `gen_range(0, 0)` below would panic on a
// 1-byte buffer
if self.rng.gen_range(0.0, 1.0) <= self.full_input_fraction || buf.len() <= 1 {
// pass through the buf untouched
self.w.write(buf)
} else {
// only use a prefix of it
self.w
.write(&buf[0..(self.rng.gen_range(0, buf.len() - 1))])
}
}
fn flush(&mut self) -> io::Result<()> {
self.w.flush()
}
}

8
zeroidc/vendor/base64/src/write/mod.rs vendored Normal file
View File

@@ -0,0 +1,8 @@
//! Implementations of `io::Write` to transparently handle base64.
mod encoder;
mod encoder_string_writer;
pub use self::encoder::EncoderWriter;
pub use self::encoder_string_writer::EncoderStringWriter;
#[cfg(test)]
mod encoder_tests;