RPM build fix (reverted CI changes which will need to be un-reverted or made conditional) and vendor Rust dependencies to make builds much faster in any CI system.

2022-06-08 07:32:16 -04:00
parent 373ca30269
commit d5ca4e5f52
12611 changed files with 2898014 additions and 284 deletions
--- a/zeroidc/vendor/ppv-lite86/src/generic.rs
+++ b/zeroidc/vendor/ppv-lite86/src/generic.rs
@@ -0,0 +1,865 @@
+#![allow(non_camel_case_types)]
+
+use crate::soft::{x2, x4};
+use crate::types::*;
+use core::ops::*;
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub union vec128_storage {
+    d: [u32; 4],
+    q: [u64; 2],
+}
+impl From<[u32; 4]> for vec128_storage {
+    #[inline(always)]
+    fn from(d: [u32; 4]) -> Self {
+        Self { d }
+    }
+}
+impl From<vec128_storage> for [u32; 4] {
+    #[inline(always)]
+    fn from(d: vec128_storage) -> Self {
+        unsafe { d.d }
+    }
+}
+impl From<[u64; 2]> for vec128_storage {
+    #[inline(always)]
+    fn from(q: [u64; 2]) -> Self {
+        Self { q }
+    }
+}
+impl From<vec128_storage> for [u64; 2] {
+    #[inline(always)]
+    fn from(q: vec128_storage) -> Self {
+        unsafe { q.q }
+    }
+}
+impl Default for vec128_storage {
+    #[inline(always)]
+    fn default() -> Self {
+        Self { q: [0, 0] }
+    }
+}
+impl Eq for vec128_storage {}
+impl PartialEq<vec128_storage> for vec128_storage {
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> bool {
+        unsafe { self.q == rhs.q }
+    }
+}
+#[derive(Clone, Copy, PartialEq, Eq, Default)]
+pub struct vec256_storage {
+    v128: [vec128_storage; 2],
+}
+impl vec256_storage {
+    #[inline(always)]
+    pub fn new128(v128: [vec128_storage; 2]) -> Self {
+        Self { v128 }
+    }
+    #[inline(always)]
+    pub fn split128(self) -> [vec128_storage; 2] {
+        self.v128
+    }
+}
+impl From<vec256_storage> for [u64; 4] {
+    #[inline(always)]
+    fn from(q: vec256_storage) -> Self {
+        let [a, b]: [u64; 2] = q.v128[0].into();
+        let [c, d]: [u64; 2] = q.v128[1].into();
+        [a, b, c, d]
+    }
+}
+impl From<[u64; 4]> for vec256_storage {
+    #[inline(always)]
+    fn from([a, b, c, d]: [u64; 4]) -> Self {
+        Self {
+            v128: [[a, b].into(), [c, d].into()],
+        }
+    }
+}
+#[derive(Clone, Copy, PartialEq, Eq, Default)]
+pub struct vec512_storage {
+    v128: [vec128_storage; 4],
+}
+impl vec512_storage {
+    #[inline(always)]
+    pub fn new128(v128: [vec128_storage; 4]) -> Self {
+        Self { v128 }
+    }
+    #[inline(always)]
+    pub fn split128(self) -> [vec128_storage; 4] {
+        self.v128
+    }
+}
+
+#[inline(always)]
+fn dmap<T, F>(t: T, f: F) -> T
+where
+    T: Store<vec128_storage> + Into<vec128_storage>,
+    F: Fn(u32) -> u32,
+{
+    let t: vec128_storage = t.into();
+    let d = unsafe { t.d };
+    let d = vec128_storage {
+        d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
+    };
+    unsafe { T::unpack(d) }
+}
+
+fn dmap2<T, F>(a: T, b: T, f: F) -> T
+where
+    T: Store<vec128_storage> + Into<vec128_storage>,
+    F: Fn(u32, u32) -> u32,
+{
+    let a: vec128_storage = a.into();
+    let b: vec128_storage = b.into();
+    let ao = unsafe { a.d };
+    let bo = unsafe { b.d };
+    let d = vec128_storage {
+        d: [
+            f(ao[0], bo[0]),
+            f(ao[1], bo[1]),
+            f(ao[2], bo[2]),
+            f(ao[3], bo[3]),
+        ],
+    };
+    unsafe { T::unpack(d) }
+}
+
+#[inline(always)]
+fn qmap<T, F>(t: T, f: F) -> T
+where
+    T: Store<vec128_storage> + Into<vec128_storage>,
+    F: Fn(u64) -> u64,
+{
+    let t: vec128_storage = t.into();
+    let q = unsafe { t.q };
+    let q = vec128_storage {
+        q: [f(q[0]), f(q[1])],
+    };
+    unsafe { T::unpack(q) }
+}
+
+#[inline(always)]
+fn qmap2<T, F>(a: T, b: T, f: F) -> T
+where
+    T: Store<vec128_storage> + Into<vec128_storage>,
+    F: Fn(u64, u64) -> u64,
+{
+    let a: vec128_storage = a.into();
+    let b: vec128_storage = b.into();
+    let ao = unsafe { a.q };
+    let bo = unsafe { b.q };
+    let q = vec128_storage {
+        q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
+    };
+    unsafe { T::unpack(q) }
+}
+
+#[inline(always)]
+fn o_of_q(q: [u64; 2]) -> u128 {
+    u128::from(q[0]) | (u128::from(q[1]) << 64)
+}
+
+#[inline(always)]
+fn q_of_o(o: u128) -> [u64; 2] {
+    [o as u64, (o >> 64) as u64]
+}
+
+#[inline(always)]
+fn omap<T, F>(a: T, f: F) -> T
+where
+    T: Store<vec128_storage> + Into<vec128_storage>,
+    F: Fn(u128) -> u128,
+{
+    let a: vec128_storage = a.into();
+    let ao = o_of_q(unsafe { a.q });
+    let o = vec128_storage { q: q_of_o(f(ao)) };
+    unsafe { T::unpack(o) }
+}
+
+#[inline(always)]
+fn omap2<T, F>(a: T, b: T, f: F) -> T
+where
+    T: Store<vec128_storage> + Into<vec128_storage>,
+    F: Fn(u128, u128) -> u128,
+{
+    let a: vec128_storage = a.into();
+    let b: vec128_storage = b.into();
+    let ao = o_of_q(unsafe { a.q });
+    let bo = o_of_q(unsafe { b.q });
+    let o = vec128_storage {
+        q: q_of_o(f(ao, bo)),
+    };
+    unsafe { T::unpack(o) }
+}
+
+impl RotateEachWord128 for u128x1_generic {}
+impl BitOps128 for u128x1_generic {}
+impl BitOps64 for u128x1_generic {}
+impl BitOps64 for u64x2_generic {}
+impl BitOps32 for u128x1_generic {}
+impl BitOps32 for u64x2_generic {}
+impl BitOps32 for u32x4_generic {}
+impl BitOps0 for u128x1_generic {}
+impl BitOps0 for u64x2_generic {}
+impl BitOps0 for u32x4_generic {}
+
+macro_rules! impl_bitops {
+    ($vec:ident) => {
+        impl Not for $vec {
+            type Output = Self;
+            #[inline(always)]
+            fn not(self) -> Self::Output {
+                omap(self, |x| !x)
+            }
+        }
+        impl BitAnd for $vec {
+            type Output = Self;
+            #[inline(always)]
+            fn bitand(self, rhs: Self) -> Self::Output {
+                omap2(self, rhs, |x, y| x & y)
+            }
+        }
+        impl BitOr for $vec {
+            type Output = Self;
+            #[inline(always)]
+            fn bitor(self, rhs: Self) -> Self::Output {
+                omap2(self, rhs, |x, y| x | y)
+            }
+        }
+        impl BitXor for $vec {
+            type Output = Self;
+            #[inline(always)]
+            fn bitxor(self, rhs: Self) -> Self::Output {
+                omap2(self, rhs, |x, y| x ^ y)
+            }
+        }
+        impl AndNot for $vec {
+            type Output = Self;
+            #[inline(always)]
+            fn andnot(self, rhs: Self) -> Self::Output {
+                omap2(self, rhs, |x, y| !x & y)
+            }
+        }
+        impl BitAndAssign for $vec {
+            #[inline(always)]
+            fn bitand_assign(&mut self, rhs: Self) {
+                *self = *self & rhs
+            }
+        }
+        impl BitOrAssign for $vec {
+            #[inline(always)]
+            fn bitor_assign(&mut self, rhs: Self) {
+                *self = *self | rhs
+            }
+        }
+        impl BitXorAssign for $vec {
+            #[inline(always)]
+            fn bitxor_assign(&mut self, rhs: Self) {
+                *self = *self ^ rhs
+            }
+        }
+
+        impl Swap64 for $vec {
+            #[inline(always)]
+            fn swap1(self) -> Self {
+                qmap(self, |x| {
+                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
+                })
+            }
+            #[inline(always)]
+            fn swap2(self) -> Self {
+                qmap(self, |x| {
+                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
+                })
+            }
+            #[inline(always)]
+            fn swap4(self) -> Self {
+                qmap(self, |x| {
+                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
+                })
+            }
+            #[inline(always)]
+            fn swap8(self) -> Self {
+                qmap(self, |x| {
+                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
+                })
+            }
+            #[inline(always)]
+            fn swap16(self) -> Self {
+                dmap(self, |x| x.rotate_left(16))
+            }
+            #[inline(always)]
+            fn swap32(self) -> Self {
+                qmap(self, |x| x.rotate_left(32))
+            }
+            #[inline(always)]
+            fn swap64(self) -> Self {
+                omap(self, |x| (x << 64) | (x >> 64))
+            }
+        }
+    };
+}
+impl_bitops!(u32x4_generic);
+impl_bitops!(u64x2_generic);
+impl_bitops!(u128x1_generic);
+
+impl RotateEachWord32 for u32x4_generic {
+    #[inline(always)]
+    fn rotate_each_word_right7(self) -> Self {
+        dmap(self, |x| x.rotate_right(7))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right8(self) -> Self {
+        dmap(self, |x| x.rotate_right(8))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right11(self) -> Self {
+        dmap(self, |x| x.rotate_right(11))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right12(self) -> Self {
+        dmap(self, |x| x.rotate_right(12))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right16(self) -> Self {
+        dmap(self, |x| x.rotate_right(16))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right20(self) -> Self {
+        dmap(self, |x| x.rotate_right(20))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right24(self) -> Self {
+        dmap(self, |x| x.rotate_right(24))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right25(self) -> Self {
+        dmap(self, |x| x.rotate_right(25))
+    }
+}
+
+impl RotateEachWord32 for u64x2_generic {
+    #[inline(always)]
+    fn rotate_each_word_right7(self) -> Self {
+        qmap(self, |x| x.rotate_right(7))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right8(self) -> Self {
+        qmap(self, |x| x.rotate_right(8))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right11(self) -> Self {
+        qmap(self, |x| x.rotate_right(11))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right12(self) -> Self {
+        qmap(self, |x| x.rotate_right(12))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right16(self) -> Self {
+        qmap(self, |x| x.rotate_right(16))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right20(self) -> Self {
+        qmap(self, |x| x.rotate_right(20))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right24(self) -> Self {
+        qmap(self, |x| x.rotate_right(24))
+    }
+    #[inline(always)]
+    fn rotate_each_word_right25(self) -> Self {
+        qmap(self, |x| x.rotate_right(25))
+    }
+}
+impl RotateEachWord64 for u64x2_generic {
+    #[inline(always)]
+    fn rotate_each_word_right32(self) -> Self {
+        qmap(self, |x| x.rotate_right(32))
+    }
+}
+
+// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
+#[inline(always)]
+fn rotate_u128_right(x: u128, i: u32) -> u128 {
+    (x >> i) | (x << (128 - i))
+}
+#[test]
+fn test_rotate_u128() {
+    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
+    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
+}
+
+impl RotateEachWord32 for u128x1_generic {
+    #[inline(always)]
+    fn rotate_each_word_right7(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 7)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right8(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 8)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right11(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 11)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right12(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 12)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right16(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 16)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right20(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 20)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right24(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 24)])
+    }
+    #[inline(always)]
+    fn rotate_each_word_right25(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 25)])
+    }
+}
+impl RotateEachWord64 for u128x1_generic {
+    #[inline(always)]
+    fn rotate_each_word_right32(self) -> Self {
+        Self([rotate_u128_right(self.0[0], 32)])
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct GenericMachine;
+impl Machine for GenericMachine {
+    type u32x4 = u32x4_generic;
+    type u64x2 = u64x2_generic;
+    type u128x1 = u128x1_generic;
+    type u32x4x2 = u32x4x2_generic;
+    type u64x2x2 = u64x2x2_generic;
+    type u64x4 = u64x4_generic;
+    type u128x2 = u128x2_generic;
+    type u32x4x4 = u32x4x4_generic;
+    type u64x2x4 = u64x2x4_generic;
+    type u128x4 = u128x4_generic;
+    #[inline(always)]
+    unsafe fn instance() -> Self {
+        Self
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct u32x4_generic([u32; 4]);
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct u64x2_generic([u64; 2]);
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct u128x1_generic([u128; 1]);
+
+impl From<u32x4_generic> for vec128_storage {
+    #[inline(always)]
+    fn from(d: u32x4_generic) -> Self {
+        Self { d: d.0 }
+    }
+}
+impl From<u64x2_generic> for vec128_storage {
+    #[inline(always)]
+    fn from(q: u64x2_generic) -> Self {
+        Self { q: q.0 }
+    }
+}
+impl From<u128x1_generic> for vec128_storage {
+    #[inline(always)]
+    fn from(o: u128x1_generic) -> Self {
+        Self { q: q_of_o(o.0[0]) }
+    }
+}
+
+impl Store<vec128_storage> for u32x4_generic {
+    #[inline(always)]
+    unsafe fn unpack(s: vec128_storage) -> Self {
+        Self(s.d)
+    }
+}
+impl Store<vec128_storage> for u64x2_generic {
+    #[inline(always)]
+    unsafe fn unpack(s: vec128_storage) -> Self {
+        Self(s.q)
+    }
+}
+impl Store<vec128_storage> for u128x1_generic {
+    #[inline(always)]
+    unsafe fn unpack(s: vec128_storage) -> Self {
+        Self([o_of_q(s.q); 1])
+    }
+}
+
+impl ArithOps for u32x4_generic {}
+impl ArithOps for u64x2_generic {}
+impl ArithOps for u128x1_generic {}
+
+impl Add for u32x4_generic {
+    type Output = Self;
+    #[inline(always)]
+    fn add(self, rhs: Self) -> Self::Output {
+        dmap2(self, rhs, |x, y| x.wrapping_add(y))
+    }
+}
+impl Add for u64x2_generic {
+    type Output = Self;
+    #[inline(always)]
+    fn add(self, rhs: Self) -> Self::Output {
+        qmap2(self, rhs, |x, y| x.wrapping_add(y))
+    }
+}
+impl Add for u128x1_generic {
+    type Output = Self;
+    #[inline(always)]
+    fn add(self, rhs: Self) -> Self::Output {
+        omap2(self, rhs, |x, y| x.wrapping_add(y))
+    }
+}
+impl AddAssign for u32x4_generic {
+    #[inline(always)]
+    fn add_assign(&mut self, rhs: Self) {
+        *self = *self + rhs
+    }
+}
+impl AddAssign for u64x2_generic {
+    #[inline(always)]
+    fn add_assign(&mut self, rhs: Self) {
+        *self = *self + rhs
+    }
+}
+impl AddAssign for u128x1_generic {
+    #[inline(always)]
+    fn add_assign(&mut self, rhs: Self) {
+        *self = *self + rhs
+    }
+}
+impl BSwap for u32x4_generic {
+    #[inline(always)]
+    fn bswap(self) -> Self {
+        dmap(self, |x| x.swap_bytes())
+    }
+}
+impl BSwap for u64x2_generic {
+    #[inline(always)]
+    fn bswap(self) -> Self {
+        qmap(self, |x| x.swap_bytes())
+    }
+}
+impl BSwap for u128x1_generic {
+    #[inline(always)]
+    fn bswap(self) -> Self {
+        omap(self, |x| x.swap_bytes())
+    }
+}
+impl StoreBytes for u32x4_generic {
+    #[inline(always)]
+    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
+        assert_eq!(input.len(), 16);
+        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
+        dmap(x, |x| x.to_le())
+    }
+    #[inline(always)]
+    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
+        assert_eq!(input.len(), 16);
+        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
+        dmap(x, |x| x.to_be())
+    }
+    #[inline(always)]
+    fn write_le(self, out: &mut [u8]) {
+        assert_eq!(out.len(), 16);
+        let x = dmap(self, |x| x.to_le());
+        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
+    }
+    #[inline(always)]
+    fn write_be(self, out: &mut [u8]) {
+        assert_eq!(out.len(), 16);
+        let x = dmap(self, |x| x.to_be());
+        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
+    }
+}
+impl StoreBytes for u64x2_generic {
+    #[inline(always)]
+    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
+        assert_eq!(input.len(), 16);
+        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
+        qmap(x, |x| x.to_le())
+    }
+    #[inline(always)]
+    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
+        assert_eq!(input.len(), 16);
+        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
+        qmap(x, |x| x.to_be())
+    }
+    #[inline(always)]
+    fn write_le(self, out: &mut [u8]) {
+        assert_eq!(out.len(), 16);
+        let x = qmap(self, |x| x.to_le());
+        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
+    }
+    #[inline(always)]
+    fn write_be(self, out: &mut [u8]) {
+        assert_eq!(out.len(), 16);
+        let x = qmap(self, |x| x.to_be());
+        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct G0;
+#[derive(Copy, Clone)]
+pub struct G1;
+pub type u32x4x2_generic = x2<u32x4_generic, G0>;
+pub type u64x2x2_generic = x2<u64x2_generic, G0>;
+pub type u64x4_generic = x2<u64x2_generic, G1>;
+pub type u128x2_generic = x2<u128x1_generic, G0>;
+pub type u32x4x4_generic = x4<u32x4_generic>;
+pub type u64x2x4_generic = x4<u64x2_generic>;
+pub type u128x4_generic = x4<u128x1_generic>;
+
+impl Vector<[u32; 16]> for u32x4x4_generic {
+    fn to_scalars(self) -> [u32; 16] {
+        let [a, b, c, d] = self.0;
+        let a = a.0;
+        let b = b.0;
+        let c = c.0;
+        let d = d.0;
+        [
+            a[0], a[1], a[2], a[3], //
+            b[0], b[1], b[2], b[3], //
+            c[0], c[1], c[2], c[3], //
+            d[0], d[1], d[2], d[3], //
+        ]
+    }
+}
+
+impl MultiLane<[u32; 4]> for u32x4_generic {
+    #[inline(always)]
+    fn to_lanes(self) -> [u32; 4] {
+        self.0
+    }
+    #[inline(always)]
+    fn from_lanes(xs: [u32; 4]) -> Self {
+        Self(xs)
+    }
+}
+impl MultiLane<[u64; 2]> for u64x2_generic {
+    #[inline(always)]
+    fn to_lanes(self) -> [u64; 2] {
+        self.0
+    }
+    #[inline(always)]
+    fn from_lanes(xs: [u64; 2]) -> Self {
+        Self(xs)
+    }
+}
+impl MultiLane<[u64; 4]> for u64x4_generic {
+    #[inline(always)]
+    fn to_lanes(self) -> [u64; 4] {
+        let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
+        [a[0], a[1], b[0], b[1]]
+    }
+    #[inline(always)]
+    fn from_lanes(xs: [u64; 4]) -> Self {
+        let (a, b) = (
+            u64x2_generic::from_lanes([xs[0], xs[1]]),
+            u64x2_generic::from_lanes([xs[2], xs[3]]),
+        );
+        x2::new([a, b])
+    }
+}
+impl MultiLane<[u128; 1]> for u128x1_generic {
+    #[inline(always)]
+    fn to_lanes(self) -> [u128; 1] {
+        self.0
+    }
+    #[inline(always)]
+    fn from_lanes(xs: [u128; 1]) -> Self {
+        Self(xs)
+    }
+}
+impl Vec4<u32> for u32x4_generic {
+    #[inline(always)]
+    fn extract(self, i: u32) -> u32 {
+        self.0[i as usize]
+    }
+    #[inline(always)]
+    fn insert(mut self, v: u32, i: u32) -> Self {
+        self.0[i as usize] = v;
+        self
+    }
+}
+impl Vec4<u64> for u64x4_generic {
+    #[inline(always)]
+    fn extract(self, i: u32) -> u64 {
+        let d: [u64; 4] = self.to_lanes();
+        d[i as usize]
+    }
+    #[inline(always)]
+    fn insert(self, v: u64, i: u32) -> Self {
+        self.0[(i / 2) as usize].insert(v, i % 2);
+        self
+    }
+}
+impl Vec2<u64> for u64x2_generic {
+    #[inline(always)]
+    fn extract(self, i: u32) -> u64 {
+        self.0[i as usize]
+    }
+    #[inline(always)]
+    fn insert(mut self, v: u64, i: u32) -> Self {
+        self.0[i as usize] = v;
+        self
+    }
+}
+
+impl Words4 for u32x4_generic {
+    #[inline(always)]
+    fn shuffle2301(self) -> Self {
+        self.swap64()
+    }
+    #[inline(always)]
+    fn shuffle1230(self) -> Self {
+        let x = self.0;
+        Self([x[3], x[0], x[1], x[2]])
+    }
+    #[inline(always)]
+    fn shuffle3012(self) -> Self {
+        let x = self.0;
+        Self([x[1], x[2], x[3], x[0]])
+    }
+}
+impl LaneWords4 for u32x4_generic {
+    #[inline(always)]
+    fn shuffle_lane_words2301(self) -> Self {
+        self.shuffle2301()
+    }
+    #[inline(always)]
+    fn shuffle_lane_words1230(self) -> Self {
+        self.shuffle1230()
+    }
+    #[inline(always)]
+    fn shuffle_lane_words3012(self) -> Self {
+        self.shuffle3012()
+    }
+}
+
+impl Words4 for u64x4_generic {
+    #[inline(always)]
+    fn shuffle2301(self) -> Self {
+        x2::new([self.0[1], self.0[0]])
+    }
+    #[inline(always)]
+    fn shuffle1230(self) -> Self {
+        unimplemented!()
+    }
+    #[inline(always)]
+    fn shuffle3012(self) -> Self {
+        unimplemented!()
+    }
+}
+
+impl u32x4<GenericMachine> for u32x4_generic {}
+impl u64x2<GenericMachine> for u64x2_generic {}
+impl u128x1<GenericMachine> for u128x1_generic {}
+impl u32x4x2<GenericMachine> for u32x4x2_generic {}
+impl u64x2x2<GenericMachine> for u64x2x2_generic {}
+impl u64x4<GenericMachine> for u64x4_generic {}
+impl u128x2<GenericMachine> for u128x2_generic {}
+impl u32x4x4<GenericMachine> for u32x4x4_generic {}
+impl u64x2x4<GenericMachine> for u64x2x4_generic {}
+impl u128x4<GenericMachine> for u128x4_generic {}
+
+#[macro_export]
+macro_rules! dispatch {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            fn_impl($mach, $($arg),*)
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
+#[macro_export]
+macro_rules! dispatch_light128 {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            fn_impl($mach, $($arg),*)
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
+#[macro_export]
+macro_rules! dispatch_light256 {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            fn_impl($mach, $($arg),*)
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
+#[macro_export]
+macro_rules! dispatch_light512 {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            fn_impl($mach, $($arg),*)
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_bswap32() {
+        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
+        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];
+
+        let m = unsafe { GenericMachine::instance() };
+
+        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
+        let x = x.bswap();
+
+        let y = m.vec(ys);
+        assert_eq!(x, y);
+    }
+}
--- a/zeroidc/vendor/ppv-lite86/src/lib.rs
+++ b/zeroidc/vendor/ppv-lite86/src/lib.rs
@@ -0,0 +1,22 @@
+#![no_std]
+
+// Design:
+// - safety: safe creation of any machine type is done only by instance methods of a
+//   Machine (which is a ZST + Copy type), which can only by created unsafely or safely
+//   through feature detection (e.g. fn AVX2::try_get() -> Option<Machine>).
+
+mod soft;
+mod types;
+pub use self::types::*;
+
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
+pub mod x86_64;
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
+use self::x86_64 as arch;
+
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
+pub mod generic;
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
+use self::generic as arch;
+
+pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
--- a/zeroidc/vendor/ppv-lite86/src/soft.rs
+++ b/zeroidc/vendor/ppv-lite86/src/soft.rs
@@ -0,0 +1,472 @@
+//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
+
+use crate::types::*;
+use crate::{vec128_storage, vec256_storage, vec512_storage};
+use core::marker::PhantomData;
+use core::ops::*;
+
+#[derive(Copy, Clone, Default)]
+#[allow(non_camel_case_types)]
+pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
+impl<W, G> x2<W, G> {
+    #[inline(always)]
+    pub fn new(xs: [W; 2]) -> Self {
+        x2(xs, PhantomData)
+    }
+}
+macro_rules! fwd_binop_x2 {
+    ($trait:ident, $fn:ident) => {
+        impl<W: $trait + Copy, G> $trait for x2<W, G> {
+            type Output = x2<W::Output, G>;
+            #[inline(always)]
+            fn $fn(self, rhs: Self) -> Self::Output {
+                x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])])
+            }
+        }
+    };
+}
+macro_rules! fwd_binop_assign_x2 {
+    ($trait:ident, $fn_assign:ident) => {
+        impl<W: $trait + Copy, G> $trait for x2<W, G> {
+            #[inline(always)]
+            fn $fn_assign(&mut self, rhs: Self) {
+                (self.0[0]).$fn_assign(rhs.0[0]);
+                (self.0[1]).$fn_assign(rhs.0[1]);
+            }
+        }
+    };
+}
+macro_rules! fwd_unop_x2 {
+    ($fn:ident) => {
+        #[inline(always)]
+        fn $fn(self) -> Self {
+            x2::new([self.0[0].$fn(), self.0[1].$fn()])
+        }
+    };
+}
+impl<W, G> RotateEachWord32 for x2<W, G>
+where
+    W: Copy + RotateEachWord32,
+{
+    fwd_unop_x2!(rotate_each_word_right7);
+    fwd_unop_x2!(rotate_each_word_right8);
+    fwd_unop_x2!(rotate_each_word_right11);
+    fwd_unop_x2!(rotate_each_word_right12);
+    fwd_unop_x2!(rotate_each_word_right16);
+    fwd_unop_x2!(rotate_each_word_right20);
+    fwd_unop_x2!(rotate_each_word_right24);
+    fwd_unop_x2!(rotate_each_word_right25);
+}
+impl<W, G> RotateEachWord64 for x2<W, G>
+where
+    W: Copy + RotateEachWord64,
+{
+    fwd_unop_x2!(rotate_each_word_right32);
+}
+impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
+impl<W, G> BitOps0 for x2<W, G>
+where
+    W: BitOps0,
+    G: Copy,
+{
+}
+impl<W, G> BitOps32 for x2<W, G>
+where
+    W: BitOps32 + BitOps0,
+    G: Copy,
+{
+}
+impl<W, G> BitOps64 for x2<W, G>
+where
+    W: BitOps64 + BitOps0,
+    G: Copy,
+{
+}
+impl<W, G> BitOps128 for x2<W, G>
+where
+    W: BitOps128 + BitOps0,
+    G: Copy,
+{
+}
+fwd_binop_x2!(BitAnd, bitand);
+fwd_binop_x2!(BitOr, bitor);
+fwd_binop_x2!(BitXor, bitxor);
+fwd_binop_x2!(AndNot, andnot);
+fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
+fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
+fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
+impl<W, G> ArithOps for x2<W, G>
+where
+    W: ArithOps,
+    G: Copy,
+{
+}
+fwd_binop_x2!(Add, add);
+fwd_binop_assign_x2!(AddAssign, add_assign);
+impl<W: Not + Copy, G> Not for x2<W, G> {
+    type Output = x2<W::Output, G>;
+    #[inline(always)]
+    fn not(self) -> Self::Output {
+        x2::new([self.0[0].not(), self.0[1].not()])
+    }
+}
+impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
+    #[inline(always)]
+    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
+        x2::new(xs)
+    }
+}
+impl<W: Copy, G> Vec2<W> for x2<W, G> {
+    #[inline(always)]
+    fn extract(self, i: u32) -> W {
+        self.0[i as usize]
+    }
+    #[inline(always)]
+    fn insert(mut self, w: W, i: u32) -> Self {
+        self.0[i as usize] = w;
+        self
+    }
+}
+impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
+    #[inline(always)]
+    unsafe fn unpack(p: vec256_storage) -> Self {
+        let p = p.split128();
+        x2::new([W::unpack(p[0]), W::unpack(p[1])])
+    }
+}
+impl<W, G> From<x2<W, G>> for vec256_storage
+where
+    W: Copy,
+    vec128_storage: From<W>,
+{
+    #[inline(always)]
+    fn from(x: x2<W, G>) -> Self {
+        vec256_storage::new128([x.0[0].into(), x.0[1].into()])
+    }
+}
+impl<W, G> Swap64 for x2<W, G>
+where
+    W: Swap64 + Copy,
+{
+    fwd_unop_x2!(swap1);
+    fwd_unop_x2!(swap2);
+    fwd_unop_x2!(swap4);
+    fwd_unop_x2!(swap8);
+    fwd_unop_x2!(swap16);
+    fwd_unop_x2!(swap32);
+    fwd_unop_x2!(swap64);
+}
+impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
+    #[inline(always)]
+    fn to_lanes(self) -> [W; 2] {
+        self.0
+    }
+    #[inline(always)]
+    fn from_lanes(lanes: [W; 2]) -> Self {
+        x2::new(lanes)
+    }
+}
+impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
+    #[inline(always)]
+    fn bswap(self) -> Self {
+        x2::new([self.0[0].bswap(), self.0[1].bswap()])
+    }
+}
+impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
+    #[inline(always)]
+    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
+        let input = input.split_at(input.len() / 2);
+        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
+    }
+    #[inline(always)]
+    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
+        let input = input.split_at(input.len() / 2);
+        x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
+    }
+    #[inline(always)]
+    fn write_le(self, out: &mut [u8]) {
+        let out = out.split_at_mut(out.len() / 2);
+        self.0[0].write_le(out.0);
+        self.0[1].write_le(out.1);
+    }
+    #[inline(always)]
+    fn write_be(self, out: &mut [u8]) {
+        let out = out.split_at_mut(out.len() / 2);
+        self.0[0].write_be(out.0);
+        self.0[1].write_be(out.1);
+    }
+}
+impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
+    #[inline(always)]
+    fn shuffle_lane_words2301(self) -> Self {
+        Self::new([
+            self.0[0].shuffle_lane_words2301(),
+            self.0[1].shuffle_lane_words2301(),
+        ])
+    }
+    #[inline(always)]
+    fn shuffle_lane_words1230(self) -> Self {
+        Self::new([
+            self.0[0].shuffle_lane_words1230(),
+            self.0[1].shuffle_lane_words1230(),
+        ])
+    }
+    #[inline(always)]
+    fn shuffle_lane_words3012(self) -> Self {
+        Self::new([
+            self.0[0].shuffle_lane_words3012(),
+            self.0[1].shuffle_lane_words3012(),
+        ])
+    }
+}
+
+#[derive(Copy, Clone, Default)]
+#[allow(non_camel_case_types)]
+pub struct x4<W>(pub [W; 4]);
+impl<W> x4<W> {
+    #[inline(always)]
+    pub fn new(xs: [W; 4]) -> Self {
+        x4(xs)
+    }
+}
+macro_rules! fwd_binop_x4 {
+    ($trait:ident, $fn:ident) => {
+        impl<W: $trait + Copy> $trait for x4<W> {
+            type Output = x4<W::Output>;
+            #[inline(always)]
+            fn $fn(self, rhs: Self) -> Self::Output {
+                x4([
+                    self.0[0].$fn(rhs.0[0]),
+                    self.0[1].$fn(rhs.0[1]),
+                    self.0[2].$fn(rhs.0[2]),
+                    self.0[3].$fn(rhs.0[3]),
+                ])
+            }
+        }
+    };
+}
+macro_rules! fwd_binop_assign_x4 {
+    ($trait:ident, $fn_assign:ident) => {
+        impl<W: $trait + Copy> $trait for x4<W> {
+            #[inline(always)]
+            fn $fn_assign(&mut self, rhs: Self) {
+                self.0[0].$fn_assign(rhs.0[0]);
+                self.0[1].$fn_assign(rhs.0[1]);
+                self.0[2].$fn_assign(rhs.0[2]);
+                self.0[3].$fn_assign(rhs.0[3]);
+            }
+        }
+    };
+}
+macro_rules! fwd_unop_x4 {
+    ($fn:ident) => {
+        #[inline(always)]
+        fn $fn(self) -> Self {
+            x4([
+                self.0[0].$fn(),
+                self.0[1].$fn(),
+                self.0[2].$fn(),
+                self.0[3].$fn(),
+            ])
+        }
+    };
+}
+impl<W> RotateEachWord32 for x4<W>
+where
+    W: Copy + RotateEachWord32,
+{
+    fwd_unop_x4!(rotate_each_word_right7);
+    fwd_unop_x4!(rotate_each_word_right8);
+    fwd_unop_x4!(rotate_each_word_right11);
+    fwd_unop_x4!(rotate_each_word_right12);
+    fwd_unop_x4!(rotate_each_word_right16);
+    fwd_unop_x4!(rotate_each_word_right20);
+    fwd_unop_x4!(rotate_each_word_right24);
+    fwd_unop_x4!(rotate_each_word_right25);
+}
+impl<W> RotateEachWord64 for x4<W>
+where
+    W: Copy + RotateEachWord64,
+{
+    fwd_unop_x4!(rotate_each_word_right32);
+}
+impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
+impl<W> BitOps0 for x4<W> where W: BitOps0 {}
+impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
+impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
+impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
+fwd_binop_x4!(BitAnd, bitand);
+fwd_binop_x4!(BitOr, bitor);
+fwd_binop_x4!(BitXor, bitxor);
+fwd_binop_x4!(AndNot, andnot);
+fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
+fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
+fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
+impl<W> ArithOps for x4<W> where W: ArithOps {}
+fwd_binop_x4!(Add, add);
+fwd_binop_assign_x4!(AddAssign, add_assign);
+impl<W: Not + Copy> Not for x4<W> {
+    type Output = x4<W::Output>;
+    #[inline(always)]
+    fn not(self) -> Self::Output {
+        x4([
+            self.0[0].not(),
+            self.0[1].not(),
+            self.0[2].not(),
+            self.0[3].not(),
+        ])
+    }
+}
+impl<W> UnsafeFrom<[W; 4]> for x4<W> {
+    #[inline(always)]
+    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
+        x4(xs)
+    }
+}
+impl<W: Copy> Vec4<W> for x4<W> {
+    #[inline(always)]
+    fn extract(self, i: u32) -> W {
+        self.0[i as usize]
+    }
+    #[inline(always)]
+    fn insert(mut self, w: W, i: u32) -> Self {
+        self.0[i as usize] = w;
+        self
+    }
+}
+impl<W: Copy> Vec4Ext<W> for x4<W> {
+    #[inline(always)]
+    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
+    where
+        Self: Sized,
+    {
+        (
+            x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
+            x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
+            x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
+            x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
+        )
+    }
+}
+impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
+    #[inline(always)]
+    unsafe fn unpack(p: vec512_storage) -> Self {
+        let p = p.split128();
+        x4([
+            W::unpack(p[0]),
+            W::unpack(p[1]),
+            W::unpack(p[2]),
+            W::unpack(p[3]),
+        ])
+    }
+}
+impl<W> From<x4<W>> for vec512_storage
+where
+    W: Copy,
+    vec128_storage: From<W>,
+{
+    #[inline(always)]
+    fn from(x: x4<W>) -> Self {
+        vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
+    }
+}
+impl<W> Swap64 for x4<W>
+where
+    W: Swap64 + Copy,
+{
+    fwd_unop_x4!(swap1);
+    fwd_unop_x4!(swap2);
+    fwd_unop_x4!(swap4);
+    fwd_unop_x4!(swap8);
+    fwd_unop_x4!(swap16);
+    fwd_unop_x4!(swap32);
+    fwd_unop_x4!(swap64);
+}
+impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
+    #[inline(always)]
+    fn to_lanes(self) -> [W; 4] {
+        self.0
+    }
+    #[inline(always)]
+    fn from_lanes(lanes: [W; 4]) -> Self {
+        x4(lanes)
+    }
+}
+impl<W: BSwap + Copy> BSwap for x4<W> {
+    #[inline(always)]
+    fn bswap(self) -> Self {
+        x4([
+            self.0[0].bswap(),
+            self.0[1].bswap(),
+            self.0[2].bswap(),
+            self.0[3].bswap(),
+        ])
+    }
+}
+impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
+    #[inline(always)]
+    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
+        let n = input.len() / 4;
+        x4([
+            W::unsafe_read_le(&input[..n]),
+            W::unsafe_read_le(&input[n..n * 2]),
+            W::unsafe_read_le(&input[n * 2..n * 3]),
+            W::unsafe_read_le(&input[n * 3..]),
+        ])
+    }
+    #[inline(always)]
+    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
+        let n = input.len() / 4;
+        x4([
+            W::unsafe_read_be(&input[..n]),
+            W::unsafe_read_be(&input[n..n * 2]),
+            W::unsafe_read_be(&input[n * 2..n * 3]),
+            W::unsafe_read_be(&input[n * 3..]),
+        ])
+    }
+    #[inline(always)]
+    fn write_le(self, out: &mut [u8]) {
+        let n = out.len() / 4;
+        self.0[0].write_le(&mut out[..n]);
+        self.0[1].write_le(&mut out[n..n * 2]);
+        self.0[2].write_le(&mut out[n * 2..n * 3]);
+        self.0[3].write_le(&mut out[n * 3..]);
+    }
+    #[inline(always)]
+    fn write_be(self, out: &mut [u8]) {
+        let n = out.len() / 4;
+        self.0[0].write_be(&mut out[..n]);
+        self.0[1].write_be(&mut out[n..n * 2]);
+        self.0[2].write_be(&mut out[n * 2..n * 3]);
+        self.0[3].write_be(&mut out[n * 3..]);
+    }
+}
+impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
+    #[inline(always)]
+    fn shuffle_lane_words2301(self) -> Self {
+        x4([
+            self.0[0].shuffle_lane_words2301(),
+            self.0[1].shuffle_lane_words2301(),
+            self.0[2].shuffle_lane_words2301(),
+            self.0[3].shuffle_lane_words2301(),
+        ])
+    }
+    #[inline(always)]
+    fn shuffle_lane_words1230(self) -> Self {
+        x4([
+            self.0[0].shuffle_lane_words1230(),
+            self.0[1].shuffle_lane_words1230(),
+            self.0[2].shuffle_lane_words1230(),
+            self.0[3].shuffle_lane_words1230(),
+        ])
+    }
+    #[inline(always)]
+    fn shuffle_lane_words3012(self) -> Self {
+        x4([
+            self.0[0].shuffle_lane_words3012(),
+            self.0[1].shuffle_lane_words3012(),
+            self.0[2].shuffle_lane_words3012(),
+            self.0[3].shuffle_lane_words3012(),
+        ])
+    }
+}
--- a/zeroidc/vendor/ppv-lite86/src/types.rs
+++ b/zeroidc/vendor/ppv-lite86/src/types.rs
@@ -0,0 +1,298 @@
+#![allow(non_camel_case_types)]
+use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
+
+pub trait AndNot {
+    type Output;
+    fn andnot(self, rhs: Self) -> Self::Output;
+}
+pub trait BSwap {
+    fn bswap(self) -> Self;
+}
+/// Ops that depend on word size
+pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
+/// Ops that are independent of word size and endian
+pub trait BitOps0:
+    BitAnd<Output = Self>
+    + BitOr<Output = Self>
+    + BitXor<Output = Self>
+    + BitXorAssign
+    + Not<Output = Self>
+    + AndNot<Output = Self>
+    + Sized
+    + Copy
+    + Clone
+{
+}
+
+pub trait BitOps32: BitOps0 + RotateEachWord32 {}
+pub trait BitOps64: BitOps32 + RotateEachWord64 {}
+pub trait BitOps128: BitOps64 + RotateEachWord128 {}
+
+pub trait RotateEachWord32 {
+    fn rotate_each_word_right7(self) -> Self;
+    fn rotate_each_word_right8(self) -> Self;
+    fn rotate_each_word_right11(self) -> Self;
+    fn rotate_each_word_right12(self) -> Self;
+    fn rotate_each_word_right16(self) -> Self;
+    fn rotate_each_word_right20(self) -> Self;
+    fn rotate_each_word_right24(self) -> Self;
+    fn rotate_each_word_right25(self) -> Self;
+}
+
+pub trait RotateEachWord64 {
+    fn rotate_each_word_right32(self) -> Self;
+}
+
+pub trait RotateEachWord128 {}
+
+// Vector type naming scheme:
+// uN[xP]xL
+// Unsigned; N-bit words * P bits per lane * L lanes
+//
+// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
+// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
+// slow inter-lane operations.
+
+use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
+
+#[allow(clippy::missing_safety_doc)]
+pub trait UnsafeFrom<T> {
+    unsafe fn unsafe_from(t: T) -> Self;
+}
+
+/// A vector composed of two elements, which may be words or themselves vectors.
+pub trait Vec2<W> {
+    fn extract(self, i: u32) -> W;
+    fn insert(self, w: W, i: u32) -> Self;
+}
+
+/// A vector composed of four elements, which may be words or themselves vectors.
+pub trait Vec4<W> {
+    fn extract(self, i: u32) -> W;
+    fn insert(self, w: W, i: u32) -> Self;
+}
+/// Vec4 functions which may not be implemented yet for all Vec4 types.
+/// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage,
+/// import Vec4Ext only together with Vec4, and don't qualify its methods.
+pub trait Vec4Ext<W> {
+    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
+    where
+        Self: Sized;
+}
+pub trait Vector<T> {
+    fn to_scalars(self) -> T;
+}
+
+// TODO: multiples of 4 should inherit this
+/// A vector composed of four words; depending on their size, operations may cross lanes.
+pub trait Words4 {
+    fn shuffle1230(self) -> Self;
+    fn shuffle2301(self) -> Self;
+    fn shuffle3012(self) -> Self;
+}
+
+/// A vector composed one or more lanes each composed of four words.
+pub trait LaneWords4 {
+    fn shuffle_lane_words1230(self) -> Self;
+    fn shuffle_lane_words2301(self) -> Self;
+    fn shuffle_lane_words3012(self) -> Self;
+}
+
+// TODO: make this a part of BitOps
+/// Exchange neigboring ranges of bits of the specified size
+pub trait Swap64 {
+    fn swap1(self) -> Self;
+    fn swap2(self) -> Self;
+    fn swap4(self) -> Self;
+    fn swap8(self) -> Self;
+    fn swap16(self) -> Self;
+    fn swap32(self) -> Self;
+    fn swap64(self) -> Self;
+}
+
+pub trait u32x4<M: Machine>:
+    BitOps32
+    + Store<vec128_storage>
+    + ArithOps
+    + Vec4<u32>
+    + Words4
+    + LaneWords4
+    + StoreBytes
+    + MultiLane<[u32; 4]>
+    + Into<vec128_storage>
+{
+}
+pub trait u64x2<M: Machine>:
+    BitOps64 + Store<vec128_storage> + ArithOps + Vec2<u64> + MultiLane<[u64; 2]> + Into<vec128_storage>
+{
+}
+pub trait u128x1<M: Machine>:
+    BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
+{
+}
+
+pub trait u32x4x2<M: Machine>:
+    BitOps32
+    + Store<vec256_storage>
+    + Vec2<M::u32x4>
+    + MultiLane<[M::u32x4; 2]>
+    + ArithOps
+    + Into<vec256_storage>
+    + StoreBytes
+{
+}
+pub trait u64x2x2<M: Machine>:
+    BitOps64
+    + Store<vec256_storage>
+    + Vec2<M::u64x2>
+    + MultiLane<[M::u64x2; 2]>
+    + ArithOps
+    + StoreBytes
+    + Into<vec256_storage>
+{
+}
+pub trait u64x4<M: Machine>:
+    BitOps64
+    + Store<vec256_storage>
+    + Vec4<u64>
+    + MultiLane<[u64; 4]>
+    + ArithOps
+    + Words4
+    + StoreBytes
+    + Into<vec256_storage>
+{
+}
+pub trait u128x2<M: Machine>:
+    BitOps128
+    + Store<vec256_storage>
+    + Vec2<M::u128x1>
+    + MultiLane<[M::u128x1; 2]>
+    + Swap64
+    + Into<vec256_storage>
+{
+}
+
+pub trait u32x4x4<M: Machine>:
+    BitOps32
+    + Store<vec512_storage>
+    + Vec4<M::u32x4>
+    + Vec4Ext<M::u32x4>
+    + Vector<[u32; 16]>
+    + MultiLane<[M::u32x4; 4]>
+    + ArithOps
+    + LaneWords4
+    + Into<vec512_storage>
+    + StoreBytes
+{
+}
+pub trait u64x2x4<M: Machine>:
+    BitOps64
+    + Store<vec512_storage>
+    + Vec4<M::u64x2>
+    + MultiLane<[M::u64x2; 4]>
+    + ArithOps
+    + Into<vec512_storage>
+{
+}
+// TODO: Words4
+pub trait u128x4<M: Machine>:
+    BitOps128
+    + Store<vec512_storage>
+    + Vec4<M::u128x1>
+    + MultiLane<[M::u128x1; 4]>
+    + Swap64
+    + Into<vec512_storage>
+{
+}
+
+/// A vector composed of multiple 128-bit lanes.
+pub trait MultiLane<Lanes> {
+    /// Split a multi-lane vector into single-lane vectors.
+    fn to_lanes(self) -> Lanes;
+    /// Build a multi-lane vector from individual lanes.
+    fn from_lanes(lanes: Lanes) -> Self;
+}
+
+/// Combine single vectors into a multi-lane vector.
+pub trait VZip<V> {
+    fn vzip(self) -> V;
+}
+
+impl<V, T> VZip<V> for T
+where
+    V: MultiLane<T>,
+{
+    #[inline(always)]
+    fn vzip(self) -> V {
+        V::from_lanes(self)
+    }
+}
+
+pub trait Machine: Sized + Copy {
+    type u32x4: u32x4<Self>;
+    type u64x2: u64x2<Self>;
+    type u128x1: u128x1<Self>;
+
+    type u32x4x2: u32x4x2<Self>;
+    type u64x2x2: u64x2x2<Self>;
+    type u64x4: u64x4<Self>;
+    type u128x2: u128x2<Self>;
+
+    type u32x4x4: u32x4x4<Self>;
+    type u64x2x4: u64x2x4<Self>;
+    type u128x4: u128x4<Self>;
+
+    #[inline(always)]
+    fn unpack<S, V: Store<S>>(self, s: S) -> V {
+        unsafe { V::unpack(s) }
+    }
+
+    #[inline(always)]
+    fn vec<V, A>(self, a: A) -> V
+    where
+        V: MultiLane<A>,
+    {
+        V::from_lanes(a)
+    }
+
+    #[inline(always)]
+    fn read_le<V>(self, input: &[u8]) -> V
+    where
+        V: StoreBytes,
+    {
+        unsafe { V::unsafe_read_le(input) }
+    }
+
+    #[inline(always)]
+    fn read_be<V>(self, input: &[u8]) -> V
+    where
+        V: StoreBytes,
+    {
+        unsafe { V::unsafe_read_be(input) }
+    }
+
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
+    unsafe fn instance() -> Self;
+}
+
+pub trait Store<S> {
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
+    unsafe fn unpack(p: S) -> Self;
+}
+
+pub trait StoreBytes {
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
+    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
+    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
+    fn write_le(self, out: &mut [u8]);
+    fn write_be(self, out: &mut [u8]);
+}
--- a/zeroidc/vendor/ppv-lite86/src/x86_64/mod.rs
+++ b/zeroidc/vendor/ppv-lite86/src/x86_64/mod.rs
@@ -0,0 +1,437 @@
+// crate minimums: sse2, x86_64
+
+use crate::types::*;
+use core::arch::x86_64::{__m128i, __m256i};
+
+mod sse2;
+
+#[derive(Copy, Clone)]
+pub struct YesS3;
+#[derive(Copy, Clone)]
+pub struct NoS3;
+
+#[derive(Copy, Clone)]
+pub struct YesS4;
+#[derive(Copy, Clone)]
+pub struct NoS4;
+
+#[derive(Copy, Clone)]
+pub struct YesA1;
+#[derive(Copy, Clone)]
+pub struct NoA1;
+
+#[derive(Copy, Clone)]
+pub struct YesA2;
+#[derive(Copy, Clone)]
+pub struct NoA2;
+
+#[derive(Copy, Clone)]
+pub struct YesNI;
+#[derive(Copy, Clone)]
+pub struct NoNI;
+
+use core::marker::PhantomData;
+
+#[derive(Copy, Clone)]
+pub struct SseMachine<S3, S4, NI>(PhantomData<(S3, S4, NI)>);
+impl<S3: Copy, S4: Copy, NI: Copy> Machine for SseMachine<S3, S4, NI>
+where
+    sse2::u128x1_sse2<S3, S4, NI>: Swap64,
+    sse2::u64x2_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
+    sse2::u32x4_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
+    sse2::u64x4_sse2<S3, S4, NI>: BSwap + Words4,
+    sse2::u128x1_sse2<S3, S4, NI>: BSwap,
+    sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x2x2_sse2<S3, S4, NI>>,
+    sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x4_sse2<S3, S4, NI>>,
+    sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u32x4x2_sse2<S3, S4, NI>>,
+    sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u64x2x4_sse2<S3, S4, NI>>,
+    sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u32x4x4_sse2<S3, S4, NI>>,
+{
+    type u32x4 = sse2::u32x4_sse2<S3, S4, NI>;
+    type u64x2 = sse2::u64x2_sse2<S3, S4, NI>;
+    type u128x1 = sse2::u128x1_sse2<S3, S4, NI>;
+
+    type u32x4x2 = sse2::u32x4x2_sse2<S3, S4, NI>;
+    type u64x2x2 = sse2::u64x2x2_sse2<S3, S4, NI>;
+    type u64x4 = sse2::u64x4_sse2<S3, S4, NI>;
+    type u128x2 = sse2::u128x2_sse2<S3, S4, NI>;
+
+    type u32x4x4 = sse2::u32x4x4_sse2<S3, S4, NI>;
+    type u64x2x4 = sse2::u64x2x4_sse2<S3, S4, NI>;
+    type u128x4 = sse2::u128x4_sse2<S3, S4, NI>;
+
+    #[inline(always)]
+    unsafe fn instance() -> Self {
+        SseMachine(PhantomData)
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct Avx2Machine<NI>(PhantomData<NI>);
+impl<NI: Copy> Machine for Avx2Machine<NI>
+where
+    sse2::u128x1_sse2<YesS3, YesS4, NI>: BSwap + Swap64,
+    sse2::u64x2_sse2<YesS3, YesS4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
+    sse2::u32x4_sse2<YesS3, YesS4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
+    sse2::u64x4_sse2<YesS3, YesS4, NI>: BSwap + Words4,
+{
+    type u32x4 = sse2::u32x4_sse2<YesS3, YesS4, NI>;
+    type u64x2 = sse2::u64x2_sse2<YesS3, YesS4, NI>;
+    type u128x1 = sse2::u128x1_sse2<YesS3, YesS4, NI>;
+
+    type u32x4x2 = sse2::avx2::u32x4x2_avx2<NI>;
+    type u64x2x2 = sse2::u64x2x2_sse2<YesS3, YesS4, NI>;
+    type u64x4 = sse2::u64x4_sse2<YesS3, YesS4, NI>;
+    type u128x2 = sse2::u128x2_sse2<YesS3, YesS4, NI>;
+
+    type u32x4x4 = sse2::avx2::u32x4x4_avx2<NI>;
+    type u64x2x4 = sse2::u64x2x4_sse2<YesS3, YesS4, NI>;
+    type u128x4 = sse2::u128x4_sse2<YesS3, YesS4, NI>;
+
+    #[inline(always)]
+    unsafe fn instance() -> Self {
+        Avx2Machine(PhantomData)
+    }
+}
+
+pub type SSE2 = SseMachine<NoS3, NoS4, NoNI>;
+pub type SSSE3 = SseMachine<YesS3, NoS4, NoNI>;
+pub type SSE41 = SseMachine<YesS3, YesS4, NoNI>;
+/// AVX but not AVX2: only 128-bit integer operations, but use VEX versions of everything
+/// to avoid expensive SSE/VEX conflicts.
+pub type AVX = SseMachine<YesS3, YesS4, NoNI>;
+pub type AVX2 = Avx2Machine<NoNI>;
+
+/// Generic wrapper for unparameterized storage of any of the possible impls.
+/// Converting into and out of this type should be essentially free, although it may be more
+/// aligned than a particular impl requires.
+#[allow(non_camel_case_types)]
+#[derive(Copy, Clone)]
+pub union vec128_storage {
+    u32x4: [u32; 4],
+    u64x2: [u64; 2],
+    u128x1: [u128; 1],
+    sse2: __m128i,
+}
+impl Store<vec128_storage> for vec128_storage {
+    #[inline(always)]
+    unsafe fn unpack(p: vec128_storage) -> Self {
+        p
+    }
+}
+impl<'a> From<&'a vec128_storage> for &'a [u32; 4] {
+    #[inline(always)]
+    fn from(x: &'a vec128_storage) -> Self {
+        unsafe { &x.u32x4 }
+    }
+}
+impl From<[u32; 4]> for vec128_storage {
+    #[inline(always)]
+    fn from(u32x4: [u32; 4]) -> Self {
+        vec128_storage { u32x4 }
+    }
+}
+impl Default for vec128_storage {
+    #[inline(always)]
+    fn default() -> Self {
+        vec128_storage { u128x1: [0] }
+    }
+}
+impl Eq for vec128_storage {}
+impl PartialEq for vec128_storage {
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> bool {
+        unsafe { self.u128x1 == rhs.u128x1 }
+    }
+}
+
+#[allow(non_camel_case_types)]
+#[derive(Copy, Clone)]
+pub union vec256_storage {
+    u32x8: [u32; 8],
+    u64x4: [u64; 4],
+    u128x2: [u128; 2],
+    sse2: [vec128_storage; 2],
+    avx: __m256i,
+}
+impl From<[u64; 4]> for vec256_storage {
+    #[inline(always)]
+    fn from(u64x4: [u64; 4]) -> Self {
+        vec256_storage { u64x4 }
+    }
+}
+impl Default for vec256_storage {
+    #[inline(always)]
+    fn default() -> Self {
+        vec256_storage { u128x2: [0, 0] }
+    }
+}
+impl vec256_storage {
+    #[inline(always)]
+    pub fn new128(xs: [vec128_storage; 2]) -> Self {
+        Self { sse2: xs }
+    }
+    #[inline(always)]
+    pub fn split128(self) -> [vec128_storage; 2] {
+        unsafe { self.sse2 }
+    }
+}
+impl Eq for vec256_storage {}
+impl PartialEq for vec256_storage {
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> bool {
+        unsafe { self.sse2 == rhs.sse2 }
+    }
+}
+
+#[allow(non_camel_case_types)]
+#[derive(Copy, Clone)]
+pub union vec512_storage {
+    u32x16: [u32; 16],
+    u64x8: [u64; 8],
+    u128x4: [u128; 4],
+    sse2: [vec128_storage; 4],
+    avx: [vec256_storage; 2],
+}
+impl Default for vec512_storage {
+    #[inline(always)]
+    fn default() -> Self {
+        vec512_storage {
+            u128x4: [0, 0, 0, 0],
+        }
+    }
+}
+impl vec512_storage {
+    #[inline(always)]
+    pub fn new128(xs: [vec128_storage; 4]) -> Self {
+        Self { sse2: xs }
+    }
+    #[inline(always)]
+    pub fn split128(self) -> [vec128_storage; 4] {
+        unsafe { self.sse2 }
+    }
+}
+impl Eq for vec512_storage {}
+impl PartialEq for vec512_storage {
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> bool {
+        unsafe { self.avx == rhs.avx }
+    }
+}
+
+macro_rules! impl_into {
+    ($storage:ident, $array:ty, $name:ident) => {
+        impl From<$storage> for $array {
+            #[inline(always)]
+            fn from(vec: $storage) -> Self {
+                unsafe { vec.$name }
+            }
+        }
+    };
+}
+impl_into!(vec128_storage, [u32; 4], u32x4);
+impl_into!(vec128_storage, [u64; 2], u64x2);
+impl_into!(vec128_storage, [u128; 1], u128x1);
+impl_into!(vec256_storage, [u32; 8], u32x8);
+impl_into!(vec256_storage, [u64; 4], u64x4);
+impl_into!(vec256_storage, [u128; 2], u128x2);
+impl_into!(vec512_storage, [u32; 16], u32x16);
+impl_into!(vec512_storage, [u64; 8], u64x8);
+impl_into!(vec512_storage, [u128; 4], u128x4);
+
+/// Generate the full set of optimized implementations to take advantage of the most important
+/// hardware feature sets.
+///
+/// This dispatcher is suitable for maximizing throughput.
+#[macro_export]
+macro_rules! dispatch {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[cfg(feature = "std")]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            use std::arch::x86_64::*;
+            #[target_feature(enable = "avx2")]
+            unsafe fn impl_avx2($($arg: $argty),*) -> $ret {
+                let ret = fn_impl($crate::x86_64::AVX2::instance(), $($arg),*);
+                _mm256_zeroupper();
+                ret
+            }
+            #[target_feature(enable = "avx")]
+            #[target_feature(enable = "sse4.1")]
+            #[target_feature(enable = "ssse3")]
+            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
+                let ret = fn_impl($crate::x86_64::AVX::instance(), $($arg),*);
+                _mm256_zeroupper();
+                ret
+            }
+            #[target_feature(enable = "sse4.1")]
+            #[target_feature(enable = "ssse3")]
+            unsafe fn impl_sse41($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
+            }
+            #[target_feature(enable = "ssse3")]
+            unsafe fn impl_ssse3($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
+            }
+            #[target_feature(enable = "sse2")]
+            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
+            }
+            unsafe {
+                if is_x86_feature_detected!("avx2") {
+                    impl_avx2($($arg),*)
+                } else if is_x86_feature_detected!("avx") {
+                    impl_avx($($arg),*)
+                } else if is_x86_feature_detected!("sse4.1") {
+                    impl_sse41($($arg),*)
+                } else if is_x86_feature_detected!("ssse3") {
+                    impl_ssse3($($arg),*)
+                } else if is_x86_feature_detected!("sse2") {
+                    impl_sse2($($arg),*)
+                } else {
+                    unimplemented!()
+                }
+            }
+        }
+        #[cfg(not(feature = "std"))]
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            unsafe {
+                if cfg!(target_feature = "avx2") {
+                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
+                } else if cfg!(target_feature = "avx") {
+                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
+                } else if cfg!(target_feature = "sse4.1") {
+                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
+                } else if cfg!(target_feature = "ssse3") {
+                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
+                } else {
+                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
+                }
+            }
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
+
+/// Generate only the basic implementations necessary to be able to operate efficiently on 128-bit
+/// vectors on this platfrom. For x86-64, that would mean SSE2 and AVX.
+///
+/// This dispatcher is suitable for vector operations that do not benefit from advanced hardware
+/// features (e.g. because they are done infrequently), so minimizing their contribution to code
+/// size is more important.
+#[macro_export]
+macro_rules! dispatch_light128 {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[cfg(feature = "std")]
+        $($pub $(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            use std::arch::x86_64::*;
+            #[target_feature(enable = "avx")]
+            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
+            }
+            #[target_feature(enable = "sse2")]
+            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
+            }
+            unsafe {
+                if is_x86_feature_detected!("avx") {
+                    impl_avx($($arg),*)
+                } else if is_x86_feature_detected!("sse2") {
+                    impl_sse2($($arg),*)
+                } else {
+                    unimplemented!()
+                }
+            }
+        }
+        #[cfg(not(feature = "std"))]
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            unsafe {
+                if cfg!(target_feature = "avx2") {
+                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
+                } else if cfg!(target_feature = "avx") {
+                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
+                } else if cfg!(target_feature = "sse4.1") {
+                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
+                } else if cfg!(target_feature = "ssse3") {
+                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
+                } else {
+                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
+                }
+            }
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch_light128!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
+
+/// Generate only the basic implementations necessary to be able to operate efficiently on 256-bit
+/// vectors on this platfrom. For x86-64, that would mean SSE2, AVX, and AVX2.
+///
+/// This dispatcher is suitable for vector operations that do not benefit from advanced hardware
+/// features (e.g. because they are done infrequently), so minimizing their contribution to code
+/// size is more important.
+#[macro_export]
+macro_rules! dispatch_light256 {
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
+        #[cfg(feature = "std")]
+        $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> $ret {
+            #[inline(always)]
+            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            use std::arch::x86_64::*;
+            #[target_feature(enable = "avx")]
+            unsafe fn impl_avx($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
+            }
+            #[target_feature(enable = "sse2")]
+            unsafe fn impl_sse2($($arg: $argty),*) -> $ret {
+                fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
+            }
+            unsafe {
+                if is_x86_feature_detected!("avx") {
+                    impl_avx($($arg),*)
+                } else if is_x86_feature_detected!("sse2") {
+                    impl_sse2($($arg),*)
+                } else {
+                    unimplemented!()
+                }
+            }
+        }
+        #[cfg(not(feature = "std"))]
+        #[inline(always)]
+        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
+            unsafe fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
+            unsafe {
+                if cfg!(target_feature = "avx2") {
+                    fn_impl($crate::x86_64::AVX2::instance(), $($arg),*)
+                } else if cfg!(target_feature = "avx") {
+                    fn_impl($crate::x86_64::AVX::instance(), $($arg),*)
+                } else if cfg!(target_feature = "sse4.1") {
+                    fn_impl($crate::x86_64::SSE41::instance(), $($arg),*)
+                } else if cfg!(target_feature = "ssse3") {
+                    fn_impl($crate::x86_64::SSSE3::instance(), $($arg),*)
+                } else {
+                    fn_impl($crate::x86_64::SSE2::instance(), $($arg),*)
+                }
+            }
+        }
+    };
+    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
+        dispatch_light256!($mach, $MTy, {
+            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
+        });
+    }
+}
--- a/zeroidc/vendor/ppv-lite86/src/x86_64/sse2.rs
+++ b/zeroidc/vendor/ppv-lite86/src/x86_64/sse2.rs