Fix the RPM build (this reverts CI changes that will need to be un-reverted or made conditional) and vendor the Rust dependencies so that builds are much faster in any CI system.
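For context, Cargo's built-in vendoring workflow is the usual way to produce a tree like zeroidc/vendor/. A minimal sketch, assuming the vendor directory sits next to the zeroidc crate and a checked-in .cargo/config.toml points Cargo at it (the exact wiring in this repository may differ):

```toml
# Sketch only: output of running `cargo vendor vendor` inside zeroidc/,
# saved as zeroidc/.cargo/config.toml so CI builds resolve crates.io
# dependencies from the committed sources instead of the network.
[source.crates-io]
replace-with = "vendored-sources"

[source.vendored-sources]
directory = "vendor"
```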
zeroidc/vendor/idna/.cargo-checksum.json (new vendored file, 1 line)
@@ -0,0 +1 @@
{"files":{"Cargo.toml":"fa141dcb135262e5fda9f680671699045326d96779bb1acf38d48c70c712bcdf","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"20c7855c364d57ea4c97889a5e8d98470a9952dade37bd9248b9a54431670e5e","benches/all.rs":"e734b9c9092ed66986725f86cfe90f3756cfddb058af308b796ba494f9beefc2","src/IdnaMappingTable.txt":"87d6553a4b86bc49dcade38bf26b745cd81800eb8af295dc3fb99b4729eaea38","src/lib.rs":"d61b2bfcf4265b9a41eedd1de33ab49ea615e3c06df944321b30c57950a85342","src/make_uts46_mapping_table.py":"917055fa841f813de2bcf79cc79b595da3d5551559ee768db8660ab77cb26c34","src/punycode.rs":"07edf5293bc384a164eebb01bc18fe3d4b2d009b4565a36b74a3030978ea6e04","src/uts46.rs":"40521a01e5b8c38667252d5b1e0141c5a71f63aeae2f451b986792984e633b09","src/uts46_mapping_table.rs":"942fff78147c61da942f5f3a7ff4e90f9d7a00a29285733ac3fc3357eb2ed06f","tests/IdnaTestV2.txt":"c6f3778b0545fd150c8063286c7f5adc901e16557eddccc3751213646d07593d","tests/punycode.rs":"e6fb978f48445d1525a6b97351c41c5393a1612a35f85b9a7f45b8794fce9aba","tests/punycode_tests.json":"3d4ac0cf25984c37b9ce197f5df680a0136f728fb8ec82bc76624e42139eb3a8","tests/tests.rs":"de7425a3e4e6e871255721107803704d1431246601fa9c87105224d88dfe60d6","tests/unit.rs":"be025a7d9bab3bd1ce134c87f9d848269e157b31ca5ba0ea03426c1ac736b69e","tests/uts46.rs":"ca91d48811d366fb9e32d7aa79cfda1261b93c271b6ed7fb5535de9a2500205b"},"package":"418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"}
zeroidc/vendor/idna/Cargo.toml (new vendored file, 54 lines)
@@ -0,0 +1,54 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)

[package]
edition = "2018"
name = "idna"
version = "0.2.3"
authors = ["The rust-url developers"]
autotests = false
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
license = "MIT/Apache-2.0"
repository = "https://github.com/servo/rust-url/"

[lib]
doctest = false

[[test]]
name = "tests"
harness = false

[[test]]
name = "unit"

[[bench]]
name = "all"
harness = false
[dependencies.matches]
version = "0.1"

[dependencies.unicode-bidi]
version = "0.3"

[dependencies.unicode-normalization]
version = "0.1.17"
[dev-dependencies.assert_matches]
version = "1.3"

[dev-dependencies.bencher]
version = "0.1"

[dev-dependencies.rustc-test]
version = "0.3"

[dev-dependencies.serde_json]
version = "1.0"
zeroidc/vendor/idna/LICENSE-APACHE (new vendored file, 201 lines)
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
zeroidc/vendor/idna/LICENSE-MIT (new vendored file, 25 lines)
@@ -0,0 +1,25 @@
Copyright (c) 2013-2016 The rust-url developers

Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
zeroidc/vendor/idna/benches/all.rs (new vendored file, 53 lines)
@@ -0,0 +1,53 @@
#[macro_use]
extern crate bencher;
extern crate idna;

use bencher::{black_box, Bencher};
use idna::Config;

fn to_unicode_puny_label(bench: &mut Bencher) {
    let encoded = "abc.xn--mgbcm";
    let config = Config::default();
    bench.iter(|| config.to_unicode(black_box(encoded)));
}

fn to_unicode_ascii(bench: &mut Bencher) {
    let encoded = "example.com";
    let config = Config::default();
    bench.iter(|| config.to_unicode(black_box(encoded)));
}

fn to_unicode_merged_label(bench: &mut Bencher) {
    let encoded = "Beispiel.xn--vermgensberater-ctb";
    let config = Config::default();
    bench.iter(|| config.to_unicode(black_box(encoded)));
}

fn to_ascii_puny_label(bench: &mut Bencher) {
    let encoded = "abc.ابج";
    let config = Config::default();
    bench.iter(|| config.to_ascii(black_box(encoded)));
}

fn to_ascii_simple(bench: &mut Bencher) {
    let encoded = "example.com";
    let config = Config::default();
    bench.iter(|| config.to_ascii(black_box(encoded)));
}

fn to_ascii_merged(bench: &mut Bencher) {
    let encoded = "beispiel.vermögensberater";
    let config = Config::default();
    bench.iter(|| config.to_ascii(black_box(encoded)));
}

benchmark_group!(
    benches,
    to_unicode_puny_label,
    to_unicode_ascii,
    to_unicode_merged_label,
    to_ascii_puny_label,
    to_ascii_simple,
    to_ascii_merged,
);
benchmark_main!(benches);
zeroidc/vendor/idna/src/IdnaMappingTable.txt (new vendored file, 8727 lines; file diff suppressed because it is too large)
zeroidc/vendor/idna/src/lib.rs (new vendored file, 73 lines)
@@ -0,0 +1,73 @@
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This Rust crate implements IDNA
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
//!
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
//!
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
//!
//! > Initially, domain names were restricted to ASCII characters.
//! > A system was introduced in 2003 for internationalized domain names (IDN).
//! > This system is called Internationalizing Domain Names for Applications,
//! > or IDNA2003 for short.
//! > This mechanism supports IDNs by means of a client software transformation
//! > into a format known as Punycode.
//! > A revision of IDNA was approved in 2010 (IDNA2008).
//! > This revision has a number of incompatibilities with IDNA2003.
//! >
//! > The incompatibilities force implementers of client software,
//! > such as browsers and emailers,
//! > to face difficult choices during the transition period
//! > as registries shift from IDNA2003 to IDNA2008.
//! > This document specifies a mechanism
//! > that minimizes the impact of this transition for client software,
//! > allowing client software to access domains that are valid under either system.

#[macro_use]
extern crate matches;

pub mod punycode;
mod uts46;

pub use crate::uts46::{Config, Errors, Idna};

/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
///
/// Return the ASCII representation a domain name,
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
/// and using Punycode as necessary.
///
/// This process may fail.
pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
    Config::default().to_ascii(domain)
}

/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm,
/// with the `beStrict` flag set.
pub fn domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors> {
    Config::default()
        .use_std3_ascii_rules(true)
        .verify_dns_length(true)
        .to_ascii(domain)
}

/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
///
/// Return the Unicode representation of a domain name,
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
/// and decoding Punycode as necessary.
///
/// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
/// but always returns a string for the mapped domain.
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
    Config::default().to_unicode(domain)
}
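The doc comments in lib.rs above describe the crate's top-level entry points. A minimal usage sketch follows; the sample domain and expected strings are the well-known "bücher" example, not values taken from this commit:

```rust
// Illustrative round trip through the vendored idna crate's public API.
fn main() {
    // ToASCII: case-fold, NFC-normalize, and Punycode-encode non-ASCII labels.
    let ascii = idna::domain_to_ascii("Bücher.example").unwrap();
    assert_eq!(ascii, "xn--bcher-kva.example");

    // ToUnicode: always yields the mapped string plus a Result carrying any errors.
    let (unicode, result) = idna::domain_to_unicode("xn--bcher-kva.example");
    assert!(result.is_ok());
    assert_eq!(unicode, "bücher.example");
}
```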
zeroidc/vendor/idna/src/make_uts46_mapping_table.py (new vendored file, 185 lines)
@@ -0,0 +1,185 @@
|
||||
# Copyright 2013-2014 The rust-url developers.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs
|
||||
# You can get the latest idna table from
|
||||
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
|
||||
|
||||
import collections
|
||||
import itertools
|
||||
|
||||
print('''\
|
||||
// Copyright 2013-2020 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Generated by make_idna_table.py
|
||||
''')
|
||||
|
||||
txt = open("IdnaMappingTable.txt")
|
||||
|
||||
def escape_char(c):
|
||||
return "\\u{%x}" % ord(c[0])
|
||||
|
||||
def char(s):
|
||||
return chr(int(s, 16))
|
||||
|
||||
strtab = collections.OrderedDict()
|
||||
strtab_offset = 0
|
||||
|
||||
def strtab_slice(s):
|
||||
global strtab, strtab_offset
|
||||
|
||||
if s in strtab:
|
||||
return strtab[s]
|
||||
else:
|
||||
utf8_len = len(s.encode('utf8'))
|
||||
c = (strtab_offset, utf8_len)
|
||||
strtab[s] = c
|
||||
strtab_offset += utf8_len
|
||||
return c
|
||||
|
||||
def rust_slice(s):
|
||||
start = s[0]
|
||||
length = s[1]
|
||||
start_lo = start & 0xff
|
||||
start_hi = start >> 8
|
||||
assert length <= 255
|
||||
assert start_hi <= 255
|
||||
return "(StringTableSlice { byte_start_lo: %d, byte_start_hi: %d, byte_len: %d })" % (start_lo, start_hi, length)
|
||||
|
||||
ranges = []
|
||||
|
||||
for line in txt:
|
||||
# remove comments
|
||||
line, _, _ = line.partition('#')
|
||||
# skip empty lines
|
||||
if len(line.strip()) == 0:
|
||||
continue
|
||||
fields = line.split(';')
|
||||
if fields[0].strip() == 'D800..DFFF':
|
||||
continue # Surrogates don't occur in Rust strings.
|
||||
first, _, last = fields[0].strip().partition('..')
|
||||
if not last:
|
||||
last = first
|
||||
mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '')
|
||||
unicode_str = None
|
||||
if len(fields) > 2:
|
||||
if fields[2].strip():
|
||||
unicode_str = u''.join(char(c) for c in fields[2].strip().split(' '))
|
||||
elif mapping == "Deviation":
|
||||
unicode_str = u''
|
||||
|
||||
if len(fields) > 3:
|
||||
assert fields[3].strip() in ('NV8', 'XV8'), fields[3]
|
||||
assert mapping == 'Valid', mapping
|
||||
mapping = 'DisallowedIdna2008'
|
||||
|
||||
ranges.append((first, last, mapping, unicode_str))
|
||||
|
||||
def mergeable_key(r):
|
||||
mapping = r[2]
|
||||
|
||||
# These types have associated data, so we should not merge them.
|
||||
if mapping in ('Mapped', 'Deviation', 'DisallowedStd3Mapped'):
|
||||
return r
|
||||
assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid', 'DisallowedIdna2008')
|
||||
return mapping
|
||||
|
||||
grouped_ranges = itertools.groupby(ranges, key=mergeable_key)
|
||||
|
||||
optimized_ranges = []
|
||||
|
||||
for (k, g) in grouped_ranges:
|
||||
group = list(g)
|
||||
if len(group) == 1:
|
||||
optimized_ranges.append(group[0])
|
||||
continue
|
||||
# Assert that nothing in the group has an associated unicode string.
|
||||
for g in group:
|
||||
if g[3] is not None and len(g[3]) > 2:
|
||||
assert not g[3][2].strip()
|
||||
# Assert that consecutive members of the group don't leave gaps in
|
||||
# the codepoint space.
|
||||
a, b = itertools.tee(group)
|
||||
next(b, None)
|
||||
for (g1, g2) in zip(a, b):
|
||||
last_char = int(g1[1], 16)
|
||||
next_char = int(g2[0], 16)
|
||||
if last_char + 1 == next_char:
|
||||
continue
|
||||
# There's a gap where surrogates would appear, but we don't have to
|
||||
# worry about that gap, as surrogates never appear in Rust strings.
|
||||
# Assert we're seeing the surrogate case here.
|
||||
assert last_char == 0xd7ff
|
||||
assert next_char == 0xe000
|
||||
optimized_ranges.append((group[0][0], group[-1][1]) + group[0][2:])
|
||||
|
||||
def is_single_char_range(r):
|
||||
(first, last, _, _) = r
|
||||
return first == last
|
||||
|
||||
# We can reduce the size of the character range table and the index table to about 1/4
|
||||
# by merging runs of single character ranges and using character offsets from the start
|
||||
# of that range to retrieve the correct `Mapping` value
|
||||
def merge_single_char_ranges(ranges):
|
||||
current = []
|
||||
for r in ranges:
|
||||
if not current or is_single_char_range(current[-1]) and is_single_char_range(r):
|
||||
current.append(r)
|
||||
continue
|
||||
if len(current) != 0:
|
||||
ret = current
|
||||
current = [r]
|
||||
yield ret
|
||||
continue
|
||||
current.append(r)
|
||||
ret = current
|
||||
current = []
|
||||
yield ret
|
||||
yield current
|
||||
|
||||
optimized_ranges = list(merge_single_char_ranges(optimized_ranges))
|
||||
|
||||
SINGLE_MARKER = 1 << 15
|
||||
|
||||
print("static TABLE: &[(char, u16)] = &[")
|
||||
|
||||
offset = 0
|
||||
for ranges in optimized_ranges:
|
||||
assert offset < SINGLE_MARKER
|
||||
|
||||
block_len = len(ranges)
|
||||
single = SINGLE_MARKER if block_len == 1 else 0
|
||||
index = offset | single
|
||||
offset += block_len
|
||||
|
||||
start = escape_char(char(ranges[0][0]))
|
||||
print(" ('%s', %s)," % (start, index))
|
||||
|
||||
print("];\n")
|
||||
|
||||
print("static MAPPING_TABLE: &[Mapping] = &[")
|
||||
|
||||
for ranges in optimized_ranges:
|
||||
for (first, last, mapping, unicode_str) in ranges:
|
||||
if unicode_str is not None:
|
||||
mapping += rust_slice(strtab_slice(unicode_str))
|
||||
print(" %s," % mapping)
|
||||
|
||||
print("];\n")
|
||||
|
||||
def escape_str(s):
|
||||
return [escape_char(c) for c in s]
|
||||
|
||||
print("static STRING_TABLE: &str = \"%s\";"
|
||||
% '\\\n '.join(itertools.chain(*[escape_str(s) for s in strtab.keys()])))
|
||||
zeroidc/vendor/idna/src/punycode.rs (new vendored file, 315 lines)
@@ -0,0 +1,315 @@
|
||||
// Copyright 2013 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
|
||||
//!
|
||||
//! Since Punycode fundamentally works on unicode code points,
|
||||
//! `encode` and `decode` take and return slices and vectors of `char`.
|
||||
//! `encode_str` and `decode_to_string` provide convenience wrappers
|
||||
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.
|
||||
|
||||
use std::char;
|
||||
use std::u32;
|
||||
|
||||
// Bootstring parameters for Punycode
|
||||
static BASE: u32 = 36;
|
||||
static T_MIN: u32 = 1;
|
||||
static T_MAX: u32 = 26;
|
||||
static SKEW: u32 = 38;
|
||||
static DAMP: u32 = 700;
|
||||
static INITIAL_BIAS: u32 = 72;
|
||||
static INITIAL_N: u32 = 0x80;
|
||||
static DELIMITER: char = '-';
|
||||
|
||||
#[inline]
|
||||
fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
|
||||
delta /= if first_time { DAMP } else { 2 };
|
||||
delta += delta / num_points;
|
||||
let mut k = 0;
|
||||
while delta > ((BASE - T_MIN) * T_MAX) / 2 {
|
||||
delta /= BASE - T_MIN;
|
||||
k += BASE;
|
||||
}
|
||||
k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))
|
||||
}
|
||||
|
||||
/// Convert Punycode to an Unicode `String`.
|
||||
///
|
||||
/// This is a convenience wrapper around `decode`.
|
||||
#[inline]
|
||||
pub fn decode_to_string(input: &str) -> Option<String> {
|
||||
decode(input).map(|chars| chars.into_iter().collect())
|
||||
}
|
||||
|
||||
/// Convert Punycode to Unicode.
|
||||
///
|
||||
/// Return None on malformed input or overflow.
|
||||
/// Overflow can only happen on inputs that take more than
|
||||
/// 63 encoded bytes, the DNS limit on domain name labels.
|
||||
pub fn decode(input: &str) -> Option<Vec<char>> {
|
||||
Some(Decoder::default().decode(input).ok()?.collect())
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct Decoder {
|
||||
insertions: Vec<(usize, char)>,
|
||||
}
|
||||
|
||||
impl Decoder {
|
||||
/// Split the input iterator and return a Vec with insertions of encoded characters
|
||||
pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {
|
||||
self.insertions.clear();
|
||||
// Handle "basic" (ASCII) code points.
|
||||
// They are encoded as-is before the last delimiter, if any.
|
||||
let (base, input) = match input.rfind(DELIMITER) {
|
||||
None => ("", input),
|
||||
Some(position) => (
|
||||
&input[..position],
|
||||
if position > 0 {
|
||||
&input[position + 1..]
|
||||
} else {
|
||||
input
|
||||
},
|
||||
),
|
||||
};
|
||||
|
||||
if !base.is_ascii() {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
let base_len = base.len();
|
||||
let mut length = base_len as u32;
|
||||
let mut code_point = INITIAL_N;
|
||||
let mut bias = INITIAL_BIAS;
|
||||
let mut i = 0;
|
||||
let mut iter = input.bytes();
|
||||
loop {
|
||||
let previous_i = i;
|
||||
let mut weight = 1;
|
||||
let mut k = BASE;
|
||||
let mut byte = match iter.next() {
|
||||
None => break,
|
||||
Some(byte) => byte,
|
||||
};
|
||||
|
||||
// Decode a generalized variable-length integer into delta,
|
||||
// which gets added to i.
|
||||
loop {
|
||||
let digit = match byte {
|
||||
byte @ b'0'..=b'9' => byte - b'0' + 26,
|
||||
byte @ b'A'..=b'Z' => byte - b'A',
|
||||
byte @ b'a'..=b'z' => byte - b'a',
|
||||
_ => return Err(()),
|
||||
} as u32;
|
||||
if digit > (u32::MAX - i) / weight {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
i += digit * weight;
|
||||
let t = if k <= bias {
|
||||
T_MIN
|
||||
} else if k >= bias + T_MAX {
|
||||
T_MAX
|
||||
} else {
|
||||
k - bias
|
||||
};
|
||||
if digit < t {
|
||||
break;
|
||||
}
|
||||
if weight > u32::MAX / (BASE - t) {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
weight *= BASE - t;
|
||||
k += BASE;
|
||||
byte = match iter.next() {
|
||||
None => return Err(()), // End of input before the end of this delta
|
||||
Some(byte) => byte,
|
||||
};
|
||||
}
|
||||
|
||||
bias = adapt(i - previous_i, length + 1, previous_i == 0);
|
||||
if i / (length + 1) > u32::MAX - code_point {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
|
||||
// i was supposed to wrap around from length+1 to 0,
|
||||
// incrementing code_point each time.
|
||||
code_point += i / (length + 1);
|
||||
i %= length + 1;
|
||||
let c = match char::from_u32(code_point) {
|
||||
Some(c) => c,
|
||||
None => return Err(()),
|
||||
};
|
||||
|
||||
// Move earlier insertions farther out in the string
|
||||
for (idx, _) in &mut self.insertions {
|
||||
if *idx >= i as usize {
|
||||
*idx += 1;
|
||||
}
|
||||
}
|
||||
self.insertions.push((i as usize, c));
|
||||
length += 1;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
self.insertions.sort_by_key(|(i, _)| *i);
|
||||
Ok(Decode {
|
||||
base: base.chars(),
|
||||
insertions: &self.insertions,
|
||||
inserted: 0,
|
||||
position: 0,
|
||||
len: base_len + self.insertions.len(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct Decode<'a> {
|
||||
base: std::str::Chars<'a>,
|
||||
pub(crate) insertions: &'a [(usize, char)],
|
||||
inserted: usize,
|
||||
position: usize,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Decode<'a> {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
match self.insertions.get(self.inserted) {
|
||||
Some((pos, c)) if *pos == self.position => {
|
||||
self.inserted += 1;
|
||||
self.position += 1;
|
||||
return Some(*c);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if let Some(c) = self.base.next() {
|
||||
self.position += 1;
|
||||
return Some(c);
|
||||
} else if self.inserted >= self.insertions.len() {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let len = self.len - self.position;
|
||||
(len, Some(len))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ExactSizeIterator for Decode<'a> {
|
||||
fn len(&self) -> usize {
|
||||
self.len - self.position
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an Unicode `str` to Punycode.
|
||||
///
|
||||
/// This is a convenience wrapper around `encode`.
|
||||
#[inline]
|
||||
pub fn encode_str(input: &str) -> Option<String> {
|
||||
let mut buf = String::with_capacity(input.len());
|
||||
encode_into(input.chars(), &mut buf).ok().map(|()| buf)
|
||||
}
|
||||
|
||||
/// Convert Unicode to Punycode.
|
||||
///
|
||||
/// Return None on overflow, which can only happen on inputs that would take more than
|
||||
/// 63 encoded bytes, the DNS limit on domain name labels.
|
||||
pub fn encode(input: &[char]) -> Option<String> {
|
||||
let mut buf = String::with_capacity(input.len());
|
||||
encode_into(input.iter().copied(), &mut buf)
|
||||
.ok()
|
||||
.map(|()| buf)
|
||||
}
|
||||
|
||||
pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>
|
||||
where
|
||||
I: Iterator<Item = char> + Clone,
|
||||
{
|
||||
// Handle "basic" (ASCII) code points. They are encoded as-is.
|
||||
let (mut input_length, mut basic_length) = (0, 0);
|
||||
for c in input.clone() {
|
||||
input_length += 1;
|
||||
if c.is_ascii() {
|
||||
output.push(c);
|
||||
basic_length += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if basic_length > 0 {
|
||||
output.push('-')
|
||||
}
|
||||
let mut code_point = INITIAL_N;
|
||||
let mut delta = 0;
|
||||
let mut bias = INITIAL_BIAS;
|
||||
let mut processed = basic_length;
|
||||
while processed < input_length {
|
||||
// All code points < code_point have been handled already.
|
||||
// Find the next larger one.
|
||||
let min_code_point = input
|
||||
.clone()
|
||||
.map(|c| c as u32)
|
||||
.filter(|&c| c >= code_point)
|
||||
.min()
|
||||
.unwrap();
|
||||
if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
// Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
|
||||
delta += (min_code_point - code_point) * (processed + 1);
|
||||
code_point = min_code_point;
|
||||
for c in input.clone() {
|
||||
let c = c as u32;
|
||||
if c < code_point {
|
||||
delta += 1;
|
||||
if delta == 0 {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
}
|
||||
if c == code_point {
|
||||
// Represent delta as a generalized variable-length integer:
|
||||
let mut q = delta;
|
||||
let mut k = BASE;
|
||||
loop {
|
||||
let t = if k <= bias {
|
||||
T_MIN
|
||||
} else if k >= bias + T_MAX {
|
||||
T_MAX
|
||||
} else {
|
||||
k - bias
|
||||
};
|
||||
if q < t {
|
||||
break;
|
||||
}
|
||||
let value = t + ((q - t) % (BASE - t));
|
||||
output.push(value_to_digit(value));
|
||||
q = (q - t) / (BASE - t);
|
||||
k += BASE;
|
||||
}
|
||||
output.push(value_to_digit(q));
|
||||
bias = adapt(delta, processed + 1, processed == basic_length);
|
||||
delta = 0;
|
||||
processed += 1;
|
||||
}
|
||||
}
|
||||
delta += 1;
|
||||
code_point += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_to_digit(value: u32) -> char {
|
||||
match value {
|
||||
0..=25 => (value as u8 + b'a') as char, // a..z
|
||||
26..=35 => (value as u8 - 26 + b'0') as char, // 0..9
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
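punycode.rs above exposes encode_str and decode_to_string as convenience wrappers around the RFC 3492 encoder and decoder. A small sketch of how they round-trip a single label; the expected outputs assume the standard "bücher" to "bcher-kva" example rather than anything in this commit:

```rust
// Illustrative use of the vendored punycode convenience wrappers.
use idna::punycode::{decode_to_string, encode_str};

fn main() {
    // Both return Option: None signals malformed input or overflow.
    assert_eq!(encode_str("bücher"), Some("bcher-kva".to_string()));
    assert_eq!(decode_to_string("bcher-kva"), Some("bücher".to_string()));
}
```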
zeroidc/vendor/idna/src/uts46.rs (new vendored file, 727 lines)
@@ -0,0 +1,727 @@
|
||||
// Copyright 2013-2014 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! [*Unicode IDNA Compatibility Processing*
|
||||
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
|
||||
|
||||
use self::Mapping::*;
|
||||
use crate::punycode;
|
||||
use std::{error::Error as StdError, fmt};
|
||||
use unicode_bidi::{bidi_class, BidiClass};
|
||||
use unicode_normalization::char::is_combining_mark;
|
||||
use unicode_normalization::{is_nfc, UnicodeNormalization};
|
||||
|
||||
include!("uts46_mapping_table.rs");
|
||||
|
||||
const PUNYCODE_PREFIX: &str = "xn--";
|
||||
|
||||
#[derive(Debug)]
|
||||
struct StringTableSlice {
|
||||
// Store these as separate fields so the structure will have an
|
||||
// alignment of 1 and thus pack better into the Mapping enum, below.
|
||||
byte_start_lo: u8,
|
||||
byte_start_hi: u8,
|
||||
byte_len: u8,
|
||||
}
|
||||
|
||||
fn decode_slice(slice: &StringTableSlice) -> &'static str {
|
||||
let lo = slice.byte_start_lo as usize;
|
||||
let hi = slice.byte_start_hi as usize;
|
||||
let start = (hi << 8) | lo;
|
||||
let len = slice.byte_len as usize;
|
||||
&STRING_TABLE[start..(start + len)]
|
||||
}
|
||||
|
||||
#[repr(u8)]
|
||||
#[derive(Debug)]
|
||||
enum Mapping {
|
||||
Valid,
|
||||
Ignored,
|
||||
Mapped(StringTableSlice),
|
||||
Deviation(StringTableSlice),
|
||||
Disallowed,
|
||||
DisallowedStd3Valid,
|
||||
DisallowedStd3Mapped(StringTableSlice),
|
||||
DisallowedIdna2008,
|
||||
}
|
||||
|
||||
fn find_char(codepoint: char) -> &'static Mapping {
|
||||
let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) {
|
||||
Ok(idx) => idx,
|
||||
Err(idx) => idx - 1,
|
||||
};
|
||||
|
||||
const SINGLE_MARKER: u16 = 1 << 15;
|
||||
|
||||
let (base, x) = TABLE[idx];
|
||||
let single = (x & SINGLE_MARKER) != 0;
|
||||
let offset = !SINGLE_MARKER & x;
|
||||
|
||||
if single {
|
||||
&MAPPING_TABLE[offset as usize]
|
||||
} else {
|
||||
&MAPPING_TABLE[(offset + (codepoint as u16 - base as u16)) as usize]
|
||||
}
|
||||
}
|
||||
|
||||
struct Mapper<'a> {
|
||||
chars: std::str::Chars<'a>,
|
||||
config: Config,
|
||||
errors: &'a mut Errors,
|
||||
slice: Option<std::str::Chars<'static>>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Mapper<'a> {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if let Some(s) = &mut self.slice {
|
||||
match s.next() {
|
||||
Some(c) => return Some(c),
|
||||
None => {
|
||||
self.slice = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let codepoint = self.chars.next()?;
|
||||
if let '.' | '-' | 'a'..='z' | '0'..='9' = codepoint {
|
||||
return Some(codepoint);
|
||||
}
|
||||
|
||||
return Some(match *find_char(codepoint) {
|
||||
Mapping::Valid => codepoint,
|
||||
Mapping::Ignored => continue,
|
||||
Mapping::Mapped(ref slice) => {
|
||||
self.slice = Some(decode_slice(slice).chars());
|
||||
continue;
|
||||
}
|
||||
Mapping::Deviation(ref slice) => {
|
||||
if self.config.transitional_processing {
|
||||
self.slice = Some(decode_slice(slice).chars());
|
||||
continue;
|
||||
} else {
|
||||
codepoint
|
||||
}
|
||||
}
|
||||
Mapping::Disallowed => {
|
||||
self.errors.disallowed_character = true;
|
||||
codepoint
|
||||
}
|
||||
Mapping::DisallowedStd3Valid => {
|
||||
if self.config.use_std3_ascii_rules {
|
||||
self.errors.disallowed_by_std3_ascii_rules = true;
|
||||
};
|
||||
codepoint
|
||||
}
|
||||
Mapping::DisallowedStd3Mapped(ref slice) => {
|
||||
if self.config.use_std3_ascii_rules {
|
||||
self.errors.disallowed_mapped_in_std3 = true;
|
||||
};
|
||||
self.slice = Some(decode_slice(slice).chars());
|
||||
continue;
|
||||
}
|
||||
Mapping::DisallowedIdna2008 => {
|
||||
if self.config.use_idna_2008_rules {
|
||||
self.errors.disallowed_in_idna_2008 = true;
|
||||
}
|
||||
codepoint
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// http://tools.ietf.org/html/rfc5893#section-2
|
||||
fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
|
||||
// Rule 0: Bidi Rules apply to Bidi Domain Names: a name with at least one RTL label. A label
|
||||
// is RTL if it contains at least one character of bidi class R, AL or AN.
|
||||
if !is_bidi_domain {
|
||||
return true;
|
||||
}
|
||||
|
||||
let mut chars = label.chars();
|
||||
let first_char_class = match chars.next() {
|
||||
Some(c) => bidi_class(c),
|
||||
None => return true, // empty string
|
||||
};
|
||||
|
||||
match first_char_class {
|
||||
// LTR label
|
||||
BidiClass::L => {
|
||||
// Rule 5
|
||||
while let Some(c) = chars.next() {
|
||||
if !matches!(
|
||||
bidi_class(c),
|
||||
BidiClass::L
|
||||
| BidiClass::EN
|
||||
| BidiClass::ES
|
||||
| BidiClass::CS
|
||||
| BidiClass::ET
|
||||
| BidiClass::ON
|
||||
| BidiClass::BN
|
||||
| BidiClass::NSM
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 6
|
||||
// must end in L or EN followed by 0 or more NSM
|
||||
let mut rev_chars = label.chars().rev();
|
||||
let mut last_non_nsm = rev_chars.next();
|
||||
loop {
|
||||
match last_non_nsm {
|
||||
Some(c) if bidi_class(c) == BidiClass::NSM => {
|
||||
last_non_nsm = rev_chars.next();
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
match last_non_nsm {
|
||||
Some(c) if bidi_class(c) == BidiClass::L || bidi_class(c) == BidiClass::EN => {}
|
||||
Some(_) => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// RTL label
|
||||
BidiClass::R | BidiClass::AL => {
|
||||
let mut found_en = false;
|
||||
let mut found_an = false;
|
||||
|
||||
// Rule 2
|
||||
for c in chars {
|
||||
let char_class = bidi_class(c);
|
||||
if char_class == BidiClass::EN {
|
||||
found_en = true;
|
||||
} else if char_class == BidiClass::AN {
|
||||
found_an = true;
|
||||
}
|
||||
|
||||
if !matches!(
|
||||
char_class,
|
||||
BidiClass::R
|
||||
| BidiClass::AL
|
||||
| BidiClass::AN
|
||||
| BidiClass::EN
|
||||
| BidiClass::ES
|
||||
| BidiClass::CS
|
||||
| BidiClass::ET
|
||||
| BidiClass::ON
|
||||
| BidiClass::BN
|
||||
| BidiClass::NSM
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Rule 3
|
||||
let mut rev_chars = label.chars().rev();
|
||||
let mut last = rev_chars.next();
|
||||
loop {
|
||||
// must end in L or EN followed by 0 or more NSM
|
||||
match last {
|
||||
Some(c) if bidi_class(c) == BidiClass::NSM => {
|
||||
last = rev_chars.next();
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
match last {
|
||||
Some(c)
|
||||
if matches!(
|
||||
bidi_class(c),
|
||||
BidiClass::R | BidiClass::AL | BidiClass::EN | BidiClass::AN
|
||||
) => {}
|
||||
_ => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 4
|
||||
if found_an && found_en {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 1: Should start with L or R/AL
|
||||
_ => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Check the validity criteria for the given label
|
||||
///
|
||||
/// V1 (NFC) and V8 (Bidi) are checked inside `processing()` to prevent doing duplicate work.
|
||||
///
|
||||
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
|
||||
fn check_validity(label: &str, config: Config, errors: &mut Errors) {
|
||||
let first_char = label.chars().next();
|
||||
if first_char == None {
|
||||
// Empty string, pass
|
||||
return;
|
||||
}
|
||||
|
||||
// V2: No U+002D HYPHEN-MINUS in both third and fourth positions.
|
||||
//
|
||||
// NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the
|
||||
// third and fourth positions. But nobody follows this criteria. See the spec issue below:
|
||||
// https://github.com/whatwg/url/issues/53
|
||||
|
||||
// V3: neither begin nor end with a U+002D HYPHEN-MINUS
|
||||
if config.check_hyphens && (label.starts_with('-') || label.ends_with('-')) {
|
||||
errors.check_hyphens = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// V4: not contain a U+002E FULL STOP
|
||||
//
|
||||
// Here, label can't contain '.' since the input is from .split('.')
|
||||
|
||||
// V5: not begin with a GC=Mark
|
||||
if is_combining_mark(first_char.unwrap()) {
|
||||
errors.start_combining_mark = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// V6: Check against Mapping Table
|
||||
if label.chars().any(|c| match *find_char(c) {
|
||||
Mapping::Valid | Mapping::DisallowedIdna2008 => false,
|
||||
Mapping::Deviation(_) => config.transitional_processing,
|
||||
Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
|
||||
_ => true,
|
||||
}) {
|
||||
errors.invalid_mapping = true;
|
||||
}
|
||||
|
||||
// V7: ContextJ rules
|
||||
//
|
||||
// TODO: Implement rules and add *CheckJoiners* flag.
|
||||
|
||||
// V8: Bidi rules are checked inside `processing()`
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#Processing
|
||||
#[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36
|
||||
fn processing(
|
||||
domain: &str,
|
||||
config: Config,
|
||||
normalized: &mut String,
|
||||
output: &mut String,
|
||||
) -> Errors {
|
||||
// Weed out the simple cases: only allow all lowercase ASCII characters and digits where none
|
||||
// of the labels start with PUNYCODE_PREFIX and labels don't start or end with hyphen.
|
||||
let (mut prev, mut simple, mut puny_prefix) = ('?', !domain.is_empty(), 0);
|
||||
for c in domain.chars() {
|
||||
if c == '.' {
|
||||
if prev == '-' {
|
||||
simple = false;
|
||||
break;
|
||||
}
|
||||
puny_prefix = 0;
|
||||
continue;
|
||||
} else if puny_prefix == 0 && c == '-' {
|
||||
simple = false;
|
||||
break;
|
||||
} else if puny_prefix < 5 {
|
||||
if c == ['x', 'n', '-', '-'][puny_prefix] {
|
||||
puny_prefix += 1;
|
||||
if puny_prefix == 4 {
|
||||
simple = false;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
puny_prefix = 5;
|
||||
}
|
||||
}
|
||||
if !c.is_ascii_lowercase() && !c.is_ascii_digit() {
|
||||
simple = false;
|
||||
break;
|
||||
}
|
||||
prev = c;
|
||||
}
|
||||
|
||||
if simple {
|
||||
output.push_str(domain);
|
||||
return Errors::default();
|
||||
}
|
||||
|
||||
normalized.clear();
|
||||
let mut errors = Errors::default();
|
||||
let offset = output.len();
|
||||
|
||||
let iter = Mapper {
|
||||
chars: domain.chars(),
|
||||
config,
|
||||
errors: &mut errors,
|
||||
slice: None,
|
||||
};
|
||||
|
||||
normalized.extend(iter.nfc());
|
||||
|
||||
let mut decoder = punycode::Decoder::default();
|
||||
let non_transitional = config.transitional_processing(false);
|
||||
let (mut first, mut has_bidi_labels) = (true, false);
|
||||
for label in normalized.split('.') {
|
||||
if !first {
|
||||
output.push('.');
|
||||
}
|
||||
first = false;
|
||||
if label.starts_with(PUNYCODE_PREFIX) {
|
||||
match decoder.decode(&label[PUNYCODE_PREFIX.len()..]) {
|
||||
Ok(decode) => {
|
||||
let start = output.len();
|
||||
output.extend(decode);
|
||||
let decoded_label = &output[start..];
|
||||
|
||||
if !has_bidi_labels {
|
||||
has_bidi_labels |= is_bidi_domain(decoded_label);
|
||||
}
|
||||
|
||||
if !errors.is_err() {
|
||||
if !is_nfc(&decoded_label) {
|
||||
errors.nfc = true;
|
||||
} else {
|
||||
check_validity(decoded_label, non_transitional, &mut errors);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(()) => {
|
||||
has_bidi_labels = true;
|
||||
errors.punycode = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !has_bidi_labels {
|
||||
has_bidi_labels |= is_bidi_domain(label);
|
||||
}
|
||||
|
||||
// `normalized` is already `NFC` so we can skip that check
|
||||
check_validity(label, config, &mut errors);
|
||||
output.push_str(label)
|
||||
}
|
||||
}
|
||||
|
||||
for label in output[offset..].split('.') {
|
||||
// V8: Bidi rules
|
||||
//
|
||||
// TODO: Add *CheckBidi* flag
|
||||
if !passes_bidi(label, has_bidi_labels) {
|
||||
errors.check_bidi = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Idna {
|
||||
config: Config,
|
||||
normalized: String,
|
||||
output: String,
|
||||
}
|
||||
|
||||
impl Idna {
|
||||
pub fn new(config: Config) -> Self {
|
||||
Self {
|
||||
config,
|
||||
normalized: String::new(),
|
||||
output: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToASCII
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
|
||||
let mut errors = processing(domain, self.config, &mut self.normalized, &mut self.output);
|
||||
|
||||
let mut first = true;
|
||||
for label in self.output.split('.') {
|
||||
if !first {
|
||||
out.push('.');
|
||||
}
|
||||
first = false;
|
||||
|
||||
if label.is_ascii() {
|
||||
out.push_str(label);
|
||||
} else {
|
||||
let offset = out.len();
|
||||
out.push_str(PUNYCODE_PREFIX);
|
||||
if let Err(()) = punycode::encode_into(label.chars(), out) {
|
||||
errors.punycode = true;
|
||||
out.truncate(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.config.verify_dns_length {
|
||||
let domain = if out.ends_with('.') {
|
||||
&out[..out.len() - 1]
|
||||
} else {
|
||||
&*out
|
||||
};
|
||||
if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) {
|
||||
errors.too_short_for_dns = true;
|
||||
}
|
||||
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
|
||||
errors.too_long_for_dns = true;
|
||||
}
|
||||
}
|
||||
|
||||
errors.into()
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToUnicode
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
pub fn to_unicode<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
|
||||
processing(domain, self.config, &mut self.normalized, out).into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Config {
|
||||
use_std3_ascii_rules: bool,
|
||||
transitional_processing: bool,
|
||||
verify_dns_length: bool,
|
||||
check_hyphens: bool,
|
||||
use_idna_2008_rules: bool,
|
||||
}
|
||||
|
||||
/// The defaults are that of https://url.spec.whatwg.org/#idna
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Config {
|
||||
use_std3_ascii_rules: false,
|
||||
transitional_processing: false,
|
||||
check_hyphens: false,
|
||||
// check_bidi: true,
|
||||
// check_joiners: true,
|
||||
|
||||
// Only use for to_ascii, not to_unicode
|
||||
verify_dns_length: false,
|
||||
use_idna_2008_rules: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
#[inline]
|
||||
pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
|
||||
self.use_std3_ascii_rules = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn transitional_processing(mut self, value: bool) -> Self {
|
||||
self.transitional_processing = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn verify_dns_length(mut self, value: bool) -> Self {
|
||||
self.verify_dns_length = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn check_hyphens(mut self, value: bool) -> Self {
|
||||
self.check_hyphens = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
|
||||
self.use_idna_2008_rules = value;
|
||||
self
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToASCII
|
||||
pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
|
||||
let mut result = String::new();
|
||||
let mut codec = Idna::new(self);
|
||||
codec.to_ascii(domain, &mut result).map(|()| result)
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToUnicode
|
||||
pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
|
||||
let mut codec = Idna::new(self);
|
||||
let mut out = String::with_capacity(domain.len());
|
||||
let result = codec.to_unicode(domain, &mut out);
|
||||
(out, result)
|
||||
}
|
||||
}
|
||||
|
||||
fn is_bidi_domain(s: &str) -> bool {
|
||||
for c in s.chars() {
|
||||
if c.is_ascii_graphic() {
|
||||
continue;
|
||||
}
|
||||
match bidi_class(c) {
|
||||
BidiClass::R | BidiClass::AL | BidiClass::AN => return true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Errors recorded during UTS #46 processing.
|
||||
///
|
||||
/// This is opaque for now, indicating what types of errors have been encountered at least once.
|
||||
/// More details may be exposed in the future.
|
||||
#[derive(Default)]
|
||||
pub struct Errors {
|
||||
punycode: bool,
|
||||
check_hyphens: bool,
|
||||
check_bidi: bool,
|
||||
start_combining_mark: bool,
|
||||
invalid_mapping: bool,
|
||||
nfc: bool,
|
||||
disallowed_by_std3_ascii_rules: bool,
|
||||
disallowed_mapped_in_std3: bool,
|
||||
disallowed_character: bool,
|
||||
too_long_for_dns: bool,
|
||||
too_short_for_dns: bool,
|
||||
disallowed_in_idna_2008: bool,
|
||||
}
|
||||
|
||||
impl Errors {
|
||||
fn is_err(&self) -> bool {
|
||||
let Errors {
|
||||
punycode,
|
||||
check_hyphens,
|
||||
check_bidi,
|
||||
start_combining_mark,
|
||||
invalid_mapping,
|
||||
nfc,
|
||||
disallowed_by_std3_ascii_rules,
|
||||
disallowed_mapped_in_std3,
|
||||
disallowed_character,
|
||||
too_long_for_dns,
|
||||
too_short_for_dns,
|
||||
disallowed_in_idna_2008,
|
||||
} = *self;
|
||||
punycode
|
||||
|| check_hyphens
|
||||
|| check_bidi
|
||||
|| start_combining_mark
|
||||
|| invalid_mapping
|
||||
|| nfc
|
||||
|| disallowed_by_std3_ascii_rules
|
||||
|| disallowed_mapped_in_std3
|
||||
|| disallowed_character
|
||||
|| too_long_for_dns
|
||||
|| too_short_for_dns
|
||||
|| disallowed_in_idna_2008
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Errors {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Errors {
            punycode,
            check_hyphens,
            check_bidi,
            start_combining_mark,
            invalid_mapping,
            nfc,
            disallowed_by_std3_ascii_rules,
            disallowed_mapped_in_std3,
            disallowed_character,
            too_long_for_dns,
            too_short_for_dns,
            disallowed_in_idna_2008,
        } = *self;

        let fields = [
            ("punycode", punycode),
            ("check_hyphens", check_hyphens),
            ("check_bidi", check_bidi),
            ("start_combining_mark", start_combining_mark),
            ("invalid_mapping", invalid_mapping),
            ("nfc", nfc),
            (
                "disallowed_by_std3_ascii_rules",
                disallowed_by_std3_ascii_rules,
            ),
            ("disallowed_mapped_in_std3", disallowed_mapped_in_std3),
            ("disallowed_character", disallowed_character),
            ("too_long_for_dns", too_long_for_dns),
            ("too_short_for_dns", too_short_for_dns),
            ("disallowed_in_idna_2008", disallowed_in_idna_2008),
        ];

        let mut empty = true;
        f.write_str("Errors { ")?;
        for (name, val) in &fields {
            if *val {
                if !empty {
                    f.write_str(", ")?;
                }
                f.write_str(*name)?;
                empty = false;
            }
        }

        if !empty {
            f.write_str(" }")
        } else {
            f.write_str("}")
        }
    }
}

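The `Debug` impl above prints only the flags that actually fired (for example `Errors { punycode }` or `Errors { check_hyphens, too_long_for_dns }`, and `Errors { }` when nothing was recorded). That is what lets the `emoji_domains` unit test later in this commit match on an error name. A hedged sketch of the same pattern:

// Sketch mirroring the emoji_domains unit test; not part of the vendored file.
let err = idna::Config::default()
    .verify_dns_length(true)
    .use_std3_ascii_rules(true)
    .use_idna_2008_rules(true)
    .to_ascii("☕.com")
    .unwrap_err();
assert!(format!("{:?}", err).contains("disallowed_in_idna_2008"));
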
impl From<Errors> for Result<(), Errors> {
    fn from(e: Errors) -> Result<(), Errors> {
        if !e.is_err() {
            Ok(())
        } else {
            Err(e)
        }
    }
}

impl StdError for Errors {}

impl fmt::Display for Errors {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self, f)
    }
}

#[cfg(test)]
mod tests {
    use super::{find_char, Mapping};

    #[test]
    fn mapping_fast_path() {
        assert_matches!(find_char('-'), &Mapping::Valid);
        assert_matches!(find_char('.'), &Mapping::Valid);
        for c in &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] {
            assert_matches!(find_char(*c), &Mapping::Valid);
        }
        for c in &[
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
            'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        ] {
            assert_matches!(find_char(*c), &Mapping::Valid);
        }
    }
}
15256
zeroidc/vendor/idna/src/uts46_mapping_table.rs
vendored
Normal file
File diff suppressed because it is too large
6334
zeroidc/vendor/idna/tests/IdnaTestV2.txt
vendored
Normal file
File diff suppressed because it is too large
77
zeroidc/vendor/idna/tests/punycode.rs
vendored
Normal file
@@ -0,0 +1,77 @@
// Copyright 2013 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use crate::test::TestFn;
use idna::punycode::{decode, encode_str};
use serde_json::map::Map;
use serde_json::Value;
use std::str::FromStr;

fn one_test(decoded: &str, encoded: &str) {
    match decode(encoded) {
        None => panic!("Decoding {} failed.", encoded),
        Some(result) => {
            let result = result.into_iter().collect::<String>();
            assert!(
                result == decoded,
                "Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
                encoded,
                result,
                decoded
            )
        }
    }

    match encode_str(decoded) {
        None => panic!("Encoding {} failed.", decoded),
        Some(result) => assert!(
            result == encoded,
            "Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
            decoded,
            result,
            encoded
        ),
    }
}

fn get_string<'a>(map: &'a Map<String, Value>, key: &str) -> &'a str {
    match map.get(&key.to_string()) {
        Some(&Value::String(ref s)) => s,
        None => "",
        _ => panic!(),
    }
}

pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
    match Value::from_str(include_str!("punycode_tests.json")) {
        Ok(Value::Array(tests)) => {
            for (i, test) in tests.into_iter().enumerate() {
                match test {
                    Value::Object(o) => {
                        let test_name = {
                            let desc = get_string(&o, "description");
                            if desc.is_empty() {
                                format!("Punycode {}", i + 1)
                            } else {
                                format!("Punycode {}: {}", i + 1, desc)
                            }
                        };
                        add_test(
                            test_name,
                            TestFn::dyn_test_fn(move || {
                                one_test(get_string(&o, "decoded"), get_string(&o, "encoded"))
                            }),
                        )
                    }
                    _ => panic!(),
                }
            }
        }
        other => panic!("{:?}", other),
    }
}
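The `idna::punycode` API exercised by these tests works on bare labels without the `xn--` ACE prefix that the UTS #46 processing adds on top: `decode` returns an `Option<Vec<char>>` and `encode_str` returns an `Option<String>`. A round-trip sketch using values from the fixture file that follows:

// Sketch only; "bücher" <-> "bcher-kva" per the "mix of ASCII and non-ASCII characters" fixture.
use idna::punycode::{decode, encode_str};

let encoded = encode_str("bücher").unwrap();
assert_eq!(encoded, "bcher-kva");

let decoded: String = decode("bcher-kva").unwrap().into_iter().collect();
assert_eq!(decoded, "bücher");
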
120
zeroidc/vendor/idna/tests/punycode_tests.json
vendored
Normal file
@@ -0,0 +1,120 @@
[
    {
        "description": "These tests are copied from https://github.com/bestiejs/punycode.js/blob/master/tests/tests.js , used under the MIT license.",
        "decoded": "",
        "encoded": ""
    },
    {
        "description": "a single basic code point",
        "decoded": "Bach",
        "encoded": "Bach-"
    },
    {
        "description": "a single non-ASCII character",
        "decoded": "\u00FC",
        "encoded": "tda"
    },
    {
        "description": "multiple non-ASCII characters",
        "decoded": "\u00FC\u00EB\u00E4\u00F6\u2665",
        "encoded": "4can8av2009b"
    },
    {
        "description": "mix of ASCII and non-ASCII characters",
        "decoded": "b\u00FCcher",
        "encoded": "bcher-kva"
    },
    {
        "description": "long string with both ASCII and non-ASCII characters",
        "decoded": "Willst du die Bl\u00FCthe des fr\u00FChen, die Fr\u00FCchte des sp\u00E4teren Jahres",
        "encoded": "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal"
    },
    {
        "description": "Arabic (Egyptian)",
        "decoded": "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
        "encoded": "egbpdaj6bu4bxfgehfvwxn"
    },
    {
        "description": "Chinese (simplified)",
        "decoded": "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2d\u6587",
        "encoded": "ihqwcrb4cv8a8dqg056pqjye"
    },
    {
        "description": "Chinese (traditional)",
        "decoded": "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
        "encoded": "ihqwctvzc91f659drss3x8bo0yb"
    },
    {
        "description": "Czech",
        "decoded": "Pro\u010Dprost\u011Bnemluv\u00ED\u010Desky",
        "encoded": "Proprostnemluvesky-uyb24dma41a"
    },
    {
        "description": "Hebrew",
        "decoded": "\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2\u05D1\u05E8\u05D9\u05EA",
        "encoded": "4dbcagdahymbxekheh6e0a7fei0b"
    },
    {
        "description": "Hindi (Devanagari)",
        "decoded": "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947\u0939\u0948\u0902",
        "encoded": "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
    },
    {
        "description": "Japanese (kanji and hiragana)",
        "decoded": "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
        "encoded": "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
    },
    {
        "description": "Korean (Hangul syllables)",
        "decoded": "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
        "encoded": "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
    },
    {
        "description": "Russian (Cyrillic)",
        "decoded": "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A\u0438",
        "encoded": "b1abfaaepdrnnbgefbadotcwatmq2g4l"
    },
    {
        "description": "Spanish",
        "decoded": "Porqu\u00E9nopuedensimplementehablarenEspa\u00F1ol",
        "encoded": "PorqunopuedensimplementehablarenEspaol-fmd56a"
    },
    {
        "description": "Vietnamese",
        "decoded": "T\u1EA1isaoh\u1ECDkh\u00F4ngth\u1EC3ch\u1EC9n\u00F3iti\u1EBFngVi\u1EC7t",
        "encoded": "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
    },
    {
        "decoded": "3\u5E74B\u7D44\u91D1\u516B\u5148\u751F",
        "encoded": "3B-ww4c5e180e575a65lsy2b"
    },
    {
        "decoded": "\u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS",
        "encoded": "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
    },
    {
        "decoded": "Hello-Another-Way-\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
        "encoded": "Hello-Another-Way--fc4qua05auwb3674vfr0b"
    },
    {
        "decoded": "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B2",
        "encoded": "2-u9tlzr9756bt3uc0v"
    },
    {
        "decoded": "Maji\u3067Koi\u3059\u308B5\u79D2\u524D",
        "encoded": "MajiKoi5-783gue6qz075azm5e"
    },
    {
        "decoded": "\u30D1\u30D5\u30A3\u30FCde\u30EB\u30F3\u30D0",
        "encoded": "de-jg4avhby1noc0d"
    },
    {
        "decoded": "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
        "encoded": "d9juau41awczczp"
    },
    {
        "description": "ASCII string that breaks the existing rules for host-name labels (It's not a realistic example for IDNA, because IDNA never encodes pure ASCII labels.)",
        "decoded": "-> $1.00 <-",
        "encoded": "-> $1.00 <--"
    }
]
19
zeroidc/vendor/idna/tests/tests.rs
vendored
Normal file
@@ -0,0 +1,19 @@
use rustc_test as test;

mod punycode;
mod uts46;

fn main() {
    let mut tests = Vec::new();
    {
        let mut add_test = |name, run| {
            tests.push(test::TestDescAndFn {
                desc: test::TestDesc::new(test::DynTestName(name)),
                testfn: run,
            })
        };
        punycode::collect_tests(&mut add_test);
        uts46::collect_tests(&mut add_test);
    }
    test::test_main(&std::env::args().collect::<Vec<_>>(), tests)
}
139
zeroidc/vendor/idna/tests/unit.rs
vendored
Normal file
@@ -0,0 +1,139 @@
use assert_matches::assert_matches;
use unicode_normalization::char::is_combining_mark;

/// https://github.com/servo/rust-url/issues/373
#[test]
fn test_punycode_prefix_with_length_check() {
    let config = idna::Config::default()
        .verify_dns_length(true)
        .check_hyphens(true)
        .use_std3_ascii_rules(true);

    assert!(config.to_ascii("xn--").is_err());
    assert!(config.to_ascii("xn---").is_err());
    assert!(config.to_ascii("xn-----").is_err());
    assert!(config.to_ascii("xn--.").is_err());
    assert!(config.to_ascii("xn--...").is_err());
    assert!(config.to_ascii(".xn--").is_err());
    assert!(config.to_ascii("...xn--").is_err());
    assert!(config.to_ascii("xn--.xn--").is_err());
    assert!(config.to_ascii("xn--.example.org").is_err());
}

/// https://github.com/servo/rust-url/issues/373
#[test]
fn test_punycode_prefix_without_length_check() {
    let config = idna::Config::default()
        .verify_dns_length(false)
        .check_hyphens(true)
        .use_std3_ascii_rules(true);

    assert_eq!(config.to_ascii("xn--").unwrap(), "");
    assert!(config.to_ascii("xn---").is_err());
    assert!(config.to_ascii("xn-----").is_err());
    assert_eq!(config.to_ascii("xn--.").unwrap(), ".");
    assert_eq!(config.to_ascii("xn--...").unwrap(), "...");
    assert_eq!(config.to_ascii(".xn--").unwrap(), ".");
    assert_eq!(config.to_ascii("...xn--").unwrap(), "...");
    assert_eq!(config.to_ascii("xn--.xn--").unwrap(), ".");
    assert_eq!(config.to_ascii("xn--.example.org").unwrap(), ".example.org");
}

// http://www.unicode.org/reports/tr46/#Table_Example_Processing
#[test]
fn test_examples() {
    let mut codec = idna::Idna::default();
    let mut out = String::new();

    assert_matches!(codec.to_unicode("Bloß.de", &mut out), Ok(()));
    assert_eq!(out, "bloß.de");

    out.clear();
    assert_matches!(codec.to_unicode("xn--blo-7ka.de", &mut out), Ok(()));
    assert_eq!(out, "bloß.de");

    out.clear();
    assert_matches!(codec.to_unicode("u\u{308}.com", &mut out), Ok(()));
    assert_eq!(out, "ü.com");

    out.clear();
    assert_matches!(codec.to_unicode("xn--tda.com", &mut out), Ok(()));
    assert_eq!(out, "ü.com");

    out.clear();
    assert_matches!(codec.to_unicode("xn--u-ccb.com", &mut out), Err(_));

    out.clear();
    assert_matches!(codec.to_unicode("a⒈com", &mut out), Err(_));

    out.clear();
    assert_matches!(codec.to_unicode("xn--a-ecp.ru", &mut out), Err(_));

    out.clear();
    assert_matches!(codec.to_unicode("xn--0.pt", &mut out), Err(_));

    out.clear();
    assert_matches!(codec.to_unicode("日本語。JP", &mut out), Ok(()));
    assert_eq!(out, "日本語.jp");

    out.clear();
    assert_matches!(codec.to_unicode("☕.us", &mut out), Ok(()));
    assert_eq!(out, "☕.us");
}

#[test]
fn test_v5() {
    let config = idna::Config::default()
        .verify_dns_length(true)
        .use_std3_ascii_rules(true);

    // IdnaTest:784 蔏。𑰺
    assert!(is_combining_mark('\u{11C3A}'));
    assert!(config.to_ascii("\u{11C3A}").is_err());
    assert!(config.to_ascii("\u{850f}.\u{11C3A}").is_err());
    assert!(config.to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err());
}

#[test]
fn test_v8_bidi_rules() {
    let config = idna::Config::default()
        .verify_dns_length(true)
        .use_std3_ascii_rules(true);

    assert_eq!(config.to_ascii("abc").unwrap(), "abc");
    assert_eq!(config.to_ascii("123").unwrap(), "123");
    assert_eq!(config.to_ascii("אבּג").unwrap(), "xn--kdb3bdf");
    assert_eq!(config.to_ascii("ابج").unwrap(), "xn--mgbcm");
    assert_eq!(config.to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm");
    assert_eq!(config.to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm");

    // Bidi domain names cannot start with digits
    assert!(config.to_ascii("0a.\u{05D0}").is_err());
    assert!(config.to_ascii("0à.\u{05D0}").is_err());

    // Bidi chars may be punycode-encoded
    assert!(config.to_ascii("xn--0ca24w").is_err());
}

#[test]
fn emoji_domains() {
    // HOT BEVERAGE is allowed here...
    let config = idna::Config::default()
        .verify_dns_length(true)
        .use_std3_ascii_rules(true);
    assert_eq!(config.to_ascii("☕.com").unwrap(), "xn--53h.com");

    // ... but not here
    let config = idna::Config::default()
        .verify_dns_length(true)
        .use_std3_ascii_rules(true)
        .use_idna_2008_rules(true);
    let error = format!("{:?}", config.to_ascii("☕.com").unwrap_err());
    assert!(error.contains("disallowed_in_idna_2008"));
}

#[test]
fn unicode_before_delimiter() {
    let config = idna::Config::default();
    assert!(config.to_ascii("xn--f\u{34a}-PTP").is_err());
}
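The tests above drive the lower-level `idna::Idna` codec with an explicit output buffer; the same lookup is available through `Config`, which manages the buffer and returns it alongside the status. A hedged, Config-level counterpart of one of the `test_examples` cases:

// Sketch; equivalent to codec.to_unicode("xn--blo-7ka.de", &mut out) above.
let (out, result) = idna::Config::default().to_unicode("xn--blo-7ka.de");
assert!(result.is_ok());
assert_eq!(out, "bloß.de");
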
193
zeroidc/vendor/idna/tests/uts46.rs
vendored
Normal file
@@ -0,0 +1,193 @@
// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use crate::test::TestFn;
use std::char;

use idna::Errors;

pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
    // https://www.unicode.org/Public/idna/13.0.0/IdnaTestV2.txt
    for (i, line) in include_str!("IdnaTestV2.txt").lines().enumerate() {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        // Remove comments
        let line = match line.find('#') {
            Some(index) => &line[0..index],
            None => line,
        };

        let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>();
        let source = unescape(&pieces.remove(0));

        // ToUnicode
        let mut to_unicode = unescape(&pieces.remove(0));
        if to_unicode.is_empty() {
            to_unicode = source.clone();
        }
        let to_unicode_status = status(pieces.remove(0));

        // ToAsciiN
        let to_ascii_n = pieces.remove(0);
        let to_ascii_n = if to_ascii_n.is_empty() {
            to_unicode.clone()
        } else {
            to_ascii_n.to_owned()
        };
        let to_ascii_n_status = pieces.remove(0);
        let to_ascii_n_status = if to_ascii_n_status.is_empty() {
            to_unicode_status.clone()
        } else {
            status(to_ascii_n_status)
        };

        // ToAsciiT
        let to_ascii_t = pieces.remove(0);
        let to_ascii_t = if to_ascii_t.is_empty() {
            to_ascii_n.clone()
        } else {
            to_ascii_t.to_owned()
        };
        let to_ascii_t_status = pieces.remove(0);
        let to_ascii_t_status = if to_ascii_t_status.is_empty() {
            to_ascii_n_status.clone()
        } else {
            status(to_ascii_t_status)
        };

        let test_name = format!("UTS #46 line {}", i + 1);
        add_test(
            test_name,
            TestFn::dyn_test_fn(move || {
                let config = idna::Config::default()
                    .use_std3_ascii_rules(true)
                    .verify_dns_length(true)
                    .check_hyphens(true);

                // http://unicode.org/reports/tr46/#Deviations
                // applications that perform IDNA2008 lookup are not required to check
                // for these contexts, so we skip all tests annotated with C*

                // Everybody ignores V2
                // https://github.com/servo/rust-url/pull/240
                // https://github.com/whatwg/url/issues/53#issuecomment-181528158
                // http://www.unicode.org/review/pri317/

                // "The special error codes X3 and X4_2 are now returned where a toASCII error code
                // was formerly being generated in toUnicode due to an empty label."
                // This is not implemented yet, so we skip toUnicode X4_2 tests for now, too.

                let (to_unicode_value, to_unicode_result) =
                    config.transitional_processing(false).to_unicode(&source);
                let to_unicode_result = to_unicode_result.map(|()| to_unicode_value);
                check(
                    &source,
                    (&to_unicode, &to_unicode_status),
                    to_unicode_result,
                    |e| e.starts_with('C') || e == "V2" || e == "X4_2",
                );

                let to_ascii_n_result = config.transitional_processing(false).to_ascii(&source);
                check(
                    &source,
                    (&to_ascii_n, &to_ascii_n_status),
                    to_ascii_n_result,
                    |e| e.starts_with('C') || e == "V2",
                );

                let to_ascii_t_result = config.transitional_processing(true).to_ascii(&source);
                check(
                    &source,
                    (&to_ascii_t, &to_ascii_t_status),
                    to_ascii_t_result,
                    |e| e.starts_with('C') || e == "V2",
                );
            }),
        )
    }
}

#[allow(clippy::redundant_clone)]
fn check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F)
where
    F: Fn(&str) -> bool,
{
    if !expected.1.is_empty() {
        if !expected.1.iter().copied().any(ignore) {
            let res = actual.ok();
            assert_eq!(
                res.clone(),
                None,
                "Expected error {:?}. result: {} | source: {}",
                expected.1,
                res.unwrap(),
                source,
            );
        }
    } else {
        assert!(
            actual.is_ok(),
            "Couldn't parse {} | error: {:?}",
            source,
            actual.err().unwrap(),
        );
        assert_eq!(actual.unwrap(), expected.0, "source: {}", source);
    }
}

fn unescape(input: &str) -> String {
    let mut output = String::new();
    let mut chars = input.chars();
    loop {
        match chars.next() {
            None => return output,
            Some(c) => {
                if c == '\\' {
                    match chars.next().unwrap() {
                        '\\' => output.push('\\'),
                        'u' => {
                            let c1 = chars.next().unwrap().to_digit(16).unwrap();
                            let c2 = chars.next().unwrap().to_digit(16).unwrap();
                            let c3 = chars.next().unwrap().to_digit(16).unwrap();
                            let c4 = chars.next().unwrap().to_digit(16).unwrap();
                            match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) {
                                Some(c) => output.push(c),
                                None => {
                                    output
                                        .push_str(&format!("\\u{:X}{:X}{:X}{:X}", c1, c2, c3, c4));
                                }
                            };
                        }
                        _ => panic!("Invalid test data input"),
                    }
                } else {
                    output.push(c);
                }
            }
        }
    }
}

fn status(status: &str) -> Vec<&str> {
    if status.is_empty() || status == "[]" {
        return Vec::new();
    }

    let mut result = status.split(", ").collect::<Vec<_>>();
    assert!(result[0].starts_with('['));
    result[0] = &result[0][1..];

    let idx = result.len() - 1;
    let last = &mut result[idx];
    assert!(last.ends_with(']'));
    *last = &last[..last.len() - 1];

    result
}
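Each non-comment line of IdnaTestV2.txt is a semicolon-separated record (source; toUnicode; toUnicodeStatus; toAsciiN; toAsciiNStatus; toAsciiT; toAsciiTStatus), with empty fields inheriting from the earlier ones, exactly as the parsing above encodes. A sketch of what the private helpers produce for an illustrative record (not necessarily a verbatim line from the data file):

// Illustrative record:
//   fa\u00DF.de; ; ; fass.de; ; ; [C1]
// unescape(r"fa\u00DF.de") == "faß.de"        // \uXXXX escapes are expanded
// status("")               == Vec::<&str>::new() // empty status means "inherit / no errors"
// status("[C1, V2]")       == vec!["C1", "V2"]   // brackets stripped, entries split on ", "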